[test pr] doi: add new doi backend #6

Draft · wants to merge 2 commits into master
1 change: 1 addition & 0 deletions backend/all/all.go
@@ -14,6 +14,7 @@ import (
    _ "github.com/rclone/rclone/backend/combine"
    _ "github.com/rclone/rclone/backend/compress"
    _ "github.com/rclone/rclone/backend/crypt"
    _ "github.com/rclone/rclone/backend/doi"
    _ "github.com/rclone/rclone/backend/drive"
    _ "github.com/rclone/rclone/backend/dropbox"
    _ "github.com/rclone/rclone/backend/fichier"
38 changes: 38 additions & 0 deletions backend/doi/api/dataversetypes.go
@@ -0,0 +1,38 @@
// Type definitions specific to Dataverse

package api

// DataverseDatasetResponse is returned by the Dataverse dataset API
type DataverseDatasetResponse struct {
    Status string           `json:"status"`
    Data   DataverseDataset `json:"data"`
}

// DataverseDataset is the representation of a dataset
type DataverseDataset struct {
    LatestVersion DataverseDatasetVersion `json:"latestVersion"`
}

// DataverseDatasetVersion is the representation of a dataset version
type DataverseDatasetVersion struct {
    LastUpdateTime string          `json:"lastUpdateTime"`
    Files          []DataverseFile `json:"files"`
}

// DataverseFile is the representation of a file found in a dataset
type DataverseFile struct {
    DirectoryLabel string            `json:"directoryLabel"`
    DataFile       DataverseDataFile `json:"dataFile"`
}

// DataverseDataFile represents file metadata details
type DataverseDataFile struct {
    ID                 int64  `json:"id"`
    Filename           string `json:"filename"`
    ContentType        string `json:"contentType"`
    FileSize           int64  `json:"filesize"`
    OriginalFileFormat string `json:"originalFileFormat"`
    OriginalFileSize   int64  `json:"originalFileSize"`
    OriginalFileName   string `json:"originalFileName"`
    MD5                string `json:"md5"`
}
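
The structs above cover just the fields of the Dataverse dataset response that the backend reads. A minimal decoding sketch (not part of this PR; the sample JSON below is invented for illustration):

package main

import (
    "encoding/json"
    "fmt"

    "github.com/rclone/rclone/backend/doi/api"
)

func main() {
    // Hypothetical response body, shaped like the Dataverse dataset API output.
    sample := []byte(`{
      "status": "OK",
      "data": {
        "latestVersion": {
          "lastUpdateTime": "2024-01-02T03:04:05Z",
          "files": [
            {
              "directoryLabel": "data",
              "dataFile": {
                "id": 42,
                "filename": "table.tab",
                "contentType": "text/tab-separated-values",
                "filesize": 1234,
                "originalFileFormat": "text/csv",
                "originalFileSize": 1300,
                "originalFileName": "table.csv",
                "md5": "d41d8cd98f00b204e9800998ecf8427e"
              }
            }
          ]
        }
      }
    }`)

    var resp api.DataverseDatasetResponse
    if err := json.Unmarshal(sample, &resp); err != nil {
        panic(err)
    }
    for _, f := range resp.Data.LatestVersion.Files {
        // Prefer the original file name/size when present, as the backend does.
        name, size := f.DataFile.Filename, f.DataFile.FileSize
        if f.DataFile.OriginalFileName != "" {
            name, size = f.DataFile.OriginalFileName, f.DataFile.OriginalFileSize
        }
        fmt.Println(f.DirectoryLabel+"/"+name, size)
    }
}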
34 changes: 34 additions & 0 deletions backend/doi/api/inveniotypes.go
@@ -0,0 +1,34 @@
// Type definitions specific to InvenioRDM

package api

// InvenioRecordResponse is the representation of a record stored in InvenioRDM
type InvenioRecordResponse struct {
    Links InvenioRecordResponseLinks `json:"links"`
    // Metadata InvenioRecordMetadata `json:"metadata"`
}

// InvenioRecordResponseLinks represents a record's links
type InvenioRecordResponseLinks struct {
    Self string `json:"self"`
}

// InvenioFilesResponse is the representation of a record's files
type InvenioFilesResponse struct {
    Entries []InvenioFilesResponseEntry `json:"entries"`
}

// InvenioFilesResponseEntry is the representation of a file entry
type InvenioFilesResponseEntry struct {
    Key      string                         `json:"key"`
    Checksum string                         `json:"checksum"`
    Size     int64                          `json:"size"`
    Updated  string                         `json:"updated"`
    MimeType string                         `json:"mimetype"`
    Links    InvenioFilesResponseEntryLinks `json:"links"`
}

// InvenioFilesResponseEntryLinks represents file links details
type InvenioFilesResponseEntryLinks struct {
    Content string `json:"content"`
}
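
Similarly, a short sketch (not part of this PR) of decoding an InvenioRDM file listing into these types, with an invented sample entry:

package main

import (
    "encoding/json"
    "fmt"

    "github.com/rclone/rclone/backend/doi/api"
)

func main() {
    // Hypothetical body from an InvenioRDM record's files listing.
    sample := []byte(`{
      "entries": [
        {
          "key": "paper.pdf",
          "checksum": "md5:9e107d9d372bb6826bd81d3542a419d6",
          "size": 204800,
          "updated": "2024-01-02T03:04:05Z",
          "mimetype": "application/pdf",
          "links": {"content": "https://example.org/api/records/abc123/files/paper.pdf/content"}
        }
      ]
    }`)

    var files api.InvenioFilesResponse
    if err := json.Unmarshal(sample, &files); err != nil {
        panic(err)
    }
    for _, e := range files.Entries {
        fmt.Println(e.Key, e.Size, e.MimeType, e.Links.Content)
    }
}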
26 changes: 26 additions & 0 deletions backend/doi/api/types.go
@@ -0,0 +1,26 @@
// Package api has general type definitions for doi
package api

// DoiResolverResponse is returned by the DOI resolver API
//
// Reference: https://www.doi.org/the-identifier/resources/factsheets/doi-resolution-documentation
type DoiResolverResponse struct {
    ResponseCode int                        `json:"responseCode"`
    Handle       string                     `json:"handle"`
    Values       []DoiResolverResponseValue `json:"values"`
}

// DoiResolverResponseValue is a single handle record value
type DoiResolverResponseValue struct {
    Index     int                          `json:"index"`
    Type      string                       `json:"type"`
    Data      DoiResolverResponseValueData `json:"data"`
    TTL       int                          `json:"ttl"`
    Timestamp string                       `json:"timestamp"`
}

// DoiResolverResponseValueData is the data held in a handle value
type DoiResolverResponseValueData struct {
    Format string `json:"format"`
    Value  any    `json:"value"`
}
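
These types map onto the JSON returned by the DOI resolution REST service described in the factsheet linked above. A minimal sketch of resolving a DOI and picking out its URL value, assuming the public https://doi.org/api/handles/<doi> endpoint and an illustrative DOI:

package main

import (
    "encoding/json"
    "fmt"
    "net/http"

    "github.com/rclone/rclone/backend/doi/api"
)

func main() {
    doi := "10.1000/182" // illustrative DOI
    res, err := http.Get("https://doi.org/api/handles/" + doi)
    if err != nil {
        panic(err)
    }
    defer res.Body.Close()

    var resolved api.DoiResolverResponse
    if err := json.NewDecoder(res.Body).Decode(&resolved); err != nil {
        panic(err)
    }
    // A responseCode of 1 indicates the handle was resolved successfully.
    fmt.Println("responseCode:", resolved.ResponseCode)
    for _, v := range resolved.Values {
        if v.Type == "URL" {
            // Value is declared as `any` because handle values are not
            // always strings; URL values are.
            fmt.Println("URL:", v.Data.Value)
        }
    }
}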
139 changes: 139 additions & 0 deletions backend/doi/dataverse.go
@@ -0,0 +1,139 @@
// Implementation for Dataverse

package doi

import (
    "context"
    "fmt"
    "net/http"
    "net/url"
    "path"
    "strings"
    "time"

    "github.com/rclone/rclone/backend/doi/api"
    "github.com/rclone/rclone/fs"
    "github.com/rclone/rclone/lib/rest"
)

// Returns true if resolvedURL is likely a DOI hosted on a Dataverse installation
func activateDataverse(resolvedURL *url.URL) (isActive bool) {
    queryValues := resolvedURL.Query()
    persistentID := queryValues.Get("persistentId")
    return persistentID != ""
}

// Resolve the main API endpoint for a DOI hosted on a Dataverse installation
func resolveDataverseEndpoint(resolvedURL *url.URL) (provider Provider, endpoint *url.URL, err error) {
    queryValues := resolvedURL.Query()
    persistentID := queryValues.Get("persistentId")

    query := url.Values{}
    query.Add("persistentId", persistentID)
    endpointURL := resolvedURL.ResolveReference(&url.URL{Path: "/api/datasets/:persistentId/", RawQuery: query.Encode()})

    return Dataverse, endpointURL, nil
}

// Implements Fs.List() for Dataverse installations
func (f *Fs) listDataverse(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
    fileEntries, err := f.listDataverseDoiFiles(ctx)
    if err != nil {
        return nil, fmt.Errorf("error listing %q: %w", dir, err)
    }

    fullDir := path.Join(f.root, dir)
    if fullDir != "" {
        fullDir += "/"
    }
    dirPaths := map[string]bool{}
    for _, entry := range fileEntries {
        // First, filter out files not in `fullDir`
        if !strings.HasPrefix(entry.remote, fullDir) {
            continue
        }
        // Then, find entries in subfolders
        remotePath := entry.remote
        if fullDir != "" {
            remotePath = strings.TrimLeft(strings.TrimPrefix(remotePath, fullDir), "/")
        }
        parts := strings.SplitN(remotePath, "/", 2)
        if len(parts) == 1 {
            newEntry := *entry
            newEntry.remote = path.Join(dir, remotePath)
            entries = append(entries, &newEntry)
        } else {
            dirPaths[path.Join(dir, parts[0])] = true
        }
    }
    for dirPath := range dirPaths {
        entry := fs.NewDir(dirPath, time.Time{})
        entries = append(entries, entry)
    }
    return entries, nil
}

// List the files contained in the DOI
func (f *Fs) listDataverseDoiFiles(ctx context.Context) (entries []*Object, err error) {
    // Use the cache if populated
    cachedEntries, found := f.cache.GetMaybe("files")
    if found {
        parsedEntries, ok := cachedEntries.([]Object)
        if ok {
            for _, entry := range parsedEntries {
                newEntry := entry
                entries = append(entries, &newEntry)
            }
            return entries, nil
        }
    }

    filesURL := f.endpoint
    var res *http.Response
    var result api.DataverseDatasetResponse
    opts := rest.Opts{
        Method:     "GET",
        Path:       strings.TrimLeft(filesURL.EscapedPath(), "/"),
        Parameters: filesURL.Query(),
    }
    err = f.pacer.Call(func() (bool, error) {
        res, err = f.srv.CallJSON(ctx, &opts, nil, &result)
        return shouldRetry(ctx, res, err)
    })
    if err != nil {
        return nil, fmt.Errorf("readDir failed: %w", err)
    }
    modTime, modTimeErr := time.Parse(time.RFC3339, result.Data.LatestVersion.LastUpdateTime)
    if modTimeErr != nil {
        fs.Logf(f, "error: could not parse last update time %v", modTimeErr)
        modTime = timeUnset
    }
    for _, file := range result.Data.LatestVersion.Files {
        contentURLPath := fmt.Sprintf("/api/access/datafile/%d", file.DataFile.ID)
        query := url.Values{}
        query.Add("format", "original")
        contentURL := f.endpoint.ResolveReference(&url.URL{Path: contentURLPath, RawQuery: query.Encode()})
        entry := &Object{
            fs:          f,
            remote:      path.Join(file.DirectoryLabel, file.DataFile.Filename),
            contentURL:  contentURL.String(),
            size:        file.DataFile.FileSize,
            modTime:     modTime,
            md5:         file.DataFile.MD5,
            contentType: file.DataFile.ContentType,
        }
        if file.DataFile.OriginalFileName != "" {
            entry.remote = path.Join(file.DirectoryLabel, file.DataFile.OriginalFileName)
            entry.size = file.DataFile.OriginalFileSize
            entry.contentType = file.DataFile.OriginalFileFormat
        }
        entries = append(entries, entry)
    }
    // Populate the cache
    cacheEntries := []Object{}
    for _, entry := range entries {
        cacheEntries = append(cacheEntries, *entry)
    }
    f.cache.Put("files", cacheEntries)
    return entries, nil
}
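
A standalone sketch (not part of the PR) of the path-splitting idea used in listDataverse above: entries directly under the requested directory are returned as files, while deeper entries contribute only their first path segment as a synthetic directory. The names and sample paths below are illustrative only:

package main

import (
    "fmt"
    "strings"
)

// splitListing mimics the loop in listDataverse: remotes under fullDir
// become file names, deeper remotes collapse into first-level directories.
func splitListing(fullDir string, remotes []string) (files, dirs []string) {
    if fullDir != "" {
        fullDir += "/"
    }
    seen := map[string]bool{}
    for _, remote := range remotes {
        if !strings.HasPrefix(remote, fullDir) {
            continue
        }
        rel := strings.TrimLeft(strings.TrimPrefix(remote, fullDir), "/")
        parts := strings.SplitN(rel, "/", 2)
        if len(parts) == 1 {
            files = append(files, rel)
        } else if !seen[parts[0]] {
            seen[parts[0]] = true
            dirs = append(dirs, parts[0])
        }
    }
    return files, dirs
}

func main() {
    remotes := []string{"readme.txt", "data/table.csv", "data/raw/log.txt"}
    files, dirs := splitListing("", remotes)
    fmt.Println("files:", files) // [readme.txt]
    fmt.Println("dirs:", dirs)   // [data]
}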