mirror of
https://github.com/rclone/rclone.git
synced 2025-04-16 16:18:52 +08:00
Merge 0a0fbe965ff710d984b2b642dc5f68cc76531069 into 0b9671313b14ffe839ecbd7dd2ae5ac7f6f05db8
This commit is contained in:
commit
b7e1e4b19a
@ -14,6 +14,7 @@ import (
|
||||
_ "github.com/rclone/rclone/backend/combine"
|
||||
_ "github.com/rclone/rclone/backend/compress"
|
||||
_ "github.com/rclone/rclone/backend/crypt"
|
||||
_ "github.com/rclone/rclone/backend/doi"
|
||||
_ "github.com/rclone/rclone/backend/drive"
|
||||
_ "github.com/rclone/rclone/backend/dropbox"
|
||||
_ "github.com/rclone/rclone/backend/fichier"
|
||||
|
38
backend/doi/api/dataversetypes.go
Normal file
38
backend/doi/api/dataversetypes.go
Normal file
@ -0,0 +1,38 @@
|
||||
// Type definitions specific to Dataverse
|
||||
|
||||
package api
|
||||
|
||||
// DataverseDatasetResponse is returned by the Dataverse dataset API
|
||||
type DataverseDatasetResponse struct {
|
||||
Status string `json:"status"`
|
||||
Data DataverseDataset `json:"data"`
|
||||
}
|
||||
|
||||
// DataverseDataset is the representation of a dataset
|
||||
type DataverseDataset struct {
|
||||
LatestVersion DataverseDatasetVersion `json:"latestVersion"`
|
||||
}
|
||||
|
||||
// DataverseDatasetVersion is the representation of a dataset version
|
||||
type DataverseDatasetVersion struct {
|
||||
LastUpdateTime string `json:"lastUpdateTime"`
|
||||
Files []DataverseFile `json:"files"`
|
||||
}
|
||||
|
||||
// DataverseFile is the representation of a file found in a dataset
|
||||
type DataverseFile struct {
|
||||
DirectoryLabel string `json:"directoryLabel"`
|
||||
DataFile DataverseDataFile `json:"dataFile"`
|
||||
}
|
||||
|
||||
// DataverseDataFile represents file metadata details.
//
// When OriginalFileName is non-empty the Dataverse listing code uses the
// Original* fields instead of Filename, FileSize and ContentType.
type DataverseDataFile struct {
	// ID is decoded from "id" and is used to build the download URL.
	ID int64 `json:"id"`
	// Filename is decoded from "filename".
	Filename string `json:"filename"`
	// ContentType is decoded from "contentType".
	ContentType string `json:"contentType"`
	// FileSize is decoded from "filesize".
	FileSize int64 `json:"filesize"`
	// OriginalFileFormat is decoded from "originalFileFormat".
	OriginalFileFormat string `json:"originalFileFormat"`
	// OriginalFileSize is decoded from "originalFileSize".
	OriginalFileSize int64 `json:"originalFileSize"`
	// OriginalFileName is decoded from "originalFileName".
	OriginalFileName string `json:"originalFileName"`
	// MD5 is decoded from "md5".
	MD5 string `json:"md5"`
}
|
34
backend/doi/api/inveniotypes.go
Normal file
34
backend/doi/api/inveniotypes.go
Normal file
@ -0,0 +1,34 @@
|
||||
// Type definitions specific to InvenioRDM
|
||||
|
||||
package api
|
||||
|
||||
// InvenioRecordResponse is the representation of a record stored in InvenioRDM
|
||||
type InvenioRecordResponse struct {
|
||||
Links InvenioRecordResponseLinks `json:"links"`
|
||||
// Metadata InvenioRecordMetadata `json:"metadata"`
|
||||
}
|
||||
|
||||
// InvenioRecordResponseLinks represents a record's links.
type InvenioRecordResponseLinks struct {
	// Self is decoded from the "self" key.
	Self string `json:"self"`
}
|
||||
|
||||
// InvenioFilesResponse is the representation of a record's files
|
||||
type InvenioFilesResponse struct {
|
||||
Entries []InvenioFilesResponseEntry `json:"entries"`
|
||||
}
|
||||
|
||||
// InvenioFilesResponseEntry is the representation of a file entry
|
||||
type InvenioFilesResponseEntry struct {
|
||||
Key string `json:"key"`
|
||||
Checksum string `json:"checksum"`
|
||||
Size int64 `json:"size"`
|
||||
Updated string `json:"updated"`
|
||||
MimeType string `json:"mimetype"`
|
||||
Links InvenioFilesResponseEntryLinks `json:"links"`
|
||||
}
|
||||
|
||||
// InvenioFilesResponseEntryLinks represents file links details.
type InvenioFilesResponseEntryLinks struct {
	// Content is decoded from the "content" key.
	Content string `json:"content"`
}
|
26
backend/doi/api/types.go
Normal file
26
backend/doi/api/types.go
Normal file
@ -0,0 +1,26 @@
|
||||
// Package api has general type definitions for doi
|
||||
package api
|
||||
|
||||
// DoiResolverResponse is returned by the DOI resolver API
|
||||
//
|
||||
// Reference: https://www.doi.org/the-identifier/resources/factsheets/doi-resolution-documentation
|
||||
type DoiResolverResponse struct {
|
||||
ResponseCode int `json:"responseCode"`
|
||||
Handle string `json:"handle"`
|
||||
Values []DoiResolverResponseValue `json:"values"`
|
||||
}
|
||||
|
||||
// DoiResolverResponseValue is a single handle record value
|
||||
type DoiResolverResponseValue struct {
|
||||
Index int `json:"index"`
|
||||
Type string `json:"type"`
|
||||
Data DoiResolverResponseValueData `json:"data"`
|
||||
TTL int `json:"ttl"`
|
||||
Timestamp string `json:"timestamp"`
|
||||
}
|
||||
|
||||
// DoiResolverResponseValueData is the data held in a handle value
type DoiResolverResponseValueData struct {
	// Format is decoded from "format"; the resolver code only consumes
	// values whose format is "string".
	Format string `json:"format"`
	// Value is left untyped because its JSON shape is not fixed; callers
	// must type-assert it.
	Value any `json:"value"`
}
|
139
backend/doi/dataverse.go
Normal file
139
backend/doi/dataverse.go
Normal file
@ -0,0 +1,139 @@
|
||||
// Implementation for Dataverse
|
||||
|
||||
package doi
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/rclone/rclone/backend/doi/api"
|
||||
"github.com/rclone/rclone/fs"
|
||||
"github.com/rclone/rclone/lib/rest"
|
||||
)
|
||||
|
||||
// activateDataverse reports whether resolvedURL is likely a DOI hosted on a
// Dataverse installation.
//
// The only signal used is a non-empty "persistentId" query parameter.
func activateDataverse(resolvedURL *url.URL) (isActive bool) {
	isActive = resolvedURL.Query().Get("persistentId") != ""
	return isActive
}
|
||||
|
||||
// Resolve the main API endpoint for a DOI hosted on a Dataverse installation
|
||||
func resolveDataverseEndpoint(resolvedURL *url.URL) (provider Provider, endpoint *url.URL, err error) {
|
||||
queryValues := resolvedURL.Query()
|
||||
persistentID := queryValues.Get("persistentId")
|
||||
|
||||
query := url.Values{}
|
||||
query.Add("persistentId", persistentID)
|
||||
endpointURL := resolvedURL.ResolveReference(&url.URL{Path: "/api/datasets/:persistentId/", RawQuery: query.Encode()})
|
||||
|
||||
return Dataverse, endpointURL, nil
|
||||
}
|
||||
|
||||
// Implements Fs.List() for Dataverse installations
|
||||
func (f *Fs) listDataverse(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
|
||||
fileEntries, err := f.listDataverseDoiFiles(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error listing %q: %w", dir, err)
|
||||
}
|
||||
|
||||
fullDir := path.Join(f.root, dir)
|
||||
if fullDir != "" {
|
||||
fullDir = fullDir + "/"
|
||||
}
|
||||
dirPaths := map[string]bool{}
|
||||
for _, entry := range fileEntries {
|
||||
// First, filter out files not in `fullDir`
|
||||
if !strings.HasPrefix(entry.remote, fullDir) {
|
||||
continue
|
||||
}
|
||||
// Then, find entries in subfolers
|
||||
remotePath := entry.remote
|
||||
if fullDir != "" {
|
||||
remotePath = strings.TrimLeft(strings.TrimPrefix(remotePath, fullDir), "/")
|
||||
}
|
||||
parts := strings.SplitN(remotePath, "/", 2)
|
||||
if len(parts) == 1 {
|
||||
newEntry := *entry
|
||||
newEntry.remote = path.Join(dir, remotePath)
|
||||
entries = append(entries, &newEntry)
|
||||
} else {
|
||||
dirPaths[path.Join(dir, parts[0])] = true
|
||||
}
|
||||
}
|
||||
for dirPath := range dirPaths {
|
||||
entry := fs.NewDir(dirPath, time.Time{})
|
||||
entries = append(entries, entry)
|
||||
}
|
||||
return entries, nil
|
||||
}
|
||||
|
||||
// List the files contained in the DOI
|
||||
func (f *Fs) listDataverseDoiFiles(ctx context.Context) (entries []*Object, err error) {
|
||||
// Use the cache if populated
|
||||
cachedEntries, found := f.cache.GetMaybe("files")
|
||||
if found {
|
||||
parsedEntries, ok := cachedEntries.([]Object)
|
||||
if ok {
|
||||
for _, entry := range parsedEntries {
|
||||
newEntry := entry
|
||||
entries = append(entries, &newEntry)
|
||||
}
|
||||
return entries, nil
|
||||
}
|
||||
}
|
||||
|
||||
filesURL := f.endpoint
|
||||
var res *http.Response
|
||||
var result api.DataverseDatasetResponse
|
||||
opts := rest.Opts{
|
||||
Method: "GET",
|
||||
Path: strings.TrimLeft(filesURL.EscapedPath(), "/"),
|
||||
Parameters: filesURL.Query(),
|
||||
}
|
||||
err = f.pacer.Call(func() (bool, error) {
|
||||
res, err = f.srv.CallJSON(ctx, &opts, nil, &result)
|
||||
return shouldRetry(ctx, res, err)
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("readDir failed: %w", err)
|
||||
}
|
||||
modTime, modTimeErr := time.Parse(time.RFC3339, result.Data.LatestVersion.LastUpdateTime)
|
||||
if modTimeErr != nil {
|
||||
fs.Logf(f, "error: could not parse last update time %v", modTimeErr)
|
||||
modTime = timeUnset
|
||||
}
|
||||
for _, file := range result.Data.LatestVersion.Files {
|
||||
contentURLPath := fmt.Sprintf("/api/access/datafile/%d", file.DataFile.ID)
|
||||
query := url.Values{}
|
||||
query.Add("format", "original")
|
||||
contentURL := f.endpoint.ResolveReference(&url.URL{Path: contentURLPath, RawQuery: query.Encode()})
|
||||
entry := &Object{
|
||||
fs: f,
|
||||
remote: path.Join(file.DirectoryLabel, file.DataFile.Filename),
|
||||
contentURL: contentURL.String(),
|
||||
size: file.DataFile.FileSize,
|
||||
modTime: modTime,
|
||||
md5: file.DataFile.MD5,
|
||||
contentType: file.DataFile.ContentType,
|
||||
}
|
||||
if file.DataFile.OriginalFileName != "" {
|
||||
entry.remote = path.Join(file.DirectoryLabel, file.DataFile.OriginalFileName)
|
||||
entry.size = file.DataFile.OriginalFileSize
|
||||
entry.contentType = file.DataFile.OriginalFileFormat
|
||||
}
|
||||
entries = append(entries, entry)
|
||||
}
|
||||
// Populate the cache
|
||||
cacheEntries := []Object{}
|
||||
for _, entry := range entries {
|
||||
cacheEntries = append(cacheEntries, *entry)
|
||||
}
|
||||
f.cache.Put("files", cacheEntries)
|
||||
return entries, nil
|
||||
}
|
599
backend/doi/doi.go
Normal file
599
backend/doi/doi.go
Normal file
@ -0,0 +1,599 @@
|
||||
// Package doi provides a filesystem interface for digital objects identified by DOIs.
|
||||
//
|
||||
// See: https://www.doi.org/the-identifier/what-is-a-doi/
|
||||
package doi
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/rclone/rclone/backend/doi/api"
|
||||
"github.com/rclone/rclone/fs"
|
||||
"github.com/rclone/rclone/fs/config/configmap"
|
||||
"github.com/rclone/rclone/fs/config/configstruct"
|
||||
"github.com/rclone/rclone/fs/fserrors"
|
||||
"github.com/rclone/rclone/fs/fshttp"
|
||||
"github.com/rclone/rclone/fs/hash"
|
||||
"github.com/rclone/rclone/lib/cache"
|
||||
"github.com/rclone/rclone/lib/pacer"
|
||||
"github.com/rclone/rclone/lib/rest"
|
||||
)
|
||||
|
||||
const (
	// doiResolverAPIURL is the base URL of the DOI resolver API.
	//
	// Reference: https://www.doi.org/the-identifier/resources/factsheets/doi-resolution-documentation
	doiResolverAPIURL = "https://doi.org/api"

	// Pacer settings used for every API call made by this backend.
	minSleep      = 10 * time.Millisecond
	maxSleep      = 2 * time.Second
	decayConstant = 2 // bigger for slower decay, exponential
)
|
||||
|
||||
var (
	// errorReadOnly is returned by every operation that would modify the remote.
	errorReadOnly = errors.New("doi remotes are read only")
	// timeUnset is the modification time used when none could be parsed.
	timeUnset = time.Unix(0, 0)
)
|
||||
|
||||
func init() {
|
||||
fsi := &fs.RegInfo{
|
||||
Name: "doi",
|
||||
Description: "DOI datasets",
|
||||
NewFs: NewFs,
|
||||
CommandHelp: commandHelp,
|
||||
Options: []fs.Option{{
|
||||
Name: "doi",
|
||||
Help: "The DOI or the doi.org URL.",
|
||||
Required: true,
|
||||
}, {
|
||||
Name: fs.ConfigProvider,
|
||||
Help: `DOI provider.
|
||||
|
||||
The DOI provider can be set when rclone does not automatically recognize a supported DOI provider.`,
|
||||
Examples: []fs.OptionExample{
|
||||
{
|
||||
Value: "auto",
|
||||
Help: "Auto-detect provider",
|
||||
},
|
||||
{
|
||||
Value: string(Zenodo),
|
||||
Help: "Zenodo",
|
||||
}, {
|
||||
Value: string(Dataverse),
|
||||
Help: "Dataverse",
|
||||
}, {
|
||||
Value: string(Invenio),
|
||||
Help: "Invenio",
|
||||
}},
|
||||
Required: false,
|
||||
Advanced: true,
|
||||
}},
|
||||
}
|
||||
fs.Register(fsi)
|
||||
}
|
||||
|
||||
// Provider defines the type of provider hosting the DOI
type Provider string

// Supported providers. The string values are the ones accepted by the
// "provider" configuration option.
const (
	// Zenodo provider, see https://zenodo.org
	Zenodo Provider = "zenodo"
	// Dataverse provider, see https://dataverse.harvard.edu
	Dataverse Provider = "dataverse"
	// Invenio provider, see https://inveniordm.docs.cern.ch
	Invenio Provider = "invenio"
)
|
||||
|
||||
// Options defines the configuration for this backend
type Options struct {
	// Doi is the DOI, a digital identifier of an object, usually a dataset.
	Doi string `config:"doi"`
	// Provider is the DOI provider.
	Provider string `config:"provider"`
}
|
||||
|
||||
// Fs stores the interface to the remote HTTP files
|
||||
type Fs struct {
|
||||
name string // name of this remote
|
||||
root string // the path we are working on
|
||||
provider Provider // the DOI provider
|
||||
features *fs.Features // optional features
|
||||
opt Options // options for this backend
|
||||
ci *fs.ConfigInfo // global config
|
||||
endpoint *url.URL // the main API endpoint for this remote
|
||||
endpointURL string // endpoint as a string
|
||||
srv *rest.Client // the connection to the server
|
||||
pacer *fs.Pacer // pacer for API calls
|
||||
cache *cache.Cache // a cache for the remote metadata
|
||||
}
|
||||
|
||||
// Object is a remote object that has been stat'd (so it exists, but is not necessarily open for reading)
|
||||
type Object struct {
|
||||
fs *Fs // what this object is part of
|
||||
remote string // the remote path
|
||||
contentURL string // the URL where the contents of the file can be downloaded
|
||||
size int64 // size of the object
|
||||
modTime time.Time // modification time of the object
|
||||
contentType string // content type of the object
|
||||
md5 string // MD5 hash of the object content
|
||||
}
|
||||
|
||||
// parseDoi extracts the bare DOI from the user supplied string.
//
// Accepted forms (scheme and host are matched case-insensitively):
//
//	10.1000/182                 -> 10.1000/182
//	https://doi.org/10.1000/182 -> 10.1000/182
//	doi:10.1000/182             -> 10.1000/182
//	doi://10.1000/182           -> 10.1000/182
//
// Anything that cannot be parsed as a URL, or that is not recognised as one of
// the forms above, is returned unchanged.
func parseDoi(doi string) string {
	doiURL, err := url.Parse(doi)
	if err != nil {
		return doi
	}
	if doiURL.Scheme == "doi" {
		// url.Parse lower-cases the scheme, so the input may start with
		// "DOI:" or "Doi:". Strip the prefix by length instead of by value.
		return strings.TrimLeft(doi[len("doi:"):], "/")
	}
	// Host names are case-insensitive but url.Parse keeps them as typed.
	if strings.HasSuffix(strings.ToLower(doiURL.Hostname()), "doi.org") {
		return strings.TrimLeft(doiURL.Path, "/")
	}
	return doi
}
|
||||
|
||||
// Resolve a DOI to a URL
|
||||
// Reference: https://www.doi.org/the-identifier/resources/factsheets/doi-resolution-documentation
|
||||
func resolveDoiURL(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, opt *Options) (doiURL *url.URL, err error) {
|
||||
var result api.DoiResolverResponse
|
||||
params := url.Values{}
|
||||
params.Add("index", "1")
|
||||
opts := rest.Opts{
|
||||
Method: "GET",
|
||||
RootURL: doiResolverAPIURL,
|
||||
Path: "/handles/" + opt.Doi,
|
||||
Parameters: params,
|
||||
}
|
||||
err = pacer.Call(func() (bool, error) {
|
||||
res, err := srv.CallJSON(ctx, &opts, nil, &result)
|
||||
return shouldRetry(ctx, res, err)
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if result.ResponseCode != 1 {
|
||||
return nil, fmt.Errorf("could not resolve DOI (error code %d)", result.ResponseCode)
|
||||
}
|
||||
resolvedURLStr := ""
|
||||
for _, value := range result.Values {
|
||||
if value.Type == "URL" && value.Data.Format == "string" {
|
||||
valueStr, ok := value.Data.Value.(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("could not resolve DOI (incorrect response format)")
|
||||
}
|
||||
resolvedURLStr = valueStr
|
||||
}
|
||||
}
|
||||
resolvedURL, err := url.Parse(resolvedURLStr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return resolvedURL, nil
|
||||
}
|
||||
|
||||
// Resolve the passed configuration into a provider and enpoint
|
||||
func resolveEndpoint(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, opt *Options) (provider Provider, endpoint *url.URL, err error) {
|
||||
resolvedURL, err := resolveDoiURL(ctx, srv, pacer, opt)
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
|
||||
switch opt.Provider {
|
||||
case string(Dataverse):
|
||||
return resolveDataverseEndpoint(resolvedURL)
|
||||
case string(Invenio):
|
||||
return resolveInvenioEndpoint(ctx, srv, pacer, resolvedURL)
|
||||
case string(Zenodo):
|
||||
return resolveZenodoEndpoint(ctx, srv, pacer, resolvedURL, opt.Doi)
|
||||
}
|
||||
|
||||
hostname := strings.ToLower(resolvedURL.Hostname())
|
||||
if hostname == "dataverse.harvard.edu" || activateDataverse(resolvedURL) {
|
||||
return resolveDataverseEndpoint(resolvedURL)
|
||||
}
|
||||
if hostname == "zenodo.org" || strings.HasSuffix(hostname, ".zenodo.org") {
|
||||
return resolveZenodoEndpoint(ctx, srv, pacer, resolvedURL, opt.Doi)
|
||||
}
|
||||
if activateInvenio(ctx, srv, pacer, resolvedURL) {
|
||||
return resolveInvenioEndpoint(ctx, srv, pacer, resolvedURL)
|
||||
}
|
||||
|
||||
return "", nil, fmt.Errorf("provider '%s' is not supported", resolvedURL.Hostname())
|
||||
}
|
||||
|
||||
// Make the http connection from the passed options
|
||||
func (f *Fs) httpConnection(ctx context.Context, opt *Options) (isFile bool, err error) {
|
||||
provider, endpoint, err := resolveEndpoint(ctx, f.srv, f.pacer, opt)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
// Update f with the new parameters
|
||||
f.srv.SetRoot(endpoint.ResolveReference(&url.URL{Path: "/"}).String())
|
||||
f.endpoint = endpoint
|
||||
f.endpointURL = endpoint.String()
|
||||
f.provider = provider
|
||||
f.opt.Provider = string(provider)
|
||||
|
||||
// Determine if the root is a file
|
||||
switch f.provider {
|
||||
case Dataverse:
|
||||
entries, err := f.listDataverseDoiFiles(ctx)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
for _, entry := range entries {
|
||||
if entry.remote == f.root {
|
||||
isFile = true
|
||||
break
|
||||
}
|
||||
}
|
||||
case Invenio, Zenodo:
|
||||
isFile = f.root != ""
|
||||
}
|
||||
|
||||
return isFile, nil
|
||||
}
|
||||
|
||||
// retryErrorCodes lists the HTTP status codes for which a request is retried.
var retryErrorCodes = []int{
	http.StatusTooManyRequests,     // 429
	http.StatusInternalServerError, // 500
	http.StatusBadGateway,          // 502
	http.StatusServiceUnavailable,  // 503
	http.StatusGatewayTimeout,      // 504
	509,                            // Bandwidth Limit Exceeded
}
|
||||
|
||||
// shouldRetry returns a boolean as to whether this resp and err
|
||||
// deserve to be retried. It returns the err as a convenience
|
||||
func shouldRetry(ctx context.Context, res *http.Response, err error) (bool, error) {
|
||||
if fserrors.ContextError(ctx, &err) {
|
||||
return false, err
|
||||
}
|
||||
return fserrors.ShouldRetry(err) || fserrors.ShouldRetryHTTP(res, retryErrorCodes), err
|
||||
}
|
||||
|
||||
// NewFs creates a new Fs object from the name and root. It connects to
|
||||
// the host specified in the config file.
|
||||
func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, error) {
|
||||
root = strings.Trim(root, "/")
|
||||
|
||||
// Parse config into Options struct
|
||||
opt := new(Options)
|
||||
err := configstruct.Set(m, opt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
opt.Doi = parseDoi(opt.Doi)
|
||||
|
||||
client := fshttp.NewClient(ctx)
|
||||
ci := fs.GetConfig(ctx)
|
||||
f := &Fs{
|
||||
name: name,
|
||||
root: root,
|
||||
opt: *opt,
|
||||
ci: ci,
|
||||
srv: rest.NewClient(client),
|
||||
pacer: fs.NewPacer(ctx, pacer.NewDefault(pacer.MinSleep(minSleep), pacer.MaxSleep(maxSleep), pacer.DecayConstant(decayConstant))),
|
||||
cache: cache.New(),
|
||||
}
|
||||
f.features = (&fs.Features{
|
||||
CanHaveEmptyDirectories: true,
|
||||
}).Fill(ctx, f)
|
||||
|
||||
isFile, err := f.httpConnection(ctx, opt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if isFile {
|
||||
// return an error with an fs which points to the parent
|
||||
newRoot := path.Dir(f.root)
|
||||
if newRoot == "." {
|
||||
newRoot = ""
|
||||
}
|
||||
f.root = newRoot
|
||||
return f, fs.ErrorIsFile
|
||||
}
|
||||
|
||||
return f, nil
|
||||
}
|
||||
|
||||
// Name returns the configured name of the file system
|
||||
func (f *Fs) Name() string {
|
||||
return f.name
|
||||
}
|
||||
|
||||
// Root returns the root for the filesystem
|
||||
func (f *Fs) Root() string {
|
||||
return f.root
|
||||
}
|
||||
|
||||
// String returns the URL for the filesystem
|
||||
func (f *Fs) String() string {
|
||||
return fmt.Sprintf("DOI %s", f.opt.Doi)
|
||||
}
|
||||
|
||||
// Features returns the optional features of this Fs
|
||||
func (f *Fs) Features() *fs.Features {
|
||||
return f.features
|
||||
}
|
||||
|
||||
// Precision is the remote http file system's modtime precision, which we have no way of knowing. We estimate at 1s
|
||||
func (f *Fs) Precision() time.Duration {
|
||||
return time.Second
|
||||
}
|
||||
|
||||
// Hashes returns hash.HashNone to indicate remote hashing is unavailable
|
||||
func (f *Fs) Hashes() hash.Set {
|
||||
return hash.Set(hash.MD5)
|
||||
// return hash.Set(hash.None)
|
||||
}
|
||||
|
||||
// Mkdir makes the root directory of the Fs object
|
||||
func (f *Fs) Mkdir(ctx context.Context, dir string) error {
|
||||
return errorReadOnly
|
||||
}
|
||||
|
||||
// Remove a remote http file object
|
||||
func (o *Object) Remove(ctx context.Context) error {
|
||||
return errorReadOnly
|
||||
}
|
||||
|
||||
// Rmdir removes the root directory of the Fs object
|
||||
func (f *Fs) Rmdir(ctx context.Context, dir string) error {
|
||||
return errorReadOnly
|
||||
}
|
||||
|
||||
// NewObject creates a new remote http file object
|
||||
func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
|
||||
var entries []*Object
|
||||
var err error
|
||||
switch f.provider {
|
||||
case Dataverse:
|
||||
entries, err = f.listDataverseDoiFiles(ctx)
|
||||
case Invenio, Zenodo:
|
||||
entries, err = f.listInvevioDoiFiles(ctx)
|
||||
default:
|
||||
err = fmt.Errorf("provider type '%s' not supported", f.provider)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, entry := range entries {
|
||||
if entry.Remote() == remote {
|
||||
return entry, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fs.ErrorObjectNotFound
|
||||
}
|
||||
|
||||
// List the objects and directories in dir into entries. The
|
||||
// entries can be returned in any order but should be for a
|
||||
// complete directory.
|
||||
//
|
||||
// dir should be "" to list the root, and should not have
|
||||
// trailing slashes.
|
||||
//
|
||||
// This should return ErrDirNotFound if the directory isn't
|
||||
// found.
|
||||
func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
|
||||
switch f.provider {
|
||||
case Dataverse:
|
||||
return f.listDataverse(ctx, dir)
|
||||
case Invenio, Zenodo:
|
||||
return f.listInvenio(ctx, dir)
|
||||
default:
|
||||
return nil, fmt.Errorf("provider type '%s' not supported", f.provider)
|
||||
}
|
||||
}
|
||||
|
||||
// Put in to the remote path with the modTime given of the given size
|
||||
//
|
||||
// May create the object even if it returns an error - if so
|
||||
// will return the object and the error, otherwise will return
|
||||
// nil and the error
|
||||
func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
|
||||
return nil, errorReadOnly
|
||||
}
|
||||
|
||||
// PutStream uploads to the remote path with the modTime given of indeterminate size
|
||||
func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
|
||||
return nil, errorReadOnly
|
||||
}
|
||||
|
||||
// Fs is the filesystem this remote http file object is located within
|
||||
func (o *Object) Fs() fs.Info {
|
||||
return o.fs
|
||||
}
|
||||
|
||||
// String returns the URL to the remote HTTP file
|
||||
func (o *Object) String() string {
|
||||
if o == nil {
|
||||
return "<nil>"
|
||||
}
|
||||
return o.remote
|
||||
}
|
||||
|
||||
// Remote the name of the remote HTTP file, relative to the fs root
|
||||
func (o *Object) Remote() string {
|
||||
return o.remote
|
||||
}
|
||||
|
||||
// Hash returns "" since HTTP (in Go or OpenSSH) doesn't support remote calculation of hashes
|
||||
func (o *Object) Hash(ctx context.Context, t hash.Type) (string, error) {
|
||||
if t != hash.MD5 {
|
||||
return "", hash.ErrUnsupported
|
||||
}
|
||||
return o.md5, nil
|
||||
}
|
||||
|
||||
// Size returns the size in bytes of the remote http file
|
||||
func (o *Object) Size() int64 {
|
||||
return o.size
|
||||
}
|
||||
|
||||
// ModTime returns the modification time of the remote http file
|
||||
func (o *Object) ModTime(ctx context.Context) time.Time {
|
||||
return o.modTime
|
||||
}
|
||||
|
||||
// SetModTime sets the modification and access time to the specified time
|
||||
//
|
||||
// it also updates the info field
|
||||
func (o *Object) SetModTime(ctx context.Context, modTime time.Time) error {
|
||||
return errorReadOnly
|
||||
}
|
||||
|
||||
// Storable returns whether the remote http file is a regular file (not a directory, symbolic link, block device, character device, named pipe, etc.)
|
||||
func (o *Object) Storable() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// Open a remote http file object for reading. Seek is supported
|
||||
func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.ReadCloser, err error) {
|
||||
fs.FixRangeOption(options, o.size)
|
||||
opts := rest.Opts{
|
||||
Method: "GET",
|
||||
RootURL: o.contentURL,
|
||||
Options: options,
|
||||
}
|
||||
var res *http.Response
|
||||
err = o.fs.pacer.Call(func() (bool, error) {
|
||||
res, err = o.fs.srv.Call(ctx, &opts)
|
||||
return shouldRetry(ctx, res, err)
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Open failed: %w", err)
|
||||
}
|
||||
|
||||
// Handle non-compliant redirects
|
||||
if res.Header.Get("Location") != "" {
|
||||
newURL, err := res.Location()
|
||||
if err == nil {
|
||||
opts.RootURL = newURL.String()
|
||||
err = o.fs.pacer.Call(func() (bool, error) {
|
||||
res, err = o.fs.srv.Call(ctx, &opts)
|
||||
return shouldRetry(ctx, res, err)
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Open failed: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return res.Body, nil
|
||||
}
|
||||
|
||||
// Update in to the object with the modTime given of the given size
|
||||
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
|
||||
return errorReadOnly
|
||||
}
|
||||
|
||||
// MimeType of an Object if known, "" otherwise
|
||||
func (o *Object) MimeType(ctx context.Context) string {
|
||||
return o.contentType
|
||||
}
|
||||
|
||||
var commandHelp = []fs.CommandHelp{{
|
||||
Name: "metadata",
|
||||
Short: "Show metadata about the DOI.",
|
||||
Long: `This command returns a JSON object with some information about the DOI.
|
||||
|
||||
rclone backend medatadata doi:
|
||||
|
||||
It returns a JSON object representing metadata about the DOI.
|
||||
`,
|
||||
}, {
|
||||
Name: "set",
|
||||
Short: "Set command for updating the config parameters.",
|
||||
Long: `This set command can be used to update the config parameters
|
||||
for a running doi backend.
|
||||
|
||||
Usage Examples:
|
||||
|
||||
rclone backend set doi: [-o opt_name=opt_value] [-o opt_name2=opt_value2]
|
||||
rclone rc backend/command command=set fs=doi: [-o opt_name=opt_value] [-o opt_name2=opt_value2]
|
||||
rclone rc backend/command command=set fs=doi: -o doi=NEW_DOI
|
||||
|
||||
The option keys are named as they are in the config file.
|
||||
|
||||
This rebuilds the connection to the doi backend when it is called with
|
||||
the new parameters. Only new parameters need be passed as the values
|
||||
will default to those currently in use.
|
||||
|
||||
It doesn't return anything.
|
||||
`,
|
||||
}}
|
||||
|
||||
// Command the backend to run a named command
|
||||
//
|
||||
// The command run is name
|
||||
// args may be used to read arguments from
|
||||
// opts may be used to read optional arguments from
|
||||
//
|
||||
// The result should be capable of being JSON encoded
|
||||
// If it is a string or a []string it will be shown to the user
|
||||
// otherwise it will be JSON encoded and shown to the user like that
|
||||
func (f *Fs) Command(ctx context.Context, name string, arg []string, opt map[string]string) (out interface{}, err error) {
|
||||
switch name {
|
||||
case "metadata":
|
||||
return f.ShowMetadata(ctx)
|
||||
case "set":
|
||||
newOpt := f.opt
|
||||
err := configstruct.Set(configmap.Simple(opt), &newOpt)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("reading config: %w", err)
|
||||
}
|
||||
_, err = f.httpConnection(ctx, &newOpt)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("updating session: %w", err)
|
||||
}
|
||||
f.opt = newOpt
|
||||
keys := []string{}
|
||||
for k := range opt {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
fs.Logf(f, "Updated config values: %s", strings.Join(keys, ", "))
|
||||
return nil, nil
|
||||
default:
|
||||
return nil, fs.ErrorCommandNotFound
|
||||
}
|
||||
}
|
||||
|
||||
// ShowMetadata returns some metadata about the corresponding DOI
|
||||
func (f *Fs) ShowMetadata(ctx context.Context) (metadata interface{}, err error) {
|
||||
doiURL, err := url.Parse("https://doi.org/" + f.opt.Doi)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
info := map[string]any{}
|
||||
info["DOI"] = f.opt.Doi
|
||||
info["URL"] = doiURL.String()
|
||||
info["metadataURL"] = f.endpointURL
|
||||
info["provider"] = f.provider
|
||||
return info, nil
|
||||
}
|
||||
|
||||
// Check the interfaces are satisfied
|
||||
var (
|
||||
_ fs.Fs = (*Fs)(nil)
|
||||
_ fs.PutStreamer = (*Fs)(nil)
|
||||
_ fs.Commander = (*Fs)(nil)
|
||||
_ fs.Object = (*Object)(nil)
|
||||
_ fs.MimeTyper = (*Object)(nil)
|
||||
)
|
34
backend/doi/doi_internal_test.go
Normal file
34
backend/doi/doi_internal_test.go
Normal file
@ -0,0 +1,34 @@
|
||||
package doi
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestParseDoi(t *testing.T) {
|
||||
// 10.1000/182 -> 10.1000/182
|
||||
doi := "10.1000/182"
|
||||
parsed := parseDoi(doi)
|
||||
assert.Equal(t, "10.1000/182", parsed)
|
||||
|
||||
// https://doi.org/10.1000/182 -> 10.1000/182
|
||||
doi = "https://doi.org/10.1000/182"
|
||||
parsed = parseDoi(doi)
|
||||
assert.Equal(t, "10.1000/182", parsed)
|
||||
|
||||
// https://dx.doi.org/10.1000/182 -> 10.1000/182
|
||||
doi = "https://dxdoi.org/10.1000/182"
|
||||
parsed = parseDoi(doi)
|
||||
assert.Equal(t, "10.1000/182", parsed)
|
||||
|
||||
// doi:10.1000/182 -> 10.1000/182
|
||||
doi = "doi:10.1000/182"
|
||||
parsed = parseDoi(doi)
|
||||
assert.Equal(t, "10.1000/182", parsed)
|
||||
|
||||
// doi://10.1000/182 -> 10.1000/182
|
||||
doi = "doi://10.1000/182"
|
||||
parsed = parseDoi(doi)
|
||||
assert.Equal(t, "10.1000/182", parsed)
|
||||
}
|
16
backend/doi/doi_test.go
Normal file
16
backend/doi/doi_test.go
Normal file
@ -0,0 +1,16 @@
|
||||
// Test DOI filesystem interface
|
||||
package doi
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/rclone/rclone/fstest/fstests"
|
||||
)
|
||||
|
||||
// TestIntegration runs integration tests against the remote
|
||||
func TestIntegration(t *testing.T) {
|
||||
fstests.Run(t, &fstests.Opt{
|
||||
RemoteName: "TestDoi:",
|
||||
NilObject: (*Object)(nil),
|
||||
})
|
||||
}
|
169
backend/doi/invenio.go
Normal file
169
backend/doi/invenio.go
Normal file
@ -0,0 +1,169 @@
|
||||
// Implementation for InvenioRDM
|
||||
|
||||
package doi
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/rclone/rclone/backend/doi/api"
|
||||
"github.com/rclone/rclone/fs"
|
||||
"github.com/rclone/rclone/lib/rest"
|
||||
)
|
||||
|
||||
// invenioRecordRegex matches a "/record/" or "/records/" segment in a URL path
// and captures everything that follows it (greedily, so including any further
// path segments) as the record ID.
var invenioRecordRegex = regexp.MustCompile(`\/records?\/(.+)`)
|
||||
|
||||
// Returns true if resolvedURL is likely a DOI hosted on an InvenioRDM intallation
|
||||
func activateInvenio(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, resolvedURL *url.URL) (isActive bool) {
|
||||
_, _, err := resolveInvenioEndpoint(ctx, srv, pacer, resolvedURL)
|
||||
return err == nil
|
||||
}
|
||||
|
||||
// Resolve the main API endpoint for a DOI hosted on an InvenioRDM installation
|
||||
func resolveInvenioEndpoint(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, resolvedURL *url.URL) (provider Provider, endpoint *url.URL, err error) {
|
||||
var res *http.Response
|
||||
opts := rest.Opts{
|
||||
Method: "GET",
|
||||
RootURL: resolvedURL.String(),
|
||||
}
|
||||
err = pacer.Call(func() (bool, error) {
|
||||
res, err = srv.Call(ctx, &opts)
|
||||
return shouldRetry(ctx, res, err)
|
||||
})
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
|
||||
// First, attempt to grab the API URL from the headers
|
||||
var linksetURL *url.URL
|
||||
links := parseLinkHeader(res.Header.Get("Link"))
|
||||
for _, link := range links {
|
||||
if link.Rel == "linkset" && link.Type == "application/linkset+json" {
|
||||
parsed, err := url.Parse(link.Href)
|
||||
if err == nil {
|
||||
linksetURL = parsed
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if linksetURL != nil {
|
||||
endpoint, err = checkInvenioAPIURL(ctx, srv, pacer, linksetURL)
|
||||
if err == nil {
|
||||
return Invenio, endpoint, nil
|
||||
}
|
||||
fs.Logf(nil, "using linkset URL failed: %s", err.Error())
|
||||
}
|
||||
|
||||
// If there is no linkset header, try to grab the record ID from the URL
|
||||
recordID := ""
|
||||
resURL := res.Request.URL
|
||||
match := invenioRecordRegex.FindStringSubmatch(resURL.EscapedPath())
|
||||
if match != nil {
|
||||
recordID = match[1]
|
||||
guessedURL := res.Request.URL.ResolveReference(&url.URL{
|
||||
Path: "/api/records/" + recordID,
|
||||
})
|
||||
endpoint, err = checkInvenioAPIURL(ctx, srv, pacer, guessedURL)
|
||||
if err == nil {
|
||||
return Invenio, endpoint, nil
|
||||
}
|
||||
fs.Logf(nil, "guessing the URL failed: %s", err.Error())
|
||||
}
|
||||
|
||||
return "", nil, fmt.Errorf("could not resolve the Invenio API endpoint for '%s'", resolvedURL.String())
|
||||
}
|
||||
|
||||
func checkInvenioAPIURL(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, resolvedURL *url.URL) (endpoint *url.URL, err error) {
|
||||
var result api.InvenioRecordResponse
|
||||
opts := rest.Opts{
|
||||
Method: "GET",
|
||||
RootURL: resolvedURL.String(),
|
||||
}
|
||||
err = pacer.Call(func() (bool, error) {
|
||||
res, err := srv.CallJSON(ctx, &opts, nil, &result)
|
||||
return shouldRetry(ctx, res, err)
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if result.Links.Self == "" {
|
||||
return nil, fmt.Errorf("could not parse API response from '%s'", resolvedURL.String())
|
||||
}
|
||||
return url.Parse(result.Links.Self)
|
||||
}
|
||||
|
||||
// Implements Fs.List() for Invenio
|
||||
func (f *Fs) listInvenio(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
|
||||
if dir != "" {
|
||||
return nil, fs.ErrorDirNotFound
|
||||
}
|
||||
|
||||
fileEntries, err := f.listInvevioDoiFiles(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error listing %q: %w", dir, err)
|
||||
}
|
||||
for _, entry := range fileEntries {
|
||||
entries = append(entries, entry)
|
||||
}
|
||||
return entries, nil
|
||||
}
|
||||
|
||||
// List the files contained in the DOI
|
||||
func (f *Fs) listInvevioDoiFiles(ctx context.Context) (entries []*Object, err error) {
|
||||
// Use the cache if populated
|
||||
cachedEntries, found := f.cache.GetMaybe("files")
|
||||
if found {
|
||||
parsedEntries, ok := cachedEntries.([]Object)
|
||||
if ok {
|
||||
for _, entry := range parsedEntries {
|
||||
newEntry := entry
|
||||
entries = append(entries, &newEntry)
|
||||
}
|
||||
return entries, nil
|
||||
}
|
||||
}
|
||||
|
||||
filesURL := f.endpoint.JoinPath("files")
|
||||
var result api.InvenioFilesResponse
|
||||
opts := rest.Opts{
|
||||
Method: "GET",
|
||||
Path: strings.TrimLeft(filesURL.EscapedPath(), "/"),
|
||||
}
|
||||
err = f.pacer.Call(func() (bool, error) {
|
||||
res, err := f.srv.CallJSON(ctx, &opts, nil, &result)
|
||||
return shouldRetry(ctx, res, err)
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("readDir failed: %w", err)
|
||||
}
|
||||
for _, file := range result.Entries {
|
||||
modTime, modTimeErr := time.Parse(time.RFC3339, file.Updated)
|
||||
if modTimeErr != nil {
|
||||
fs.Logf(f, "error: could not parse last update time %v", modTimeErr)
|
||||
modTime = timeUnset
|
||||
}
|
||||
entry := &Object{
|
||||
fs: f,
|
||||
remote: file.Key,
|
||||
contentURL: file.Links.Content,
|
||||
size: file.Size,
|
||||
modTime: modTime,
|
||||
contentType: file.MimeType,
|
||||
md5: strings.TrimPrefix(file.Checksum, "md5:"),
|
||||
}
|
||||
entries = append(entries, entry)
|
||||
}
|
||||
// Populate the cache
|
||||
cacheEntries := []Object{}
|
||||
for _, entry := range entries {
|
||||
cacheEntries = append(cacheEntries, *entry)
|
||||
}
|
||||
f.cache.Put("files", cacheEntries)
|
||||
return entries, nil
|
||||
}
|
75
backend/doi/link_header.go
Normal file
75
backend/doi/link_header.go
Normal file
@ -0,0 +1,75 @@
|
||||
package doi
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// linkRegex matches a link target enclosed in angle brackets and captures the
// target itself.
var linkRegex = regexp.MustCompile(`^<(.+)>$`)

// valueRegex matches a non-empty double-quoted parameter value and captures
// the text between the quotes.
var valueRegex = regexp.MustCompile(`^"(.+)"$`)

// headerLink represents a link as presented in HTTP headers
// MDN Reference: https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Link
type headerLink struct {
	Href   string            // link target, without the enclosing angle brackets
	Rel    string            // value of the "rel" parameter, "" if absent
	Type   string            // value of the "type" parameter, "" if absent
	Extras map[string]string // all other parameters, keyed as written in the header
}

// parseLinkHeader parses the value of an HTTP Link header into its individual
// links.
//
// Links are separated by commas. Commas inside the <target> or inside a quoted
// parameter value do not separate links. Items that do not start with a
// <target> are skipped. An empty header yields no links.
func parseLinkHeader(header string) (links []headerLink) {
	for _, link := range splitUnquoted(header, ',') {
		if parsed := parseLink(strings.TrimSpace(link)); parsed != nil {
			links = append(links, *parsed)
		}
	}
	return links
}

// parseLink parses a single link of the form `<target>; key=value; ...`.
//
// It returns nil when the first ";"-separated part is not a target enclosed in
// angle brackets. The "rel" and "type" parameters (matched case-insensitively)
// are stored in their own fields; every other parameter goes into Extras.
// Parts that are not key=value pairs are ignored.
func parseLink(link string) (parsedLink *headerLink) {
	// splitUnquoted always returns at least one element, so parts[0] is safe
	parts := splitUnquoted(link, ';')
	for i := range parts {
		parts[i] = strings.TrimSpace(parts[i])
	}

	match := linkRegex.FindStringSubmatch(parts[0])
	if match == nil {
		return nil
	}

	result := &headerLink{
		Href:   match[1],
		Extras: map[string]string{},
	}

	for _, keyValue := range parts[1:] {
		parsed := parseKeyValue(keyValue)
		if parsed == nil {
			continue
		}
		key, value := parsed[0], parsed[1]
		switch strings.ToLower(key) {
		case "rel":
			result.Rel = value
		case "type":
			result.Type = value
		default:
			result.Extras[key] = value
		}
	}
	return result
}

// parseKeyValue splits a `key=value` link parameter at the first "=".
//
// It returns a two-element slice {key, value} with surrounding whitespace
// removed from both and one pair of enclosing double quotes removed from a
// non-empty quoted value. It returns nil if there is no "=" or the key is
// empty.
func parseKeyValue(keyValue string) []string {
	key, value, found := strings.Cut(keyValue, "=")
	key = strings.TrimSpace(key)
	if !found || key == "" {
		return nil
	}
	value = strings.TrimSpace(value)
	if match := valueRegex.FindStringSubmatch(value); match != nil {
		value = match[1]
	}
	return []string{key, value}
}

// splitUnquoted splits s at every occurrence of sep that lies outside a
// <...> target and outside a "..." quoted string (a backslash inside a quoted
// string escapes the following byte). Like strings.Split it always returns at
// least one element.
func splitUnquoted(s string, sep byte) []string {
	var parts []string
	inAngle, inQuote := false, false
	start := 0
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case inQuote:
			switch c {
			case '\\':
				i++ // skip the escaped byte
			case '"':
				inQuote = false
			}
		case inAngle:
			inAngle = c != '>'
		case c == '"':
			inQuote = true
		case c == '<':
			inAngle = true
		case c == sep:
			parts = append(parts, s[start:i])
			start = i + 1
		}
	}
	return append(parts, s[start:])
}
|
44
backend/doi/link_header_internal_test.go
Normal file
44
backend/doi/link_header_internal_test.go
Normal file
@ -0,0 +1,44 @@
|
||||
package doi
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestParseLinkHeader(t *testing.T) {
|
||||
header := "<https://zenodo.org/api/records/15063252> ; rel=\"linkset\" ; type=\"application/linkset+json\""
|
||||
links := parseLinkHeader(header)
|
||||
expected := headerLink{
|
||||
Href: "https://zenodo.org/api/records/15063252",
|
||||
Rel: "linkset",
|
||||
Type: "application/linkset+json",
|
||||
Extras: map[string]string{},
|
||||
}
|
||||
assert.Contains(t, links, expected)
|
||||
|
||||
header = "<https://api.example.com/issues?page=2>; rel=\"prev\", <https://api.example.com/issues?page=4>; rel=\"next\", <https://api.example.com/issues?page=10>; rel=\"last\", <https://api.example.com/issues?page=1>; rel=\"first\""
|
||||
links = parseLinkHeader(header)
|
||||
expectedList := []headerLink{{
|
||||
Href: "https://api.example.com/issues?page=2",
|
||||
Rel: "prev",
|
||||
Type: "",
|
||||
Extras: map[string]string{},
|
||||
}, {
|
||||
Href: "https://api.example.com/issues?page=4",
|
||||
Rel: "next",
|
||||
Type: "",
|
||||
Extras: map[string]string{},
|
||||
}, {
|
||||
Href: "https://api.example.com/issues?page=10",
|
||||
Rel: "last",
|
||||
Type: "",
|
||||
Extras: map[string]string{},
|
||||
}, {
|
||||
Href: "https://api.example.com/issues?page=1",
|
||||
Rel: "first",
|
||||
Type: "",
|
||||
Extras: map[string]string{},
|
||||
}}
|
||||
assert.Equal(t, links, expectedList)
|
||||
}
|
47
backend/doi/zenodo.go
Normal file
47
backend/doi/zenodo.go
Normal file
@ -0,0 +1,47 @@
|
||||
// Implementation for Zenodo
|
||||
|
||||
package doi
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"regexp"
|
||||
|
||||
"github.com/rclone/rclone/backend/doi/api"
|
||||
"github.com/rclone/rclone/fs"
|
||||
"github.com/rclone/rclone/lib/rest"
|
||||
)
|
||||
|
||||
// zenodoRecordRegex matches "zenodo." anywhere in a string and captures
// everything after it as the record ID.
var zenodoRecordRegex = regexp.MustCompile(`zenodo[.](.+)`)
|
||||
|
||||
// Resolve the main API endpoint for a DOI hosted on Zenodo
|
||||
func resolveZenodoEndpoint(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, resolvedURL *url.URL, doi string) (provider Provider, endpoint *url.URL, err error) {
|
||||
match := zenodoRecordRegex.FindStringSubmatch(doi)
|
||||
if match == nil {
|
||||
return "", nil, fmt.Errorf("could not derive API endpoint URL from '%s'", resolvedURL.String())
|
||||
}
|
||||
|
||||
recordID := match[1]
|
||||
endpointURL := resolvedURL.ResolveReference(&url.URL{Path: "/api/records/" + recordID})
|
||||
|
||||
var result api.InvenioRecordResponse
|
||||
opts := rest.Opts{
|
||||
Method: "GET",
|
||||
RootURL: endpointURL.String(),
|
||||
}
|
||||
err = pacer.Call(func() (bool, error) {
|
||||
res, err := srv.CallJSON(ctx, &opts, nil, &result)
|
||||
return shouldRetry(ctx, res, err)
|
||||
})
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
|
||||
endpointURL, err = url.Parse(result.Links.Self)
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
|
||||
return Zenodo, endpointURL, nil
|
||||
}
|
161
docs/content/doi.md
Normal file
161
docs/content/doi.md
Normal file
@ -0,0 +1,161 @@
|
||||
---
|
||||
title: "DOI"
|
||||
description: "Rclone docs for DOI"
|
||||
versionIntroduced: "?"
|
||||
---
|
||||
|
||||
# {{< icon "fa fa-building-columns" >}} DOI
|
||||
|
||||
The DOI remote is a read only remote for reading files from digital object identifiers (DOI).
|
||||
|
||||
Currently, the DOI backend supports DOIs hosted with:
|
||||
- [InvenioRDM](https://inveniosoftware.org/products/rdm/)
|
||||
- [Zenodo](https://zenodo.org)
|
||||
- [CaltechDATA](https://data.caltech.edu)
|
||||
- [Other InvenioRDM repositories](https://inveniosoftware.org/showcase/)
|
||||
- [Dataverse](https://dataverse.org)
|
||||
- [Harvard Dataverse](https://dataverse.harvard.edu)
|
||||
- [Other Dataverse repositories](https://dataverse.org/installations)
|
||||
|
||||
Paths are specified as `remote:path`
|
||||
|
||||
Paths may be as deep as required, e.g. `remote:directory/subdirectory`.
|
||||
|
||||
## Configuration
|
||||
|
||||
Here is an example of how to make a remote called `remote`. First run:
|
||||
|
||||
rclone config
|
||||
|
||||
This will guide you through an interactive setup process:
|
||||
|
||||
```
|
||||
No remotes found, make a new one?
|
||||
n) New remote
|
||||
s) Set configuration password
|
||||
q) Quit config
|
||||
n/s/q> n
|
||||
Enter name for new remote.
|
||||
name> remote
|
||||
Type of storage to configure.
|
||||
Choose a number from below, or type in your own value
|
||||
[snip]
|
||||
XX / DOI datasets
|
||||
\ (doi)
|
||||
[snip]
|
||||
Storage> doi
|
||||
Option doi.
|
||||
The DOI or the doi.org URL.
|
||||
Enter a value.
|
||||
doi> 10.5281/zenodo.5876941
|
||||
Edit advanced config?
|
||||
y) Yes
|
||||
n) No (default)
|
||||
y/n> n
|
||||
Configuration complete.
|
||||
Options:
|
||||
- type: doi
|
||||
- doi: 10.5281/zenodo.5876941
|
||||
Keep this "remote" remote?
|
||||
y) Yes this is OK (default)
|
||||
e) Edit this remote
|
||||
d) Delete this remote
|
||||
y/e/d> y
|
||||
```
|
||||
|
||||
{{< rem autogenerated options start" - DO NOT EDIT - instead edit fs.RegInfo in backend/doi/doi.go then run make backenddocs" >}}
|
||||
### Standard options
|
||||
|
||||
Here are the Standard options specific to doi (DOI datasets).
|
||||
|
||||
#### --doi-doi
|
||||
|
||||
The DOI or the doi.org URL.
|
||||
|
||||
Properties:
|
||||
|
||||
- Config: doi
|
||||
- Env Var: RCLONE_DOI_DOI
|
||||
- Type: string
|
||||
- Required: true
|
||||
|
||||
### Advanced options
|
||||
|
||||
Here are the Advanced options specific to doi (DOI datasets).
|
||||
|
||||
#### --doi-provider
|
||||
|
||||
DOI provider.
|
||||
|
||||
The DOI provider can be set when rclone does not automatically recognize a supported DOI provider.
|
||||
|
||||
Properties:
|
||||
|
||||
- Config: provider
|
||||
- Env Var: RCLONE_DOI_PROVIDER
|
||||
- Type: string
|
||||
- Required: false
|
||||
- Examples:
|
||||
- "auto"
|
||||
- Auto-detect provider
|
||||
- "zenodo"
|
||||
- Zenodo
|
||||
- "dataverse"
|
||||
- Dataverse
|
||||
- "invenio"
|
||||
- Invenio
|
||||
|
||||
## Backend commands
|
||||
|
||||
Here are the commands specific to the doi backend.
|
||||
|
||||
Run them with
|
||||
|
||||
rclone backend COMMAND remote:
|
||||
|
||||
The help below will explain what arguments each command takes.
|
||||
|
||||
See the [backend](/commands/rclone_backend/) command for more
|
||||
info on how to pass options and arguments.
|
||||
|
||||
These can be run on a running backend using the rc command
|
||||
[backend/command](/rc/#backend-command).
|
||||
|
||||
### metadata
|
||||
|
||||
Show metadata about the DOI.
|
||||
|
||||
rclone backend metadata remote: [options] [<arguments>+]
|
||||
|
||||
This command returns a JSON object with some information about the DOI.
|
||||
|
||||
rclone backend metadata doi:
|
||||
|
||||
It returns a JSON object representing metadata about the DOI.
|
||||
|
||||
|
||||
### set
|
||||
|
||||
Set command for updating the config parameters.
|
||||
|
||||
rclone backend set remote: [options] [<arguments>+]
|
||||
|
||||
This set command can be used to update the config parameters
|
||||
for a running doi backend.
|
||||
|
||||
Usage Examples:
|
||||
|
||||
rclone backend set doi: [-o opt_name=opt_value] [-o opt_name2=opt_value2]
|
||||
rclone rc backend/command command=set fs=doi: [-o opt_name=opt_value] [-o opt_name2=opt_value2]
|
||||
rclone rc backend/command command=set fs=doi: -o doi=NEW_DOI
|
||||
|
||||
The option keys are named as they are in the config file.
|
||||
|
||||
This rebuilds the connection to the doi backend when it is called with
|
||||
the new parameters. Only new parameters need be passed as the values
|
||||
will default to those currently in use.
|
||||
|
||||
It doesn't return anything.
|
||||
|
||||
|
||||
{{< rem autogenerated options stop >}}
|
Loading…
x
Reference in New Issue
Block a user