2017-06-13 23:40:24 +03:00
|
|
|
package s3gof3r
|
|
|
|
|
|
|
|
import (
|
2021-09-04 11:29:06 +03:00
|
|
|
"context"
|
2017-06-13 23:40:24 +03:00
|
|
|
"encoding/xml"
|
|
|
|
"math"
|
|
|
|
"net/http"
|
|
|
|
"strconv"
|
|
|
|
"time"
|
2021-09-04 11:11:23 +03:00
|
|
|
|
2021-09-13 15:13:35 +03:00
|
|
|
"github.com/github/s3gof3r/internal/s3client"
|
2021-09-04 11:11:23 +03:00
|
|
|
"golang.org/x/sync/errgroup"
|
2017-06-13 23:40:24 +03:00
|
|
|
)
|
|
|
|
|
|
|
|
func newObjectLister(c *Config, b *Bucket, prefixes []string, maxKeys int) (*ObjectLister, error) {
|
2021-09-04 08:42:41 +03:00
|
|
|
cCopy := *c
|
|
|
|
cCopy.NTry = max(c.NTry, 1)
|
|
|
|
cCopy.Concurrency = max(c.Concurrency, 1)
|
|
|
|
|
|
|
|
bCopy := *b
|
|
|
|
|
2021-09-04 11:48:52 +03:00
|
|
|
ctx, cancel := context.WithCancel(context.TODO())
|
|
|
|
|
2021-09-04 08:42:41 +03:00
|
|
|
l := ObjectLister{
|
2021-09-04 11:50:26 +03:00
|
|
|
cancel: cancel,
|
2021-09-08 17:18:51 +03:00
|
|
|
b: &bCopy,
|
|
|
|
c: &cCopy,
|
2021-09-08 17:23:56 +03:00
|
|
|
prefixCh: make(chan string, len(prefixes)),
|
2021-09-08 17:18:51 +03:00
|
|
|
resultCh: make(chan []string, 1),
|
|
|
|
maxKeys: maxKeys,
|
2021-09-04 08:42:41 +03:00
|
|
|
}
|
2017-06-13 23:40:24 +03:00
|
|
|
|
2021-09-08 17:23:56 +03:00
|
|
|
// Enqueue all of the prefixes that we were given. This won't
|
|
|
|
// block because we have initialized `prefixCh` to be long enough
|
|
|
|
// to hold all of them. This has the added benefit that there is
|
|
|
|
// no data race if the caller happens to modify the contents of
|
|
|
|
// the slice after this call returns.
|
|
|
|
for _, p := range prefixes {
|
|
|
|
l.prefixCh <- p
|
|
|
|
}
|
|
|
|
close(l.prefixCh)
|
|
|
|
|
2021-09-04 14:57:29 +03:00
|
|
|
eg, ctx := errgroup.WithContext(ctx)
|
2021-09-08 17:28:07 +03:00
|
|
|
|
2021-09-08 17:31:41 +03:00
|
|
|
for i := 0; i < min(l.c.Concurrency, len(prefixes)); i++ {
|
2021-09-04 11:11:23 +03:00
|
|
|
eg.Go(func() error {
|
2021-09-04 14:57:29 +03:00
|
|
|
return l.worker(ctx)
|
2021-09-04 11:11:23 +03:00
|
|
|
})
|
2017-06-13 23:40:24 +03:00
|
|
|
}
|
2021-09-08 17:23:56 +03:00
|
|
|
|
|
|
|
go func() {
|
2021-09-04 14:57:29 +03:00
|
|
|
l.finalErr = eg.Wait()
|
2021-09-08 17:23:56 +03:00
|
|
|
close(l.resultCh)
|
2021-09-04 12:02:19 +03:00
|
|
|
l.cancel()
|
2021-09-08 17:23:56 +03:00
|
|
|
}()
|
2017-06-13 23:40:24 +03:00
|
|
|
|
2021-09-04 08:42:41 +03:00
|
|
|
return &l, nil
|
2017-06-13 23:40:24 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
type ObjectLister struct {
|
2021-09-04 11:50:26 +03:00
|
|
|
cancel context.CancelFunc
|
2021-09-04 11:48:52 +03:00
|
|
|
|
2021-09-04 08:56:58 +03:00
|
|
|
b *Bucket
|
|
|
|
c *Config
|
|
|
|
maxKeys int
|
2017-06-13 23:40:24 +03:00
|
|
|
|
2021-09-08 17:18:51 +03:00
|
|
|
prefixCh chan string
|
|
|
|
resultCh chan []string
|
2021-09-04 14:48:19 +03:00
|
|
|
|
2021-09-04 14:57:29 +03:00
|
|
|
// finalErr is set before closing `resultCh` if any of the workers
|
|
|
|
// returned errors. Any subsequent calls to `Next()` report this
|
|
|
|
// error.
|
|
|
|
finalErr error
|
|
|
|
|
2021-09-04 14:48:19 +03:00
|
|
|
// currentValue and currentErr are the "results" of the most
|
|
|
|
// recent call to `Next()`.
|
|
|
|
currentValue []string
|
|
|
|
currentErr error
|
2017-06-13 23:40:24 +03:00
|
|
|
}
|
|
|
|
|
2021-09-04 14:57:29 +03:00
|
|
|
func (l *ObjectLister) worker(ctx context.Context) error {
|
2021-09-08 17:18:51 +03:00
|
|
|
for p := range l.prefixCh {
|
2017-06-13 23:40:24 +03:00
|
|
|
var continuation string
|
|
|
|
retries:
|
|
|
|
for {
|
2021-09-04 15:31:44 +03:00
|
|
|
res, err := l.retryListObjects(ctx, p, continuation)
|
2017-06-13 23:40:24 +03:00
|
|
|
if err != nil {
|
|
|
|
select {
|
2021-09-04 11:29:06 +03:00
|
|
|
case <-ctx.Done():
|
2021-09-04 14:57:29 +03:00
|
|
|
return ctx.Err()
|
2017-06-13 23:40:24 +03:00
|
|
|
default:
|
2021-09-04 14:57:29 +03:00
|
|
|
return err
|
2017-06-13 23:40:24 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
keys := make([]string, 0, len(res.Contents))
|
|
|
|
for _, c := range res.Contents {
|
|
|
|
keys = append(keys, c.Key)
|
|
|
|
}
|
|
|
|
|
|
|
|
select {
|
2021-09-04 11:29:06 +03:00
|
|
|
case <-ctx.Done():
|
2021-09-04 14:57:29 +03:00
|
|
|
return ctx.Err()
|
2021-09-08 17:18:51 +03:00
|
|
|
case l.resultCh <- keys:
|
2017-06-13 23:40:24 +03:00
|
|
|
continuation = res.NextContinuationToken
|
|
|
|
if continuation != "" {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Break from this prefix and grab the next one
|
|
|
|
break retries
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-09-04 14:57:29 +03:00
|
|
|
|
|
|
|
return nil
|
2017-06-13 23:40:24 +03:00
|
|
|
}
|
|
|
|
|
2021-09-04 15:31:44 +03:00
|
|
|
func (l *ObjectLister) retryListObjects(
|
|
|
|
ctx context.Context, p, continuation string,
|
|
|
|
) (*listBucketResult, error) {
|
2017-06-13 23:40:24 +03:00
|
|
|
var err error
|
|
|
|
var res *listBucketResult
|
2021-09-04 15:31:44 +03:00
|
|
|
var timer *time.Timer
|
2017-06-13 23:40:24 +03:00
|
|
|
for i := 0; i < l.c.NTry; i++ {
|
|
|
|
opts := listObjectsOptions{MaxKeys: l.maxKeys, Prefix: p, ContinuationToken: continuation}
|
|
|
|
res, err = listObjects(l.c, l.b, opts)
|
|
|
|
if err == nil {
|
|
|
|
return res, nil
|
|
|
|
}
|
|
|
|
|
2021-09-04 15:31:44 +03:00
|
|
|
// Exponential back-off, reusing the timer if possible:
|
|
|
|
duration := time.Duration(math.Exp2(float64(i))) * 100 * time.Millisecond
|
|
|
|
if timer == nil {
|
|
|
|
timer = time.NewTimer(duration)
|
|
|
|
} else {
|
2021-09-11 08:57:40 +03:00
|
|
|
// The only way to get here is if the timer was created
|
|
|
|
// during an earlier iteration of the loop, in which case
|
|
|
|
// the select below must have gone through the `<-timer.C`
|
|
|
|
// branch, which drained the timer. So it is safe to call
|
|
|
|
// `Reset()`:
|
2021-09-04 15:31:44 +03:00
|
|
|
timer.Reset(duration)
|
|
|
|
}
|
|
|
|
|
|
|
|
select {
|
|
|
|
case <-timer.C:
|
|
|
|
// Timer has fired and been drained, so it is ready for reuse.
|
|
|
|
case <-ctx.Done():
|
|
|
|
// Stop the timer to prevent a resource leak:
|
|
|
|
timer.Stop()
|
|
|
|
return nil, ctx.Err()
|
|
|
|
}
|
2017-06-13 23:40:24 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Next moves the iterator to the next set of results. It returns true if there
|
|
|
|
// are more results, or false if there are no more results or there was an
|
|
|
|
// error.
|
|
|
|
func (l *ObjectLister) Next() bool {
|
2021-09-04 14:57:29 +03:00
|
|
|
var ok bool
|
|
|
|
l.currentValue, ok = <-l.resultCh
|
|
|
|
if !ok {
|
|
|
|
// If there has been an error, we now show it to the caller:
|
|
|
|
l.currentErr = l.finalErr
|
2017-06-13 23:40:24 +03:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2021-09-04 14:57:29 +03:00
|
|
|
return true
|
2017-06-13 23:40:24 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
func (l *ObjectLister) Value() []string {
|
2021-09-04 14:48:19 +03:00
|
|
|
return l.currentValue
|
2017-06-13 23:40:24 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
func (l *ObjectLister) Error() error {
|
2021-09-04 14:48:19 +03:00
|
|
|
return l.currentErr
|
2017-06-13 23:40:24 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
func (l *ObjectLister) Close() {
|
2021-09-04 12:02:19 +03:00
|
|
|
l.cancel()
|
2017-06-13 23:40:24 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// listObjectsOptions holds the parameters of a single list request against
// an S3 bucket. Fields left at their zero value are omitted from the request.
type listObjectsOptions struct {
	// MaxKeys limits how many keys one request may return.
	MaxKeys int

	// Prefix restricts the listing to keys that begin with this string.
	Prefix string

	// ContinuationToken is the token returned by the previous request; it
	// makes the listing resume where that request left off.
	ContinuationToken string
}
|
|
|
|
|
|
|
|
type listBucketResult struct {
|
|
|
|
Name string `xml:"Name"`
|
|
|
|
Prefix string `xml:"Prefix"`
|
|
|
|
KeyCount int `xml:"KeyCount"`
|
|
|
|
MaxKeys int `xml:"MaxKeys"`
|
|
|
|
IsTruncated bool `xml:"IsTrucated"`
|
|
|
|
NextContinuationToken string `xml:"NextContinuationToken"`
|
|
|
|
Contents []listBucketContents `xml:"Contents"`
|
|
|
|
}
|
|
|
|
|
|
|
|
type listBucketContents struct {
|
|
|
|
Key string `xml:"Key"`
|
|
|
|
LastModified time.Time `xml:"LastModified"`
|
|
|
|
ETag string `xml:"ETag"`
|
|
|
|
Size int64 `xml:"Size"`
|
|
|
|
StorageClass string `xml:"StorageClass"`
|
|
|
|
CommonPrefixes []CommonPrefix `xml:"CommonPrefixes"`
|
|
|
|
}
|
|
|
|
|
|
|
|
// CommonPrefix is a single entry of a "CommonPrefixes" element in a list
// response; it carries the text of the nested "Prefix" element.
type CommonPrefix struct {
	Prefix string `xml:"Prefix"`
}
|
|
|
|
|
|
|
|
type ListObjectsResult struct {
|
|
|
|
result *listBucketResult
|
|
|
|
}
|
|
|
|
|
2021-09-06 18:48:00 +03:00
|
|
|
func listObjects(c *Config, b *Bucket, opts listObjectsOptions) (*listBucketResult, error) {
|
|
|
|
result := new(listBucketResult)
|
2017-06-13 23:40:24 +03:00
|
|
|
u, err := b.url("", c)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
q := u.Query()
|
|
|
|
q.Set("list-type", "2")
|
|
|
|
if opts.MaxKeys > 0 {
|
|
|
|
q.Set("max-keys", strconv.Itoa(opts.MaxKeys))
|
|
|
|
}
|
|
|
|
if opts.Prefix != "" {
|
|
|
|
q.Set("prefix", opts.Prefix)
|
|
|
|
}
|
|
|
|
if opts.ContinuationToken != "" {
|
|
|
|
q.Set("continuation-token", opts.ContinuationToken)
|
|
|
|
}
|
|
|
|
u.RawQuery = q.Encode()
|
|
|
|
|
|
|
|
r := http.Request{
|
|
|
|
Method: "GET",
|
|
|
|
URL: u,
|
|
|
|
}
|
|
|
|
b.Sign(&r)
|
|
|
|
|
|
|
|
resp, err := b.conf().Do(&r)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if resp.StatusCode != 200 {
|
2021-09-13 15:13:35 +03:00
|
|
|
return nil, s3client.NewRespError(resp)
|
2017-06-13 23:40:24 +03:00
|
|
|
}
|
|
|
|
|
2021-09-06 18:48:00 +03:00
|
|
|
err = xml.NewDecoder(resp.Body).Decode(result)
|
|
|
|
closeErr := resp.Body.Close()
|
|
|
|
if err != nil {
|
2017-06-13 23:40:24 +03:00
|
|
|
return nil, err
|
|
|
|
}
|
2021-09-06 18:48:00 +03:00
|
|
|
if closeErr != nil {
|
|
|
|
return nil, closeErr
|
|
|
|
}
|
2017-06-13 23:40:24 +03:00
|
|
|
|
|
|
|
return result, nil
|
|
|
|
}
|