Mirror of https://github.com/Azure/blobxfer.git
Tag for 1.0.0a3 release
- Rename some options
- Make thread join more robust on Python2
This commit is contained in:
Parent
b7782619d1
Commit
e1d97fa3cb
@@ -2,7 +2,7 @@
 ## [Unreleased]
 
-## [1.0.0a2] - 2017-06-02
+## [1.0.0a3] - 2017-06-02
 
 ### Changed
 - From scratch rewrite providing a consistent CLI experience and a vast
   array of new and advanced features. Please see the
@@ -201,8 +201,8 @@ usage documentation carefully when upgrading from 0.12.1.
   `--no-skiponmatch`.
 - 0.8.2: performance regression fixes
 
-[Unreleased]: https://github.com/Azure/blobxfer/compare/1.0.0a2...HEAD
-[1.0.0a2]: https://github.com/Azure/blobxfer/compare/0.12.1...1.0.0a2
+[Unreleased]: https://github.com/Azure/blobxfer/compare/1.0.0a3...HEAD
+[1.0.0a3]: https://github.com/Azure/blobxfer/compare/0.12.1...1.0.0a3
 [0.12.1]: https://github.com/Azure/blobxfer/compare/0.12.0...0.12.1
 [0.12.0]: https://github.com/Azure/blobxfer/compare/0.11.5...0.12.0
 [0.11.5]: https://github.com/Azure/blobxfer/compare/0.11.4...0.11.5

@@ -105,13 +105,14 @@ class Concurrency(object):
     """Concurrency Options"""
     def __init__(
             self, crypto_processes, md5_processes, disk_threads,
-            transfer_threads):
+            transfer_threads, is_download=None):
         """Ctor for Concurrency Options
         :param Concurrency self: this
         :param int crypto_processes: number of crypto procs
         :param int md5_processes: number of md5 procs
         :param int disk_threads: number of disk threads
         :param int transfer_threads: number of transfer threads
+        :param bool is_download: download hint
         """
         self.crypto_processes = crypto_processes
         self.md5_processes = md5_processes
@@ -131,6 +132,9 @@ class Concurrency(object):
             # cap maximum number of disk threads from cpu count to 64
             if self.disk_threads > 64:
                 self.disk_threads = 64
+            # for downloads, cap disk threads to lower value
+            if is_download and self.disk_threads > 16:
+                self.disk_threads = 16
             auto_disk = True
         if self.transfer_threads is None or self.transfer_threads < 1:
             if auto_disk:
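Editor's aside, not part of the commit: a usage sketch of the new `is_download` hint. The import path `blobxfer.models.options` is an assumption (the hunk only shows `class Concurrency(object)`); non-positive counts request auto-selection, which the hunk above caps at 16 disk threads for downloads instead of 64.

```python
from blobxfer.models.options import Concurrency

# auto-select all process/thread counts; the download hint caps
# auto-selected disk threads at 16 rather than 64
conc = Concurrency(
    crypto_processes=0, md5_processes=0, disk_threads=0,
    transfer_threads=0, is_download=True)
print(conc.disk_threads)  # <= 16 regardless of core count
```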
@@ -430,7 +430,7 @@ class Downloader(object):
         if terminate:
             self._download_terminate = terminate
         for thr in self._disk_threads:
-            thr.join()
+            blobxfer.util.join_thread(thr)
 
     def _wait_for_transfer_threads(self, terminate):
         # type: (Downloader, bool) -> None
@@ -441,7 +441,7 @@ class Downloader(object):
         if terminate:
             self._download_terminate = terminate
         for thr in self._transfer_threads:
-            thr.join()
+            blobxfer.util.join_thread(thr)
 
     def _worker_thread_transfer(self):
         # type: (Downloader) -> None
@@ -452,7 +452,7 @@ class Downloader(object):
         while not self.termination_check:
             try:
                 if len(self._disk_set) > max_set_len:
-                    time.sleep(0.2)
+                    time.sleep(0.1)
                     continue
                 else:
                     dd = self._transfer_queue.get(block=False, timeout=0.1)
@@ -792,8 +792,8 @@ class Downloader(object):
                 'KeyboardInterrupt detected, force terminating '
                 'processes and threads (this may take a while)...')
             try:
-                self._wait_for_transfer_threads(terminate=True)
                 self._wait_for_disk_threads(terminate=True)
+                self._wait_for_transfer_threads(terminate=True)
             finally:
                 self._cleanup_temporary_files()
             raise

@@ -447,10 +447,10 @@ class Uploader(object):
         while not self.termination_check:
             try:
                 if len(self._transfer_set) > max_set_len:
-                    time.sleep(0.2)
+                    time.sleep(0.1)
                     continue
                 else:
-                    ud = self._upload_queue.get(False, 0.1)
+                    ud = self._upload_queue.get(block=False, timeout=0.1)
             except queue.Empty:
                 continue
             try:

@@ -124,6 +124,20 @@ def is_not_empty(obj):
     return obj is not None and len(obj) > 0
 
 
+def join_thread(thr):
+    # type: (threading.Thread) -> None
+    """Join a thread
+    :type threading.Thread thr: thread to join
+    """
+    if on_python2():
+        while True:
+            thr.join(timeout=1)
+            if not thr.isAlive():
+                break
+    else:
+        thr.join()
+
+
 def merge_dict(dict1, dict2):
     # type: (dict, dict) -> dict
     """Recursively merge dictionaries: dict2 on to dict1. This differs
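Editor's aside on the `join_thread` helper above: on Python 2, `Thread.join()` with no timeout blocks in an uninterruptible lock acquire, so `KeyboardInterrupt` (Ctrl-C) is not delivered until the thread finishes. Joining in a loop with a timeout keeps the main thread responsive. A minimal standalone sketch of the same pattern; the `worker` function is hypothetical, not from this commit:

```python
import threading
import time


def worker():
    # hypothetical stand-in for a blobxfer disk/transfer thread
    time.sleep(30)


thr = threading.Thread(target=worker)
thr.start()
try:
    # a bare thr.join() on Python 2 would defer Ctrl-C until the
    # thread finishes; a timed join in a loop stays interruptible
    while thr.is_alive():
        thr.join(timeout=1)
except KeyboardInterrupt:
    print('interrupted, not waiting for worker')
```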
@@ -22,4 +22,4 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 # DEALINGS IN THE SOFTWARE.
 
-__version__ = '1.0.0a2'
+__version__ = '1.0.0a3'

cli/cli.py
@@ -62,13 +62,15 @@ class CliContext(object):
         self.credentials = None
         self.general_options = None
 
-    def initialize(self):
-        # type: (CliContext) -> None
+    def initialize(self, action):
+        # type: (CliContext, settings.TransferAction) -> None
         """Initialize context
         :param CliContext self: this
+        :param settings.TransferAction action: transfer action
         """
         self._init_config()
-        self.general_options = settings.create_general_options(self.config)
+        self.general_options = settings.create_general_options(
+            self.config, action)
         self.credentials = settings.create_azure_storage_credentials(
             self.config, self.general_options)
@@ -164,7 +166,8 @@ def _log_file_option(f):
         '--log-file',
         expose_value=False,
         default=None,
-        help='Log to file specified',
+        help='Log to file specified; this must be specified for progress '
+        'bar to show',
         callback=callback)(f)
 
 
@@ -191,7 +194,8 @@ def _progress_bar_option(f):
         '--progress-bar/--no-progress-bar',
         expose_value=False,
         default=True,
-        help='Display progress bar instead of console logs [True]',
+        help='Display progress bar instead of console logs; log file must '
+        'be specified [True]',
         callback=callback)(f)
 
 
@@ -254,22 +258,22 @@ def _local_resource_option(f):
         clictx.cli_options['local_resource'] = value
         return value
     return click.option(
-        '--local-resource',
+        '--local-path',
         expose_value=False,
-        help='Local resource; use - for stdin',
+        help='Local path; use - for stdin',
         callback=callback)(f)
 
 
-def _storage_account_name_option(f):
+def _storage_account_option(f):
     def callback(ctx, param, value):
         clictx = ctx.ensure_object(CliContext)
         clictx.cli_options['storage_account'] = value
         return value
     return click.option(
-        '--storage-account-name',
+        '--storage-account',
         expose_value=False,
         help='Storage account name',
-        envvar='BLOBXFER_STORAGE_ACCOUNT_NAME',
+        envvar='BLOBXFER_STORAGE_ACCOUNT',
         callback=callback)(f)
@@ -301,7 +305,7 @@ def common_options(f):
 
 def upload_download_options(f):
     f = _remote_path_option(f)
-    f = _storage_account_name_option(f)
+    f = _storage_account_option(f)
     f = _local_resource_option(f)
     return f
 
@@ -633,16 +637,16 @@ def _sync_copy_dest_access_key_option(f):
         callback=callback)(f)
 
 
-def _sync_copy_dest_storage_account_name_option(f):
+def _sync_copy_dest_storage_account_option(f):
     def callback(ctx, param, value):
         clictx = ctx.ensure_object(CliContext)
         clictx.cli_options['sync_copy_dest_storage_account'] = value
         return value
     return click.option(
-        '--sync-copy-dest-storage-account-name',
+        '--sync-copy-dest-storage-account',
         expose_value=False,
         help='Storage account name for synccopy destination',
-        envvar='BLOBXFER_SYNC_COPY_DEST_STORAGE_ACCOUNT_NAME',
+        envvar='BLOBXFER_SYNC_COPY_DEST_STORAGE_ACCOUNT',
         callback=callback)(f)
@@ -721,11 +725,11 @@ def download_options(f):
 
 
 def sync_copy_options(f):
-    f = _sync_copy_dest_storage_account_name_option(f)
+    f = _sync_copy_dest_storage_account_option(f)
     f = _sync_copy_dest_sas_option(f)
     f = _sync_copy_dest_remote_path_option(f)
     f = _sync_copy_dest_access_key_option(f)
-    f = _storage_account_name_option(f)
+    f = _storage_account_option(f)
     f = _skip_on_md5_match_option(f)
     f = _skip_on_lmt_ge_option(f)
     f = _skip_on_filesize_match_option(f)
@@ -757,7 +761,7 @@ def cli(ctx):
 def download(ctx):
     """Download blobs or files from Azure Storage"""
     settings.add_cli_options(ctx.cli_options, settings.TransferAction.Download)
-    ctx.initialize()
+    ctx.initialize(settings.TransferAction.Download)
     specs = settings.create_download_specifications(ctx.config)
     for spec in specs:
         blobxfer.api.Downloader(
@@ -773,7 +777,7 @@ def synccopy(ctx):
     """Synchronously copy blobs between Azure Storage accounts"""
     raise NotImplementedError()
     settings.add_cli_options(ctx.cli_options, settings.TransferAction.Synccopy)
-    ctx.initialize()
+    ctx.initialize(settings.TransferAction.Synccopy)
 
 
 @cli.command('upload')
@@ -784,7 +788,7 @@ def synccopy(ctx):
 def upload(ctx):
     """Upload files to Azure Storage"""
     settings.add_cli_options(ctx.cli_options, settings.TransferAction.Upload)
-    ctx.initialize()
+    ctx.initialize(settings.TransferAction.Upload)
     specs = settings.create_upload_specifications(ctx.config)
     for spec in specs:
         blobxfer.api.Uploader(

@@ -61,13 +61,13 @@ def add_cli_options(cli_options, action):
         if blobxfer.util.is_none_or_empty(local_resource):
             raise KeyError()
     except KeyError:
-        raise ValueError('--local-resource must be specified')
+        raise ValueError('--local-path must be specified')
     try:
         storage_account = cli_options['storage_account']
         if blobxfer.util.is_none_or_empty(storage_account):
             raise KeyError()
     except KeyError:
-        raise ValueError('--storage-account-name must be specified')
+        raise ValueError('--storage-account must be specified')
     try:
         remote_path = cli_options['remote_path']
         if blobxfer.util.is_none_or_empty(remote_path):
@@ -167,7 +167,7 @@ def add_cli_options(cli_options, action):
             raise KeyError()
     except KeyError:
         raise ValueError(
-            '--sync-copy-dest-storage-account-name must be specified')
+            '--sync-copy-dest-storage-account must be specified')
     try:
         sync_copy_dest_remote_path = \
             cli_options['sync_copy_dest_remote_path']
@@ -278,10 +278,11 @@ def create_azure_storage_credentials(config, general_options):
     return creds
 
 
-def create_general_options(config):
-    # type: (dict) -> blobxfer.models.options.General
+def create_general_options(config, action):
+    # type: (dict, TransferAction) -> blobxfer.models.options.General
     """Create a General Options object from configuration
     :param dict config: config dict
+    :param TransferAction action: transfer action
     :rtype: blobxfer.models.options.General
     :return: general options object
     """
@@ -292,6 +293,7 @@ def create_general_options(config):
             disk_threads=conc.get('disk_threads', 0),
             md5_processes=conc.get('md5_processes', 0),
             transfer_threads=conc.get('transfer_threads', 0),
+            is_download=action == TransferAction.Download,
         ),
         log_file=config['options'].get('log_file', None),
         progress_bar=config['options'].get('progress_bar', True),

@@ -72,9 +72,10 @@ docker pull alfpark/blobxfer
 
 ## Troubleshooting
 #### `azure.storage` dependency not found
-If you get an error that `azure.storage` cannot be found or loaded, then
-most likely there was a conflict between this package and other `azure`
-packages that share the same base namespace. You can correct this by issuing:
+If you get an error such as `ImportError: No module named storage` or that
+`azure.storage` cannot be found or loaded, then most likely there was a
+conflict between this package and other `azure` packages that share the same
+base namespace. You can correct this by issuing:
 ```shell
 # for Python3
 pip3 install --upgrade --force-reinstall azure-storage

@@ -12,9 +12,9 @@ command will be detailed along with all options available.
 ### `download`
 Downloads a remote Azure path, which may contain many resources, to the
 local machine. This command requires, at the minimum, the following options:
-* `--storage-account-name`
+* `--storage-account`
 * `--remote-path`
-* `--local-resource`
+* `--local-path`
 
 Additionally, an authentication option for the storage account is required.
 Please see the Authentication sub-section below under Options.
@@ -23,14 +23,14 @@ Please see the Authentication sub-section below under Options.
 Uploads a local path to a remote Azure path. The local path may contain
 many resources on the local machine. This command requires, at the minimum,
 the following options:
-* `--local-resource`
-* `--storage-account-name`
+* `--local-path`
+* `--storage-account`
 * `--remote-path`
 
 Additionally, an authentication option for the storage account is required.
 Please see the Authentication sub-section below under Options.
 
-If piping from `stdin`, `--local-resource` should be set to `-` as per
+If piping from `stdin`, `--local-path` should be set to `-` as per
 convention.
 
 ### `synccopy`
@@ -49,9 +49,10 @@ of up to 100MiB, all others have a maximum of 4MiB.
 attributes (mode and ownership) should be stored or restored. Note that to
 restore uid/gid, `blobxfer` must be run as root or under sudo.
 * `--file-md5` or `--no-file-md5` controls if the file MD5 should be computed.
-* `--local-resource` is the local resource path. Set to `-` if piping from
+* `--local-path` is the local resource path. Set to `-` if piping from
 `stdin`.
-* `--log-file` specifies the log file to write to.
+* `--log-file` specifies the log file to write to. This must be specified
+for a progress bar to be output to console.
 * `--mode` is the operating mode. The default is `auto` but may be set to
 `append`, `block`, `file`, or `page`. If specified with the `upload`
 command, then all files will be uploaded as the specified `mode` type.
@@ -61,12 +62,16 @@ with Azure File shares.
 * `--overwrite` or `--no-overwrite` controls clobber semantics at the
 destination.
 * `--progress-bar` or `--no-progress-bar` controls if a progress bar is
-output to the console.
+output to the console. `--log-file` must be specified for a progress bar
+to be output.
 * `--recursive` or `--no-recursive` controls if the source path should be
 recursively uploaded or downloaded.
 * `--remote-path` is the remote Azure path. This path must contain the
 Blob container or File share at the beginning, e.g., `mycontainer/vdir`
 * `--resume-file` specifies the resume file to write to.
+* `--storage-account` specifies the storage account to use. This can be
+optionally provided through an environment variable `BLOBXFER_STORAGE_ACCOUNT`
+instead.
 * `--timeout` is the integral timeout value in seconds to use.
 * `-h` or `--help` can be passed at every command level to receive context
 sensitive help.
@@ -96,7 +101,7 @@ to/from Azure Storage.
 ### Connection
 * `--endpoint` is the Azure Storage endpoint to connect to; the default is
 Azure Public regions, or `core.windows.net`.
-* `--storage-account-name` is the storage account to connect to.
+* `--storage-account` is the storage account to connect to.
 
 ### Encryption
 * `--rsa-private-key` is the RSA private key in PEM format to use. This can
@@ -161,27 +166,27 @@ file path. The default is `1`.
 ### `download` Examples
 #### Download an Entire Encrypted Blob Container to Current Working Directory
 ```shell
-blobxfer download --storage-account-name mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-resource . --rsa-public-key ~/mypubkey.pem
+blobxfer download --storage-account mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-path . --rsa-public-key ~/mypubkey.pem
 ```
 
 #### Download an Entire File Share to Designated Path and Skip On Filesize Matches
 ```shell
-blobxfer download --mode file --storage-account-name mystorageaccount --storage-account-key "myaccesskey" --remote-path myfileshare --local-resource /my/path --skip-on-filesize-match
+blobxfer download --mode file --storage-account mystorageaccount --storage-account-key "myaccesskey" --remote-path myfileshare --local-path /my/path --skip-on-filesize-match
 ```
 
 #### Download only Page Blobs in Blob Container Virtual Directory Non-recursively and Cleanup Local Path to Match Remote Path
 ```shell
-blobxfer download --mode page --storage-account-name mystorageaccount --storage-account-key "myaccesskey" --remote-path mycontainer --local-resource /my/pageblobs --no-recursive --delete
+blobxfer download --mode page --storage-account mystorageaccount --storage-account-key "myaccesskey" --remote-path mycontainer --local-path /my/pageblobs --no-recursive --delete
 ```
 
 #### Resume Incomplete Downloads Matching an Include Pattern and Log to File and Restore POSIX File Attributes
 ```shell
-blobxfer download --storage-account-name mystorageaccount --storage-account-key "myaccesskey" --remote-path mycontainer --local-resource . --include '*.bin' --resume-file myresumefile.db --log-file blobxfer.log --file-attributes
+blobxfer download --storage-account mystorageaccount --storage-account-key "myaccesskey" --remote-path mycontainer --local-path . --include '*.bin' --resume-file myresumefile.db --log-file blobxfer.log --file-attributes
 ```
 
 #### Download a Blob Snapshot
 ```shell
-blobxfer download --storage-account-name mystorageaccount --sas "mysastoken" --remote-path "mycontainer/file.bin?snapshot=2017-04-20T02:12:49.0311708Z" --local-resource .
+blobxfer download --storage-account mystorageaccount --sas "mysastoken" --remote-path "mycontainer/file.bin?snapshot=2017-04-20T02:12:49.0311708Z" --local-path .
 ```
 
 #### Download using a YAML Configuration File
@@ -192,27 +197,27 @@ blobxfer download --config myconfig.yaml
 ### `upload` Examples
 #### Upload Current Working Directory as Encrypted Block Blobs Non-recursively
 ```shell
-blobxfer upload --storage-account-name mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-resource . --rsa-private-key ~/myprivatekey.pem --no-recursive
+blobxfer upload --storage-account mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-path . --rsa-private-key ~/myprivatekey.pem --no-recursive
 ```
 
 #### Upload Specific Path Recursively to a File Share, Store File MD5 and POSIX File Attributes to a File Share and Exclude Some Files
 ```shell
-blobxfer upload --mode file --storage-account-name mystorageaccount --sas "mysastoken" --remote-path myfileshare --local-resource . --file-md5 --file-attributes --exclude '*.bak'
+blobxfer upload --mode file --storage-account mystorageaccount --sas "mysastoken" --remote-path myfileshare --local-path . --file-md5 --file-attributes --exclude '*.bak'
 ```
 
 #### Upload Single File with Resume and Striped Vectored IO into 512MiB Chunks
 ```shell
-blobxfer upload --storage-account-name mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-resource /some/huge/file --resume-file hugefileresume.db --distribution-mode stripe --stripe-chunk-size-bytes 536870912
+blobxfer upload --storage-account mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-path /some/huge/file --resume-file hugefileresume.db --distribution-mode stripe --stripe-chunk-size-bytes 536870912
 ```
 
 #### Upload Specific Path but Skip On Any MD5 Matches, Store File MD5 and Cleanup Remote Path to Match Local Path
 ```shell
-blobxfer upload --storage-account-name mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-resource /my/path --file-md5 --skip-on-md5-match --delete
+blobxfer upload --storage-account mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-path /my/path --file-md5 --skip-on-md5-match --delete
 ```
 
 #### Upload From Piped `stdin`
 ```shell
-curl -fSsL https://some.uri | blobxfer upload --storage-account-name mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-resource -
+curl -fSsL https://some.uri | blobxfer upload --storage-account mystorageaccount --sas "mysastoken" --remote-path mycontainer --local-path -
 ```
 
 #### Upload using a YAML Configuration File

@@ -91,5 +91,5 @@ keep this metadata intact or reconstruction will fail.
 +---------------------+
 ```
 
-In order to take advantage of `stripe` Vectored IO, you must use a YAML
-configuration file to define multiple destinations.
+In order to take advantage of `stripe` Vectored IO across multiple
+destinations, you must use a YAML configuration file.

@@ -3,8 +3,8 @@ Please read the following carefully regarding considerations that should
 be applied with regard to performance and `blobxfer`. Additionally,
 please review the
 [Azure Storage Scalability and Performance Targets](https://azure.microsoft.com/en-us/documentation/articles/storage-scalability-targets/)
-for an overview of general performance targets that apply to Azure Blobs
-and File shares.
+for an overview of general performance targets that apply to Azure Blobs,
+File shares and Storage Account types (GRS, LRS, ZRS, etc).
 
 ## Concurrency
 * `blobxfer` offers four concurrency knobs. Each one should be tuned for
@@ -23,6 +23,44 @@ maximum performance according to your system and network characteristics.
 * The thread concurrency options (disk and transfer) can be set to a
 non-positive number to be automatically set as a multiple of the number of
 cores available on the machine.
+* For uploads, there should be a sufficient number of disk threads to ensure
+that all transfer threads have work to do. For downloads, there should be a
+sufficient number of disk threads to write data to disk so transfer threads
+are not artificially blocked.
+
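Editor's aside: the concurrency knobs above correspond to the keys read by `create_general_options` in the `cli/settings.py` hunks earlier in this commit. A sketch of such an options mapping follows; the exact layout is an assumption, only the key names appear in the diff.

```python
# illustrative only: key names mirror the conc.get(...) and
# config['options'].get(...) reads in the cli/settings.py hunks above
config = {
    'options': {
        'concurrency': {
            'crypto_processes': 0,
            'md5_processes': 0,
            'disk_threads': 0,      # non-positive: auto (capped at 64, or 16 for downloads)
            'transfer_threads': 0,  # non-positive: auto
        },
        'log_file': 'blobxfer.log',
        'progress_bar': True,
    },
}
```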
+## Chunk Sizing
+Chunk sizing refers to the `chunk_size_bytes` option, whose meaning varies
+depending on the context of uploading or downloading.
+
+### Uploads
+For uploads, chunk sizes correspond to the maximum amount of data to transfer
+with a single request. The Azure Storage service imposes maximums depending
+upon the type of entity that is being written. For block blobs, the maximum
+is 100MiB (although you may "one-shot" up to 256MiB). For page blobs, the
+maximum is 4MiB. For append blobs, the maximum is 4MiB. For Azure Files,
+the maximum is 4MiB.
+
+For block blobs, setting the chunk size to something greater than 4MiB will
+not only allow larger file sizes (recall that the maximum number of
+blocks for a block blob is 50000, thus at 100MiB blocks, you can create a
+5TiB block blob object) but will allow you to amortize larger portions of
+data transfer over each request/response overhead. `blobxfer` can
+automatically select the proper block size given your file, but will not
+automatically tune the chunk size as that depends upon your system and
+network characteristics.
+
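Editor's aside: a quick check of the arithmetic above. The maximum block blob size scales linearly with the block (chunk) size under the 50000-block limit:

```python
# illustrative arithmetic for the 50000-block limit cited above
MAX_BLOCKS = 50000
MiB = 1024 ** 2
TiB = 1024 ** 4


def max_block_blob_size(chunk_size_bytes):
    return MAX_BLOCKS * chunk_size_bytes


print(max_block_blob_size(4 * MiB) / TiB)    # ~0.19 TiB with 4MiB blocks
print(max_block_blob_size(100 * MiB) / TiB)  # ~4.77 TiB (the ~5TiB above)
```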
+### Downloads
+For downloads, chunk sizes correspond to the maximum amount of data to
+request from the server for each request. It is important to keep a balance
+between the chunk size and the number of in-flight operations afforded by
+the `transfer_threads` concurrency control. `blobxfer` does not automatically
+tune this (but can automatically set it to a value that should work for
+most situations) due to varying system and network conditions.
+
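Editor's aside: one way to reason about the balance described above is that the data in flight (and roughly the buffer memory required) is bounded by the chunk size times the number of transfer threads. The numbers below are illustrative:

```python
# rough upper bound: each transfer thread keeps one chunk-sized
# request outstanding at a time
MiB = 1024 ** 2


def approx_inflight_bytes(chunk_size_bytes, transfer_threads):
    return chunk_size_bytes * transfer_threads


print(approx_inflight_bytes(4 * MiB, 16) // MiB)   # 64 MiB in flight
print(approx_inflight_bytes(32 * MiB, 16) // MiB)  # 512 MiB in flight
```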
+Additionally, disk write performance is typically lower than disk read
+performance, so you need to ensure that the number of `disk_threads` is not
+set to a very large number to prevent thrashing and highly random write
+patterns.
+
 ## Azure File Share Performance
 File share performance can be "slow" or become a bottleneck, especially for