diff --git a/.coveragerc b/.coveragerc index b710cba..5fc34c3 100644 --- a/.coveragerc +++ b/.coveragerc @@ -6,6 +6,7 @@ omit = exclude_lines = # Have to re-enable the standard pragma pragma: no cover + noqa # Don't complain about missing debug-only code: def __repr__ diff --git a/.gitignore b/.gitignore index ddc86bb..21d27b6 100644 --- a/.gitignore +++ b/.gitignore @@ -43,6 +43,7 @@ htmlcov/ nosetests.xml coverage.xml *,cover +junit-*.xml # Translations *.mo diff --git a/.travis.yml b/.travis.yml index cdf9217..5bc451d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,6 +5,7 @@ python: - 3.3 - 3.4 - 3.5 + - 3.6 - pypy # disable pypy3 until 3.3 compliance #- pypy3 diff --git a/README.md b/README.md new file mode 100644 index 0000000..6853cb4 --- /dev/null +++ b/README.md @@ -0,0 +1,18 @@ +blobxfer +======== + +AzCopy-like OS independent Azure storage blob and file share transfer tool + +Change Log +---------- + +See the [CHANGELOG.md](https://github.com/Azure/blobxfer/blob/master/CHANGELOG.md) file. + +------------------------------------------------------------------------ + +This project has adopted the +[Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). +For more information see the +[Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) +or contact [](mailto:opencode@microsoft.com) with any +additional questions or comments. diff --git a/README.rst b/README.rst deleted file mode 100644 index 882d883..0000000 --- a/README.rst +++ /dev/null @@ -1,426 +0,0 @@ -.. image:: https://travis-ci.org/Azure/blobxfer.svg?branch=master - :target: https://travis-ci.org/Azure/blobxfer -.. image:: https://coveralls.io/repos/github/Azure/blobxfer/badge.svg?branch=master - :target: https://coveralls.io/github/Azure/blobxfer?branch=master -.. image:: https://img.shields.io/pypi/v/blobxfer.svg - :target: https://pypi.python.org/pypi/blobxfer -.. image:: https://img.shields.io/pypi/pyversions/blobxfer.svg - :target: https://pypi.python.org/pypi/blobxfer -.. image:: https://img.shields.io/pypi/l/blobxfer.svg - :target: https://pypi.python.org/pypi/blobxfer -.. image:: https://img.shields.io/docker/pulls/alfpark/blobxfer.svg - :target: https://hub.docker.com/r/alfpark/blobxfer -.. image:: https://images.microbadger.com/badges/image/alfpark/blobxfer.svg - :target: https://microbadger.com/images/alfpark/blobxfer - -blobxfer -======== -AzCopy-like OS independent Azure storage blob and file share transfer tool - -Installation ------------- -`blobxfer`_ is on PyPI and can be installed via: - -:: - - pip install blobxfer - -blobxfer is compatible with Python 2.7 and 3.3+. To install for Python 3, some -distributions may use ``pip3`` instead. If you do not want to install blobxfer -as a system-wide binary and modify system-wide python packages, use the -``--user`` flag with ``pip`` or ``pip3``. - -blobxfer is also on `Docker Hub`_, and the Docker image for Linux can be -pulled with the following command: - -:: - - docker pull alfpark/blobxfer - -Please see example usage below on how to use the docker image. - -If you encounter difficulties installing the script, it may be due to the -``cryptography`` dependency. Please ensure that your system is able to install -binary wheels provided by these dependencies (e.g., on Windows) or is able to -compile the dependencies (i.e., ensure you have a C compiler, python, ssl, -and ffi development libraries/headers installed prior to invoking pip). 
For -instance, to install blobxfer on a fresh Ubuntu 14.04/16.04 installation for -Python 2.7, issue the following commands: - -:: - - apt-get update - apt-get install -y build-essential libssl-dev libffi-dev libpython-dev python-dev python-pip - pip install --upgrade blobxfer - -If you need more fine-grained control on installing dependencies, continue -reading this section. Depending upon the desired mode of authentication with -Azure and options, the script will require the following packages, some of -which will automatically pull required dependent packages. Below is a list of -dependent packages: - -- Base Requirements - - - `azure-common`_ - - `azure-storage`_ - - `requests`_ - -- Encryption Support - - - `cryptography`_ - -- Service Management Certificate Support - - - `azure-servicemanagement-legacy`_ - -You can install these packages using pip, easy_install or through standard -setup.py procedures. These dependencies will be automatically installed if -using a package-based install or setup.py. The required versions of these -dependent packages can be found in ``setup.py``. - -.. _blobxfer: https://pypi.python.org/pypi/blobxfer -.. _Docker Hub: https://hub.docker.com/r/alfpark/blobxfer -.. _azure-common: https://pypi.python.org/pypi/azure-common -.. _azure-storage: https://pypi.python.org/pypi/azure-storage -.. _requests: https://pypi.python.org/pypi/requests -.. _cryptography: https://pypi.python.org/pypi/cryptography -.. _azure-servicemanagement-legacy: https://pypi.python.org/pypi/azure-servicemanagement-legacy - -Introduction ------------- - -The blobxfer.py script allows interacting with storage accounts using any of -the following methods: (1) management certificate, (2) shared account key, -(3) SAS key. The script can, in addition to working with single files, mirror -entire directories into and out of containers or file shares from Azure -Storage, respectively. File and block/page level MD5 integrity checking is -supported along with various transfer optimizations, built-in retries, -user-specified timeouts, and client-side encryption. - -Program parameters and command-line options can be listed via the ``-h`` -switch. Please invoke this first if you are unfamiliar with blobxfer operation -as not all options are explained below. At the minimum, three positional -arguments are required: storage account name, container or share name, and -local resource. Additionally, one of the following authentication switches -must be supplied: ``--subscriptionid`` with ``--managementcert``, -``--storageaccountkey``, or ``--saskey``. Do not combine different -authentication schemes together. - -Environment variables ``BLOBXFER_STORAGEACCOUNTKEY``, ``BLOBXFER_SASKEY``, -and ``BLOBXFER_RSAKEYPASSPHRASE`` can take the place of -``--storageaccountkey``, ``--saskey``, and ``--rsakeypassphrase`` respectively -if you do not want to expose credentials on a command line. - -It is generally recommended to use SAS keys wherever appropriate; only HTTPS -transport is used in the script. Please note that when using SAS keys that -only container- or fileshare-level SAS keys will allow for entire directory -uploading or container/fileshare downloading. The container/fileshare must -also have been created beforehand if using a service SAS, as -containers/fileshares cannot be created using service SAS keys. Account-level -SAS keys with a signed resource type of ``c`` or container will allow -containers/fileshares to be created with SAS keys. 
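As a minimal illustration of the environment variable approach described above, the following hypothetical shell sketch supplies a SAS key via ``BLOBXFER_SASKEY`` instead of the ``--saskey`` switch; the storage account name, container name, local directory, and SAS value below are placeholders:

::

    export BLOBXFER_SASKEY='?sv=2015-04-05&sr=c&sig=...'
    blobxfer mystorageacct container0 mylocaldir --upload

The same pattern applies to ``BLOBXFER_STORAGEACCOUNTKEY`` and ``BLOBXFER_RSAKEYPASSPHRASE`` if those credentials are used instead.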
- -Example Usage -------------- - -The following examples show how to invoke the script with commonly used -options. Note that the authentication parameters are missing from the below -examples. You will need to select a preferred method of authenticating with -Azure and add the authentication switches (or as environment variables) as -noted above. - -The script will attempt to perform a smart transfer, by detecting if the local -resource exists. For example: - -:: - - blobxfer mystorageacct container0 mylocalfile.txt - -Note: if you downloaded the script directly from github, then you should append -``.py`` to the blobxfer command. - -If mylocalfile.txt exists locally, then the script will attempt to upload the -file to container0 on mystorageacct. If the file does not exist, then it will -attempt to download the resource. If the desired behavior is to download the -file from Azure even if the local file exists, one can override the detection -mechanism with ``--download``. ``--upload`` is available to force the transfer -to Azure storage. Note that specifying a particular direction does not force -the actual operation to occur as that depends on other options specified such -as skipping on MD5 matches. Note that you may use the ``--remoteresource`` flag -to rename the local file as the blob name on Azure storage if uploading, -however, ``--remoteresource`` has no effect if uploading a directory of files. -Please refer to the ``--collate`` option as explained below. - -If the local resource is a directory that exists, the script will attempt to -mirror (recursively copy) the entire directory to Azure storage while -maintaining subdirectories as virtual directories in Azure storage. You can -disable the recursive copy (i.e., upload only the files in the directory) -using the ``--no-recursive`` flag. - -To upload a directory with files only matching a Unix-style shell wildcard -pattern, an example commandline would be: - -:: - - blobxfer mystorageacct container0 mylocaldir --upload --include '**/*.txt' - -This would attempt to recursively upload the contents of mylocaldir -to container0 for any file matching the wildcard pattern ``*.txt`` within -all subdirectories. Include patterns can be applied for uploads as well as -downloads. Note that you will need to prevent globbing by your shell such -that wildcard expansion does not take place before script interprets the -argument. If ``--include`` is not specified, all files will be uploaded -or downloaded for the specific context. - -To download an entire container from your storage account, an example -commandline would be: - -:: - - blobxfer mystorageacct container0 mylocaldir --remoteresource . - -Assuming mylocaldir directory does not exist, the script will attempt to -download all of the contents in container0 because “.” is set with -``--remoteresource`` flag. To download individual blobs, one would specify the -blob name instead of “.” with the ``--remoteresource`` flag. If mylocaldir -directory exists, the script will attempt to upload the directory instead of -downloading it. If you want to force the download direction even if the -directory exists, indicate that with the ``--download`` flag. When downloading -an entire container, the script will attempt to pre-allocate file space and -recreate the sub-directory structure as needed. - -To collate files into specified virtual directories or local paths, use -the ``--collate`` flag with the appropriate parameter. 
For example, the -following commandline: - -:: - - blobxfer mystorageacct container0 myvhds --upload --collate vhds --autovhd - -If the directory ``myvhds`` had two vhd files a.vhd and subdir/b.vhd, these -files would be uploaded into ``container0`` under the virtual directory named -``vhds``, and b.vhd would not contain the virtual directory subdir; thus, -flattening the directory structure. The ``--autovhd`` flag would automatically -enable page blob uploads for these files. If you wish to collate all files -into the container directly, you would replace ``--collate vhds`` with -``--collate .`` - -To strip leading components of a path on upload, use ``--strip-components`` -with a number argument which will act similarly to tar's -``--strip-components=NUMBER`` parameter. This parameter is only applied -during an upload. - -To encrypt or decrypt files, the option ``--rsapublickey`` and -``--rsaprivatekey`` is available. This option requires a file location for a -PEM encoded RSA public or private key. An optional parameter, -``--rsakeypassphrase`` is available for passphrase protected RSA private keys. - -To encrypt and upload, only the RSA public key is required although an RSA -private key may be specified. To download and decrypt blobs which are -encrypted, the RSA private key is required. - -:: - - blobxfer mystorageacct container0 myblobs --upload --rsapublickey mypublickey.pem - -The above example commandline would encrypt and upload files contained in -``myblobs`` using an RSA public key named ``mypublickey.pem``. An RSA private -key may be specified instead for uploading (public parts will be used). - -:: - - blobxfer mystorageacct container0 myblobs --remoteresource . --download --rsaprivatekey myprivatekey.pem - -The above example commandline would download and decrypt all blobs in the -container ``container0`` using an RSA private key named ``myprivatekey.pem``. -An RSA private key must be specified for downloading and decryption of -encrypted blobs. - -Currently only the ``FullBlob`` encryption mode is supported for the -parameter ``--encmode``. The ``FullBlob`` encryption mode either uploads or -downloads Azure Storage .NET/Java compatible client-side encrypted block blobs. - -Please read important points in the Encryption Notes below for more -information. - -To transfer to an Azure Files share, specify the ``--fileshare`` option and -specify the share name as the second positional argument. - -:: - - blobxfer mystorageacct myshare localfiles --fileshare --upload - -The above example would upload all files in the ``localfiles`` directory to -the share named ``myshare``. Encryption/decryption options are compatible with -Azure Files as the destination or source. Please refer to this `MSDN article`_ -for features not supported by the Azure File Service. - -.. _MSDN article: https://msdn.microsoft.com/en-us/library/azure/dn744326.aspx - -Docker Usage ------------- - -An example execution for uploading the host path ``/example/host/path`` -to a storage container named ``container0`` would be: - -:: - - docker run --rm -t -v /example/host/path:/path/in/container alfpark/blobxfer mystorageacct container0 /path/in/container --upload - -Note that docker volume mount mappings must be crafted with care to ensure -consistency with directory depth between the host and the container. -Optionally, you can utilize the ``--strip-components`` flag to remove leading -path components as desired. 
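As a complementary sketch, assuming the same image and volume mount mapping as the upload example above, downloading the entire container back into the host path might look like the following, using the ``--remoteresource .`` and ``--download`` flags described earlier:

::

    docker run --rm -t -v /example/host/path:/path/in/container alfpark/blobxfer mystorageacct container0 /path/in/container --remoteresource . --download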
- -General Notes ------------- - -- If the pyOpenSSL package is present, urllib3/requests may use this package - (as discussed in the Performance Notes below), which may result in - exceptions being thrown that are not normalized by urllib3. This may - result in exceptions that should be retried, but are not. It is recommended - to upgrade your Python where pyOpenSSL is not required for fully validating - peers and such that blobxfer can operate without pyOpenSSL in a secure - fashion. You can also run blobxfer via Docker or in a virtualenv - environment without pyOpenSSL. -- blobxfer does not take any leases on blobs or containers. It is up to - the user to ensure that blobs are not modified while downloads/uploads - are being performed. -- No validation is performed regarding container and file naming and length - restrictions. -- blobxfer will attempt to download from blob storage as-is. If the source - filename is incompatible with the destination operating system, then - failure may result. -- When using SAS, the SAS key must be a container- or share-level SAS if - performing recursive directory upload or container/file share download. -- If uploading via service-level SAS keys, the container or file share must - already be created in Azure storage prior to upload. Account-level SAS keys - with the signed resource type of ``c`` or container-level permission will - allow container or file share creation. -- For non-SAS requests, timeouts may not be properly honored due to - limitations of the Azure Python SDK. -- By default, files with matching MD5 checksums will be skipped for both - download (if MD5 information is present on the blob) and upload. Specify - ``--no-skiponmatch`` to disable this functionality. -- When uploading files as page blobs, the content is page boundary - byte-aligned. The MD5 for the blob is computed using the final aligned - data if the source is not page boundary byte-aligned. This enables these - page blobs or files to be skipped during subsequent download or upload by - default (i.e., ``--no-skiponmatch`` parameter is not specified). -- If ``--delete`` is specified, any remote files found that have no - corresponding local file in directory upload mode will be deleted. Deletion - occurs prior to any transfers, analogous to the delete-before rsync option. - Please note that this parameter will interact with ``--include`` and any - file not included from the include pattern will be deleted. -- ``--include`` has no effect when specifying a single file to upload or - blob to download. When specifying ``--include`` on container download, - the pattern will be applied to the blob name without the container name. - Globbing of wildcards must be disabled such that the script can read - the include pattern without the shell expanding the wildcards, if specified. -- Empty directories are not created locally when downloading from an Azure - file share which has empty directories. -- Empty directories are not deleted if ``--delete`` is specified and no - files remain in the directory on the Azure file share. - -Performance Notes ----------------- - -- Most likely, you will need to tweak the ``--numworkers`` argument that best - suits your environment. The default is the number of CPUs on the running - machine multiplied by 3 (except when transferring to/from file shares). - Increasing this number (or even using the default) may not provide the - optimal balance between concurrency and your network conditions.
- Additionally, this number may not work properly if you are attempting to - run multiple blobxfer sessions in parallel from one machine or IP address. - Furthermore, this number may default to a value that is too high if encryption - is enabled and the machine cannot handle processing multiple threads in - parallel. -- Computing file MD5 can be time-consuming for large files. If integrity - checking or rsync-like capability is not required, specify - ``--no-computefilemd5`` to disable MD5 computation for files. -- File share performance can be "slow" or become a bottleneck, especially for - file shares containing thousands of files, as multiple REST calls must be - performed for each file. Currently, a single file share has a limit of up - to 60 MB/s and 1000 8KB IOPS. Please refer to the - `Azure Storage Scalability and Performance Targets`_ for performance targets - and limits regarding Azure Storage Blobs and Files. If scalable high - performance is required, consider using blob storage or multiple file - shares. -- Using SAS keys may provide the best performance as the script bypasses - the Azure Storage Python SDK and uses requests/urllib3 directly with - Azure Storage endpoints. Transfers to/from Azure Files will always use - the Azure Storage Python SDK even with SAS keys. -- As of requests 2.6.0 and Python versions < 2.7.9 (i.e., the interpreter found - on default Ubuntu 14.04 installations), if certain packages are installed, - such as those found in ``requests[security]``, then the underlying ``urllib3`` - package will utilize the ``ndg-httpsclient`` package which will use - `pyOpenSSL`_. This will ensure the peers are `fully validated`_. However, - this incurs a rather large performance penalty. If you understand the - potential security risks of disabling this behavior due to high performance - requirements, you can either remove ``ndg-httpsclient`` or use the script - in a ``virtualenv`` environment without the ``ndg-httpsclient`` package. - Python versions >= 2.7.9 are not affected by this issue. These warnings can - be suppressed using ``--disable-urllib-warnings``, but this is not recommended - unless you understand the security implications. - -.. _Azure Storage Scalability and Performance Targets: https://azure.microsoft.com/en-us/documentation/articles/storage-scalability-targets/ -.. _pyOpenSSL: https://urllib3.readthedocs.org/en/latest/security.html#pyopenssl -.. _fully validated: https://urllib3.readthedocs.org/en/latest/security.html#insecureplatformwarning - - -Encryption Notes ---------------- - -- All required information regarding the encryption process is stored on - each blob's ``encryptiondata`` and ``encryptiondata_authentication`` - metadata. These metadata entries are used on download to configure the proper - download and decryption parameters as well as to authenticate - the encryption. Encryption metadata set by blobxfer (or the Azure Storage - .NET/Java client library) should not be modified or blobs/files may be - unrecoverable. -- Local files can be encrypted by blobxfer and stored in Azure Files and, - correspondingly, remote files on Azure File shares can be decrypted by - blobxfer as long as the metadata portions remain intact. -- Keys for the AES256 block cipher are generated on a per-blob/file basis. These - keys are encrypted using RSAES-OAEP. -- MD5 for both the pre-encrypted and encrypted version of the file is stored - in blob/file metadata. Rsync-like synchronization is still supported - transparently with encrypted blobs/files.
-- Whole file MD5 checks are skipped if a message authentication code is found - to validate the integrity of the encrypted data. -- Attempting to upload the same file as an encrypted blob with a different RSA - key or under a different encryption mode will not occur if the file content - MD5 is the same. This behavior can be overridden by including the option - ``--no-skiponmatch``. -- If one wishes to apply encryption to a blob/file already uploaded to Azure - Storage that has not changed, the upload will not occur since the underlying - file content MD5 has not changed; this behavior can be overriden by - including the option ``--no-skiponmatch``. -- Encryption is only applied to block blobs (or fileshare files). Encrypted - page blobs appear to be of minimal value stored in Azure Storage via - blobxfer. Thus, if uploading VHDs while enabling encryption in the script, - do not enable the option ``--pageblob``. ``--autovhd`` will continue to work - transparently where vhd files will be uploaded as page blobs in unencrypted - form while other files will be uploaded as encrypted block blobs. Note that - using ``--autovhd`` with encryption will force set the max chunk size to - 4 MiB for non-encrypted vhd files. -- Downloading encrypted blobs/files may not fully preallocate each file due to - padding. Script failure can result during transfer if there is insufficient - disk space. -- Zero-byte (empty) files are not encrypted. - -Change Log ----------- - -See the `CHANGELOG.md`_ file. - -.. _CHANGELOG.md: https://github.com/Azure/blobxfer/blob/master/CHANGELOG.md - ----- - -This project has adopted the -`Microsoft Open Source Code of Conduct `__. -For more information see the -`Code of Conduct FAQ `__ -or contact `opencode@microsoft.com `__ with any -additional questions or comments. diff --git a/blobxfer.py b/blobxfer.py deleted file mode 100755 index 5cadcba..0000000 --- a/blobxfer.py +++ /dev/null @@ -1,3033 +0,0 @@ -#!/usr/bin/env python - -# blobxfer Tool -# -# Copyright (c) Microsoft Corporation -# -# All rights reserved. -# -# MIT License -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -""" -Data transfer tool for Azure blob and file storage - -See notes in the README.rst file. 
- -TODO list: -- convert from threading to multiprocessing -- move instruction queue data to class -- migrate connections with sas to azure-storage -""" - -# pylint: disable=R0913,R0914 - -# stdlib imports -from __future__ import print_function -import argparse -import base64 -import errno -import fnmatch -import hashlib -import hmac -import json -import mimetypes -import multiprocessing -import os -import platform -# pylint: disable=F0401 -try: - import queue -except ImportError: # pragma: no cover - import Queue as queue -# pylint: enable=F0401 -import socket -import sys -import threading -import time -import traceback -try: - from urllib.parse import quote as urlquote -except ImportError: # pramga: no cover - from urllib import quote as urlquote -import xml.etree.ElementTree as ET -# non-stdlib imports -import azure.common -try: - import azure.servicemanagement -except ImportError: # pragma: no cover - pass -import azure.storage.blob -import azure.storage.file -try: - import cryptography.hazmat.backends - import cryptography.hazmat.primitives.asymmetric.padding - import cryptography.hazmat.primitives.asymmetric.rsa - import cryptography.hazmat.primitives.ciphers - import cryptography.hazmat.primitives.ciphers.algorithms - import cryptography.hazmat.primitives.ciphers.modes - import cryptography.hazmat.primitives.constant_time - import cryptography.hazmat.primitives.hashes - import cryptography.hazmat.primitives.padding - import cryptography.hazmat.primitives.serialization -except ImportError: # pragma: no cover - pass -import requests - -# remap keywords for Python3 -# pylint: disable=W0622,C0103 -try: - xrange -except NameError: # pragma: no cover - xrange = range -try: - long -except NameError: # pragma: no cover - long = int -# pylint: enable=W0622,C0103 - -# global defines -_SCRIPT_VERSION = '0.12.1' -_PY2 = sys.version_info.major == 2 -_DEFAULT_MAX_STORAGEACCOUNT_WORKERS = multiprocessing.cpu_count() * 3 -_MAX_BLOB_CHUNK_SIZE_BYTES = 4194304 -_EMPTY_MAX_PAGE_SIZE_MD5 = 'tc+p1sj+vWGPkawoQ9UKHA==' -_MAX_LISTBLOBS_RESULTS = 1000 -_PAGEBLOB_BOUNDARY = 512 -_DEFAULT_STORAGE_ENDPOINT = 'core.windows.net' -_DEFAULT_MANAGEMENT_ENDPOINT = 'management.core.windows.net' -_ENVVAR_STORAGEACCOUNTKEY = 'BLOBXFER_STORAGEACCOUNTKEY' -_ENVVAR_SASKEY = 'BLOBXFER_SASKEY' -_ENVVAR_RSAKEYPASSPHRASE = 'BLOBXFER_RSAKEYPASSPHRASE' -# encryption defines -_AES256_KEYLENGTH_BYTES = 32 -_AES256_BLOCKSIZE_BYTES = 16 -_HMACSHA256_DIGESTSIZE_BYTES = 32 -_AES256CBC_HMACSHA256_OVERHEAD_BYTES = _AES256_BLOCKSIZE_BYTES + \ - _HMACSHA256_DIGESTSIZE_BYTES -_ENCRYPTION_MODE_FULLBLOB = 'FullBlob' -_ENCRYPTION_MODE_CHUNKEDBLOB = 'ChunkedBlob' -_DEFAULT_ENCRYPTION_MODE = _ENCRYPTION_MODE_FULLBLOB -_ENCRYPTION_PROTOCOL_VERSION = '1.0' -_ENCRYPTION_ALGORITHM = 'AES_CBC_256' -_ENCRYPTION_AUTH_ALGORITHM = 'HMAC-SHA256' -_ENCRYPTION_CHUNKSTRUCTURE = 'IV || EncryptedData || Signature' -_ENCRYPTION_ENCRYPTED_KEY_SCHEME = 'RSA-OAEP' -_ENCRYPTION_METADATA_NAME = 'encryptiondata' -_ENCRYPTION_METADATA_MODE = 'EncryptionMode' -_ENCRYPTION_METADATA_ALGORITHM = 'Algorithm' -_ENCRYPTION_METADATA_MAC = 'MessageAuthenticationCode' -_ENCRYPTION_METADATA_LAYOUT = 'EncryptedDataLayout' -_ENCRYPTION_METADATA_CHUNKOFFSETS = 'ChunkByteOffsets' -_ENCRYPTION_METADATA_CHUNKSTRUCTURE = 'ChunkStructure' -_ENCRYPTION_METADATA_AGENT = 'EncryptionAgent' -_ENCRYPTION_METADATA_PROTOCOL = 'Protocol' -_ENCRYPTION_METADATA_ENCRYPTION_ALGORITHM = 'EncryptionAlgorithm' -_ENCRYPTION_METADATA_INTEGRITY_AUTH = 'EncryptionAuthentication' 
-_ENCRYPTION_METADATA_WRAPPEDCONTENTKEY = 'WrappedContentKey' -_ENCRYPTION_METADATA_ENCRYPTEDKEY = 'EncryptedKey' -_ENCRYPTION_METADATA_ENCRYPTEDAUTHKEY = 'EncryptedAuthenticationKey' -_ENCRYPTION_METADATA_CONTENT_IV = 'ContentEncryptionIV' -_ENCRYPTION_METADATA_KEYID = 'KeyId' -_ENCRYPTION_METADATA_BLOBXFER_EXTENSIONS = 'BlobxferExtensions' -_ENCRYPTION_METADATA_PREENCRYPTED_MD5 = 'PreEncryptedContentMD5' -_ENCRYPTION_METADATA_AUTH_NAME = 'encryptiondata_authentication' -_ENCRYPTION_METADATA_AUTH_METAAUTH = 'EncryptionMetadataAuthentication' -_ENCRYPTION_METADATA_AUTH_ENCODING = 'Encoding' -_ENCRYPTION_METADATA_AUTH_ENCODING_TYPE = 'UTF-8' - - -class EncryptionMetadataJson(object): - """Class for handling encryption metadata json""" - def __init__( - self, args, symkey, signkey, iv, encdata_signature, - preencrypted_md5, rsakeyid=None): - """Ctor for EncryptionMetadataJson - Parameters: - args - program arguments - symkey - symmetric key - signkey - signing key - iv - initialization vector - encdata_signature - encrypted data signature (MAC) - preencrypted_md5 - pre-encrypted md5 hash - rsakeyid - symmetric key id - Returns: - Nothing - Raises: - Nothing - """ - self.encmode = args.encmode - self.rsaprivatekey = args.rsaprivatekey - self.rsapublickey = args.rsapublickey - self.chunksizebytes = args.chunksizebytes - self.symkey = symkey - self.signkey = signkey - if rsakeyid is None: - self.rsakeyid = 'private:key1' - else: - self.rsakeyid = rsakeyid - self.iv = iv - self.hmac = encdata_signature - self.md5 = preencrypted_md5 - - def construct_metadata_json(self): - """Constructs encryptiondata metadata - Paramters: - None - Returns: - dict of encryptiondata and encryptiondata_authentiation json - Raises: - Nothing - """ - encsymkey, _ = rsa_encrypt_key( - self.rsaprivatekey, self.rsapublickey, self.symkey) - encsignkey, _ = rsa_encrypt_key( - self.rsaprivatekey, self.rsapublickey, self.signkey) - encjson = { - _ENCRYPTION_METADATA_MODE: self.encmode, - _ENCRYPTION_METADATA_WRAPPEDCONTENTKEY: { - _ENCRYPTION_METADATA_KEYID: self.rsakeyid, - _ENCRYPTION_METADATA_ENCRYPTEDKEY: encsymkey, - _ENCRYPTION_METADATA_ENCRYPTEDAUTHKEY: encsignkey, - _ENCRYPTION_METADATA_ALGORITHM: - _ENCRYPTION_ENCRYPTED_KEY_SCHEME, - }, - _ENCRYPTION_METADATA_AGENT: { - _ENCRYPTION_METADATA_PROTOCOL: _ENCRYPTION_PROTOCOL_VERSION, - _ENCRYPTION_METADATA_ENCRYPTION_ALGORITHM: - _ENCRYPTION_ALGORITHM - }, - _ENCRYPTION_METADATA_INTEGRITY_AUTH: { - _ENCRYPTION_METADATA_ALGORITHM: - _ENCRYPTION_AUTH_ALGORITHM, - }, - 'KeyWrappingMetadata': {}, - } - if self.md5 is not None: - encjson[_ENCRYPTION_METADATA_BLOBXFER_EXTENSIONS] = { - _ENCRYPTION_METADATA_PREENCRYPTED_MD5: self.md5 - } - if self.encmode == _ENCRYPTION_MODE_FULLBLOB: - encjson[_ENCRYPTION_METADATA_CONTENT_IV] = base64encode(self.iv) - encjson[_ENCRYPTION_METADATA_INTEGRITY_AUTH][ - _ENCRYPTION_METADATA_MAC] = base64encode(self.hmac) - elif self.encmode == _ENCRYPTION_MODE_CHUNKEDBLOB: - encjson[_ENCRYPTION_METADATA_LAYOUT] = {} - encjson[_ENCRYPTION_METADATA_LAYOUT][ - _ENCRYPTION_METADATA_CHUNKOFFSETS] = \ - self.chunksizebytes + _AES256CBC_HMACSHA256_OVERHEAD_BYTES + 1 - encjson[_ENCRYPTION_METADATA_LAYOUT][ - _ENCRYPTION_METADATA_CHUNKSTRUCTURE] = \ - _ENCRYPTION_CHUNKSTRUCTURE - else: - raise RuntimeError( - 'Unknown encryption mode: {}'.format(self.encmode)) - bencjson = json.dumps( - encjson, sort_keys=True, ensure_ascii=False).encode( - _ENCRYPTION_METADATA_AUTH_ENCODING_TYPE) - encjson = {_ENCRYPTION_METADATA_NAME: - json.dumps(encjson, 
sort_keys=True)} - # compute MAC over encjson - hmacsha256 = hmac.new(self.signkey, digestmod=hashlib.sha256) - hmacsha256.update(bencjson) - authjson = { - _ENCRYPTION_METADATA_AUTH_METAAUTH: { - _ENCRYPTION_METADATA_ALGORITHM: _ENCRYPTION_AUTH_ALGORITHM, - _ENCRYPTION_METADATA_AUTH_ENCODING: - _ENCRYPTION_METADATA_AUTH_ENCODING_TYPE, - _ENCRYPTION_METADATA_MAC: base64encode(hmacsha256.digest()), - } - } - encjson[_ENCRYPTION_METADATA_AUTH_NAME] = json.dumps( - authjson, sort_keys=True) - return encjson - - def parse_metadata_json( - self, blobname, rsaprivatekey, rsapublickey, mddict): - """Parses a meta data dictionary containing the encryptiondata - metadata - Parameters: - blobname - name of blob - rsaprivatekey - RSA private key - rsapublickey - RSA public key - mddict - metadata dictionary - Returns: - Nothing - Raises: - RuntimeError if encryptiondata metadata contains invalid or - unknown fields - """ - if _ENCRYPTION_METADATA_NAME not in mddict: - return - # json parse internal dict - meta = json.loads(mddict[_ENCRYPTION_METADATA_NAME]) - # populate preencryption md5 - if (_ENCRYPTION_METADATA_BLOBXFER_EXTENSIONS in meta and - _ENCRYPTION_METADATA_PREENCRYPTED_MD5 in meta[ - _ENCRYPTION_METADATA_BLOBXFER_EXTENSIONS]): - self.md5 = meta[_ENCRYPTION_METADATA_BLOBXFER_EXTENSIONS][ - _ENCRYPTION_METADATA_PREENCRYPTED_MD5] - else: - self.md5 = None - # if RSA key is not present return - if rsaprivatekey is None and rsapublickey is None: - return - # check for required metadata fields - if (_ENCRYPTION_METADATA_MODE not in meta or - _ENCRYPTION_METADATA_AGENT not in meta): - return - # populate encryption mode - self.encmode = meta[_ENCRYPTION_METADATA_MODE] - # validate known encryption metadata is set to proper values - if self.encmode == _ENCRYPTION_MODE_CHUNKEDBLOB: - chunkstructure = meta[_ENCRYPTION_METADATA_LAYOUT][ - _ENCRYPTION_METADATA_CHUNKSTRUCTURE] - if chunkstructure != _ENCRYPTION_CHUNKSTRUCTURE: - raise RuntimeError( - '{}: unknown encrypted chunk structure {}'.format( - blobname, chunkstructure)) - protocol = meta[_ENCRYPTION_METADATA_AGENT][ - _ENCRYPTION_METADATA_PROTOCOL] - if protocol != _ENCRYPTION_PROTOCOL_VERSION: - raise RuntimeError('{}: unknown encryption protocol: {}'.format( - blobname, protocol)) - blockcipher = meta[_ENCRYPTION_METADATA_AGENT][ - _ENCRYPTION_METADATA_ENCRYPTION_ALGORITHM] - if blockcipher != _ENCRYPTION_ALGORITHM: - raise RuntimeError('{}: unknown block cipher: {}'.format( - blobname, blockcipher)) - if _ENCRYPTION_METADATA_INTEGRITY_AUTH in meta: - intauth = meta[_ENCRYPTION_METADATA_INTEGRITY_AUTH][ - _ENCRYPTION_METADATA_ALGORITHM] - if intauth != _ENCRYPTION_AUTH_ALGORITHM: - raise RuntimeError( - '{}: unknown integrity/auth method: {}'.format( - blobname, intauth)) - symkeyalg = meta[_ENCRYPTION_METADATA_WRAPPEDCONTENTKEY][ - _ENCRYPTION_METADATA_ALGORITHM] - if symkeyalg != _ENCRYPTION_ENCRYPTED_KEY_SCHEME: - raise RuntimeError('{}: unknown key encryption scheme: {}'.format( - blobname, symkeyalg)) - # populate iv and hmac - if self.encmode == _ENCRYPTION_MODE_FULLBLOB: - self.iv = base64.b64decode(meta[_ENCRYPTION_METADATA_CONTENT_IV]) - # don't base64 decode hmac - if _ENCRYPTION_METADATA_INTEGRITY_AUTH in meta: - self.hmac = meta[_ENCRYPTION_METADATA_INTEGRITY_AUTH][ - _ENCRYPTION_METADATA_MAC] - else: - self.hmac = None - # populate chunksize - if self.encmode == _ENCRYPTION_MODE_CHUNKEDBLOB: - self.chunksizebytes = long( - meta[_ENCRYPTION_METADATA_LAYOUT][ - _ENCRYPTION_METADATA_CHUNKOFFSETS]) - # if RSA key is a public key, 
stop here as keys cannot be decrypted - if rsaprivatekey is None: - return - # decrypt symmetric key - self.symkey = rsa_decrypt_key( - rsaprivatekey, - meta[_ENCRYPTION_METADATA_WRAPPEDCONTENTKEY][ - _ENCRYPTION_METADATA_ENCRYPTEDKEY], None) - # decrypt signing key, if it exists - if _ENCRYPTION_METADATA_ENCRYPTEDAUTHKEY in meta[ - _ENCRYPTION_METADATA_WRAPPEDCONTENTKEY]: - self.signkey = rsa_decrypt_key( - rsaprivatekey, - meta[_ENCRYPTION_METADATA_WRAPPEDCONTENTKEY][ - _ENCRYPTION_METADATA_ENCRYPTEDAUTHKEY], None) - else: - self.signkey = None - # validate encryptiondata metadata using the signing key - if (self.signkey is not None and - _ENCRYPTION_METADATA_AUTH_NAME in mddict): - authmeta = json.loads(mddict[_ENCRYPTION_METADATA_AUTH_NAME]) - if _ENCRYPTION_METADATA_AUTH_METAAUTH not in authmeta: - raise RuntimeError( - '{}: encryption metadata auth block not found'.format( - blobname)) - if _ENCRYPTION_METADATA_AUTH_ENCODING not in authmeta[ - _ENCRYPTION_METADATA_AUTH_METAAUTH]: - raise RuntimeError( - '{}: encryption metadata auth encoding not found'.format( - blobname)) - intauth = authmeta[_ENCRYPTION_METADATA_AUTH_METAAUTH][ - _ENCRYPTION_METADATA_ALGORITHM] - if intauth != _ENCRYPTION_AUTH_ALGORITHM: - raise RuntimeError( - '{}: unknown integrity/auth method: {}'.format( - blobname, intauth)) - authhmac = base64.b64decode( - authmeta[_ENCRYPTION_METADATA_AUTH_METAAUTH][ - _ENCRYPTION_METADATA_MAC]) - bmeta = mddict[_ENCRYPTION_METADATA_NAME].encode( - authmeta[_ENCRYPTION_METADATA_AUTH_METAAUTH][ - _ENCRYPTION_METADATA_AUTH_ENCODING]) - hmacsha256 = hmac.new(self.signkey, digestmod=hashlib.sha256) - hmacsha256.update(bmeta) - if hmacsha256.digest() != authhmac: - raise RuntimeError( - '{}: encryption metadata authentication failed'.format( - blobname)) - - -class PqTupleSort(tuple): - """Priority Queue tuple sorter: handles priority collisions. 
- 0th item in the tuple is the priority number.""" - def __lt__(self, rhs): - return self[0] < rhs[0] - - def __gt__(self, rhs): - return self[0] > rhs[0] - - def __le__(self, rhs): - return self[0] <= rhs[0] - - def __ge__(self, rhs): - return self[0] >= rhs[0] - - -class SasBlobList(object): - """Sas Blob listing object""" - def __init__(self): - """Ctor for SasBlobList""" - self.blobs = [] - self.next_marker = None - - def __iter__(self): - """Iterator""" - return iter(self.blobs) - - def __len__(self): - """Length""" - return len(self.blobs) - - def __getitem__(self, index): - """Accessor""" - return self.blobs[index] - - def add_blob(self, name, content_length, content_md5, blobtype, mddict): - """Adds a blob to the list - Parameters: - name - blob name - content_length - content length - content_md5 - content md5 - blobtype - blob type - mddict - metadata dictionary - Returns: - Nothing - Raises: - Nothing - """ - obj = type('bloblistobject', (object,), {}) - obj.name = name - obj.metadata = mddict - obj.properties = type('properties', (object,), {}) - obj.properties.content_length = content_length - obj.properties.content_settings = azure.storage.blob.ContentSettings() - if content_md5 is not None and len(content_md5) > 0: - obj.properties.content_settings.content_md5 = content_md5 - obj.properties.blobtype = blobtype - self.blobs.append(obj) - - def set_next_marker(self, marker): - """Set the continuation token - Parameters: - marker - next marker - Returns: - Nothing - Raises: - Nothing - """ - if marker is not None and len(marker) > 0: - self.next_marker = marker - - -class SasBlobService(object): - """BlobService supporting SAS for functions used in the Python SDK. - create_container method does not exist because it is not a supported - operation under SAS""" - def __init__(self, endpoint, saskey, timeout): - """SAS Blob Service ctor - Parameters: - endpoint - storage endpoint - saskey - saskey - timeout - timeout - Returns: - Nothing - Raises: - Nothing - """ - self.endpoint = endpoint - # normalize sas key - if saskey[0] != '?': - self.saskey = '?' 
+ saskey - else: - self.saskey = saskey - self.timeout = timeout - - def _parse_blob_list_xml(self, content): - """Parse blob list in xml format to an attribute-based object - Parameters: - content - http response content in xml - Returns: - attribute-based object - Raises: - No special exception handling - """ - result = SasBlobList() - root = ET.fromstring(content) - blobs = root.find('Blobs') - for blob in blobs.iter('Blob'): - name = blob.find('Name').text - props = blob.find('Properties') - cl = long(props.find('Content-Length').text) - md5 = props.find('Content-MD5').text - bt = props.find('BlobType').text - metadata = blob.find('Metadata') - mddict = {} - for md in metadata: - mddict[md.tag] = md.text - result.add_blob(name, cl, md5, bt, mddict) - try: - result.set_next_marker(root.find('NextMarker').text) - except Exception: - pass - return result - - def list_blobs( - self, container_name, marker=None, - max_results=_MAX_LISTBLOBS_RESULTS, include=None): - """List blobs in container - Parameters: - container_name - container name - marker - marker - max_results - max results - include - `azure.storage.models.Include` include object - Returns: - List of blobs - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - saskey=self.saskey) - reqparams = { - 'restype': 'container', - 'comp': 'list', - 'maxresults': str(max_results)} - if marker is not None: - reqparams['marker'] = marker - if include is not None and include.metadata: - reqparams['include'] = 'metadata' - response = azure_request( - requests.get, url=url, params=reqparams, timeout=self.timeout) - response.raise_for_status() - if response.status_code != 200: - raise IOError( - 'incorrect status code returned for list_blobs: {}'.format( - response.status_code)) - return self._parse_blob_list_xml(response.content) - - def _get_blob(self, container_name, blob_name, start_range, end_range): - """Get blob - Parameters: - container_name - container name - blob_name - name of blob - start_range - start range of bytes - end_range - end range of bytes - Returns: - `azure.storage.blob.Blob` object - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}/{blob_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - blob_name=blob_name, saskey=self.saskey) - reqheaders = { - 'x-ms-range': 'bytes={}-{}'.format(start_range, end_range) - } - response = azure_request( - requests.get, url=url, headers=reqheaders, timeout=self.timeout) - response.raise_for_status() - if response.status_code != 200 and response.status_code != 206: - raise IOError( - 'incorrect status code returned for get_blob: {}'.format( - response.status_code)) - return azure.storage.blob.Blob(content=response.content) - - def get_blob_properties(self, container_name, blob_name): - """Get blob properties - Parameters: - container_name - container name - blob_name - name of blob - Returns: - `azure.storage.blob.Blob` object - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}/{blob_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - blob_name=blob_name, saskey=self.saskey) - response = azure_request( - requests.head, url=url, timeout=self.timeout) - response.raise_for_status() - if response.status_code != 200: - raise IOError('incorrect status code returned for ' - 'get_blob_properties: {}'.format( - response.status_code)) - # parse response 
headers into blob object - blob = azure.storage.blob.Blob() - blob.properties = azure.storage.blob.BlobProperties() - blob.properties.content_length = \ - long(response.headers['content-length']) - blob.properties.content_settings = azure.storage.blob.ContentSettings() - if 'content-md5' in response.headers: - blob.properties.content_settings.content_md5 = \ - response.headers['content-md5'] - # read meta values, all meta values are lowercased - mddict = {} - for res in response.headers: - if res.startswith('x-ms-meta-'): - mddict[res[10:]] = response.headers[res] - blob.metadata = mddict - return blob - - def set_blob_metadata( - self, container_name, blob_name, metadata): - """Set blob metadata. Clearing is not supported. - Parameters: - container_name - container name - blob_name - name of blob - metadata - blob metadata dictionary - Returns: - Nothing - Raises: - IOError if unexpected status code - """ - if metadata is None or len(metadata) == 0: - return - url = '{endpoint}{container_name}/{blob_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - blob_name=blob_name, saskey=self.saskey) - reqparams = {'comp': 'metadata'} - reqheaders = {} - for key in metadata: - reqheaders['x-ms-meta-' + key] = metadata[key] - response = azure_request( - requests.put, url=url, params=reqparams, headers=reqheaders, - timeout=self.timeout) - response.raise_for_status() - if response.status_code != 200: - raise IOError( - 'incorrect status code returned for ' - 'set_blob_metadata: {}'.format(response.status_code)) - - def create_blob( - self, container_name, blob_name, content_length, content_settings): - """Create blob for initializing page blobs - Parameters: - container_name - container name - blob_name - name of blob - content_length - content length aligned to 512-byte boundary - content_settings - `azure.storage.blob.ContentSettings` object - Returns: - response content - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}/{blob_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - blob_name=blob_name, saskey=self.saskey) - reqheaders = { - 'x-ms-blob-type': 'PageBlob', - 'x-ms-blob-content-length': str(content_length), - } - if content_settings is not None: - if content_settings.content_md5 is not None: - reqheaders['x-ms-blob-content-md5'] = \ - content_settings.content_md5 - if content_settings.content_type is not None: - reqheaders['x-ms-blob-content-type'] = \ - content_settings.content_type - response = azure_request( - requests.put, url=url, headers=reqheaders, timeout=self.timeout) - response.raise_for_status() - if response.status_code != 201: - raise IOError( - 'incorrect status code returned for create_blob: {}'.format( - response.status_code)) - return response.content - - def _put_blob( - self, container_name, blob_name, blob, content_settings): - """Put blob for creating/updating block blobs - Parameters: - container_name - container name - blob_name - name of blob - blob - blob content - content_settings - `azure.storage.blob.ContentSettings` object - Returns: - response content - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}/{blob_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - blob_name=blob_name, saskey=self.saskey) - reqheaders = {'x-ms-blob-type': 'BlockBlob'} - if content_settings is not None: - if content_settings.content_md5 is not None: - reqheaders['x-ms-blob-content-md5'] = \ - content_settings.content_md5 - if
content_settings.content_type is not None: - reqheaders['x-ms-blob-content-type'] = \ - content_settings.content_type - response = azure_request( - requests.put, url=url, headers=reqheaders, timeout=self.timeout) - response.raise_for_status() - if response.status_code != 201: - raise IOError( - 'incorrect status code returned for put_blob: {}'.format( - response.status_code)) - return response.content - - def update_page( - self, container_name, blob_name, page, start_range, end_range, - validate_content=False, content_md5=None): - """Put page for page blob. This API differs from the Python storage - sdk to maintain efficiency for block md5 computation. - Parameters: - container_name - container name - blob_name - name of blob - page - page data - start_range - start range of bytes - end_range - end range of bytes - validate_content - validate content - content_md5 - md5 hash for page data - Returns: - Nothing - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}/{blob_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - blob_name=blob_name, saskey=self.saskey) - reqheaders = { - 'x-ms-range': 'bytes={}-{}'.format(start_range, end_range), - 'x-ms-page-write': 'update'} - if validate_content and content_md5 is not None: - reqheaders['Content-MD5'] = content_md5 - reqparams = {'comp': 'page'} - response = azure_request( - requests.put, url=url, params=reqparams, headers=reqheaders, - data=page, timeout=self.timeout) - response.raise_for_status() - if response.status_code != 201: - raise IOError( - 'incorrect status code returned for update_page: {}'.format( - response.status_code)) - - def put_block( - self, container_name, blob_name, block, block_id, - validate_content=False): - """Put block for blob - Parameters: - container_name - container name - blob_name - name of blob - block - block data - block_id - block id - validate_content - validate content - Returns: - Nothing - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}/{blob_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - blob_name=blob_name, saskey=self.saskey) - # compute block md5 - if validate_content: - reqheaders = {'Content-MD5': compute_md5_for_data_asbase64(block)} - else: - reqheaders = None - reqparams = {'comp': 'block', 'blockid': block_id} - response = azure_request( - requests.put, url=url, params=reqparams, headers=reqheaders, - data=block, timeout=self.timeout) - response.raise_for_status() - if response.status_code != 201: - raise IOError( - 'incorrect status code returned for put_block: {}'.format( - response.status_code)) - - def put_block_list( - self, container_name, blob_name, block_list, - content_settings): - """Put block list for blob - Parameters: - container_name - container name - blob_name - name of blob - block_list - list of `azure.storage.blob.BlobBlock` - content_settings - `azure.storage.blob.ContentSettings` object - Returns: - Nothing - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}/{blob_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - blob_name=blob_name, saskey=self.saskey) - reqheaders = {} - if content_settings is not None: - if content_settings.content_md5 is not None: - reqheaders['x-ms-blob-content-md5'] = \ - content_settings.content_md5 - if content_settings.content_type is not None: - reqheaders['x-ms-blob-content-type'] = \ - content_settings.content_type - reqparams = {'comp': 
'blocklist'} - body = ['<?xml version="1.0" encoding="utf-8"?><BlockList>'] - for block in block_list: - body.append('<Latest>{}</Latest>'.format(block.id)) - body.append('</BlockList>') - response = azure_request( - requests.put, url=url, params=reqparams, headers=reqheaders, - data=''.join(body), timeout=self.timeout) - response.raise_for_status() - if response.status_code != 201: - raise IOError( - 'incorrect status code returned for put_block_list: {}'.format( - response.status_code)) - - def set_blob_properties( - self, container_name, blob_name, content_settings): - """Sets blob properties (MD5 only) - Parameters: - container_name - container name - blob_name - name of blob - content_settings - `azure.storage.blob.ContentSettings` object - Returns: - Nothing - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}/{blob_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - blob_name=blob_name, saskey=self.saskey) - reqheaders = {} - if content_settings is not None: - if content_settings.content_md5 is not None: - reqheaders['x-ms-blob-content-md5'] = \ - content_settings.content_md5 - reqparams = {'comp': 'properties'} - response = azure_request( - requests.put, url=url, params=reqparams, headers=reqheaders, - timeout=self.timeout) - response.raise_for_status() - if response.status_code != 200: - raise IOError('incorrect status code returned for ' - 'set_blob_properties: {}'.format( - response.status_code)) - - def delete_blob( - self, container_name, blob_name): - """Deletes a blob - Parameters: - container_name - container name - blob_name - name of blob - Returns: - Nothing - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}/{blob_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - blob_name=blob_name, saskey=self.saskey) - response = azure_request( - requests.delete, url=url, timeout=self.timeout) - response.raise_for_status() - if response.status_code != 202: - raise IOError( - 'incorrect status code returned for delete_blob: {}'.format( - response.status_code)) - - def create_container( - self, container_name, fail_on_exist=False): - """Create a container - Parameters: - container_name - container name - Returns: - Nothing - Raises: - IOError if unexpected status code - """ - url = '{endpoint}{container_name}{saskey}'.format( - endpoint=self.endpoint, container_name=container_name, - saskey=self.saskey) - reqparams = {'restype': 'container'} - response = azure_request( - requests.put, url=url, params=reqparams, timeout=self.timeout) - if response.status_code != 201: - if response.status_code == 409: - if fail_on_exist: - response.raise_for_status() - else: - return - raise IOError('incorrect status code returned for ' - 'create_container: {}'.format( - response.status_code)) - - -class StorageChunkWorker(threading.Thread): - """Chunk worker for a storage entity""" - def __init__( - self, exc, s_in_queue, s_out_queue, args, xfertoazure, - blob_service, file_service): - """Storage Chunk worker Thread ctor - Parameters: - exc - exception list - s_in_queue - storage in queue - s_out_queue - storage out queue - args - program arguments - xfertoazure - xfer to azure (direction) - blob_service - blob service - file_service - file service - Returns: - Nothing - Raises: - Nothing - """ - threading.Thread.__init__(self) - self.terminate = False - self._exc = exc - self._in_queue = s_in_queue - self._out_queue = s_out_queue - self.args = args - self.xfertoazure = xfertoazure - self.blob_service = blob_service - self.file_service =
file_service - - def run(self): - """Thread code - Parameters: - Nothing - Returns: - Nothing - Raises: - Nothing - """ - while not self.terminate: - try: - pri, (localresource, container, remoteresource, blockid, - offset, bytestoxfer, encparam, flock, filedesc) = \ - self._in_queue.get_nowait() - except queue.Empty: - break - # detect termination early and break if necessary - if self.terminate: - break - try: - if self.xfertoazure: - # if iv is not ready for this chunk, re-add back to queue - if (not as_page_blob(self.args, localresource) and - ((self.args.rsaprivatekey is not None or - self.args.rsapublickey is not None) and - self.args.encmode == _ENCRYPTION_MODE_FULLBLOB)): - _iblockid = int(blockid) - if _iblockid not in encparam[2]: - self._in_queue.put( - PqTupleSort(( - pri, - (localresource, container, remoteresource, - blockid, offset, bytestoxfer, encparam, - flock, filedesc)))) - continue - # upload block/page - self.put_storage_data( - localresource, container, remoteresource, blockid, - offset, bytestoxfer, encparam, flock, filedesc) - else: - # download range - self.get_storage_range( - localresource, container, remoteresource, blockid, - offset, bytestoxfer, encparam, flock, filedesc) - # pylint: disable=W0703 - except Exception: - # pylint: enable=W0703 - self._exc.append(traceback.format_exc()) - self._out_queue.put((localresource, encparam)) - if len(self._exc) > 0: - break - - def put_storage_data( - self, localresource, container, remoteresource, blockid, offset, - bytestoxfer, encparam, flock, filedesc): - """Puts data (blob, page or file bits) into Azure storage - Parameters: - localresource - name of local resource - container - blob container - remoteresource - name of remote resource - blockid - block id (ignored for page blobs) - offset - file offset - bytestoxfer - number of bytes to xfer - encparam - encryption metadata: (symkey, signkey, ivmap, pad) - flock - file lock - filedesc - file handle - Returns: - Nothing - Raises: - IOError if file cannot be read - """ - # if bytestoxfer is zero, then we're transferring a zero-byte - # file, use put blob instead of page/block ops - if bytestoxfer == 0: - contentmd5 = compute_md5_for_data_asbase64(b'') - if as_page_blob(self.args, localresource): - azure_request( - self.blob_service[1].create_blob, container_name=container, - blob_name=remoteresource, content_length=bytestoxfer, - content_settings=azure.storage.blob.ContentSettings( - content_type=get_mime_type(localresource), - content_md5=contentmd5)) - elif self.args.fileshare: - fsfile = split_fileshare_path_into_parts(remoteresource) - azure_request( - self.file_service.create_file, share_name=container, - directory_name=fsfile[0], file_name=fsfile[1], - content_length=bytestoxfer, - content_settings=azure.storage.file.ContentSettings( - content_type=get_mime_type(localresource), - content_md5=contentmd5)) - else: - azure_request( - self.blob_service[0]._put_blob, container_name=container, - blob_name=remoteresource, blob=None, - content_settings=azure.storage.blob.ContentSettings( - content_type=get_mime_type(localresource), - content_md5=contentmd5)) - return - # read the file at specified offset, must take lock - data = None - with flock: - closefd = False - if not filedesc: - filedesc = open(localresource, 'rb') - closefd = True - filedesc.seek(offset, 0) - data = filedesc.read(bytestoxfer) - if closefd: - filedesc.close() - if not data: - raise IOError('could not read {}: {} -> {}'.format( - localresource, offset, offset + bytestoxfer)) - # issue REST put 
- if as_page_blob(self.args, localresource): - aligned = page_align_content_length(bytestoxfer) - # fill data to boundary - if aligned != bytestoxfer: - data = data.ljust(aligned, b'\0') - # compute page md5 - contentmd5 = compute_md5_for_data_asbase64(data) - # check if this page is empty - if contentmd5 == _EMPTY_MAX_PAGE_SIZE_MD5: - return - elif len(data) != _MAX_BLOB_CHUNK_SIZE_BYTES: - data_chk = b'\0' * len(data) - data_chk_md5 = compute_md5_for_data_asbase64(data_chk) - del data_chk - if data_chk_md5 == contentmd5: - return - del data_chk_md5 - # upload page range - if self.args.saskey: - azure_request( - self.blob_service[1].update_page, container_name=container, - blob_name=remoteresource, page=data, start_range=offset, - end_range=offset + aligned - 1, - validate_content=self.args.computeblockmd5, - content_md5=contentmd5, timeout=self.args.timeout) - else: - azure_request( - self.blob_service[1].update_page, container_name=container, - blob_name=remoteresource, page=data, start_range=offset, - end_range=offset + aligned - 1, - validate_content=self.args.computeblockmd5, - timeout=self.args.timeout) - else: - # encrypt block if required - if (encparam is not None and - (self.args.rsaprivatekey is not None or - self.args.rsapublickey is not None)): - symkey = encparam[0] - signkey = encparam[1] - if self.args.encmode == _ENCRYPTION_MODE_FULLBLOB: - _blkid = int(blockid) - iv = encparam[2][_blkid] - pad = encparam[3] - else: - iv = None - pad = True - data = encrypt_chunk( - symkey, signkey, data, self.args.encmode, iv=iv, pad=pad) - with flock: - if self.args.encmode == _ENCRYPTION_MODE_FULLBLOB: - # compute hmac for chunk - if _blkid == 0: - encparam[2]['hmac'].update(iv + data) - else: - encparam[2]['hmac'].update(data) - # store iv for next chunk - encparam[2][_blkid + 1] = data[ - len(data) - _AES256_BLOCKSIZE_BYTES:] - # compute md5 for encrypted data chunk - encparam[2]['md5'].update(data) - if self.args.fileshare: - bytestoxfer = len(data) - encparam[2]['filesize'] += bytestoxfer - if self.args.fileshare: - fsfile = split_fileshare_path_into_parts(remoteresource) - # subtract 1 from end_range - azure_request( - self.file_service.update_range, share_name=container, - directory_name=fsfile[0], file_name=fsfile[1], - data=data, start_range=offset, - end_range=offset + bytestoxfer - 1, - validate_content=self.args.computeblockmd5, - timeout=self.args.timeout) - else: - azure_request( - self.blob_service[0].put_block, container_name=container, - blob_name=remoteresource, block=data, block_id=blockid, - validate_content=self.args.computeblockmd5, - timeout=self.args.timeout) - del data - - def get_storage_range( - self, localresource, container, remoteresource, blockid, offset, - bytestoxfer, encparam, flock, filedesc): - """Get a segment of a blob/page/file using range offset downloading - Parameters: - localresource - name of local resource - container - blob container - remoteresource - name of remote resource - blockid - block id (integral) - offset - file offset - bytestoxfer - number of bytes to xfer - encparam - decryption metadata: - (symkey, signkey, offset_mod, encmode, ivmap, unpad) - flock - file lock - filedesc - file handle - Returns: - Nothing - Raises: - Nothing - """ - if (encparam[0] is not None and - encparam[3] == _ENCRYPTION_MODE_FULLBLOB): - if offset == 0: - start_range = offset - end_range = offset + bytestoxfer - else: - # retrieve block size data prior for IV - start_range = offset - _AES256_BLOCKSIZE_BYTES - end_range = offset + bytestoxfer - else: - 
start_range = offset - end_range = offset + bytestoxfer - if self.args.fileshare: - fsfile = split_fileshare_path_into_parts(remoteresource) - _blob = azure_request( - self.file_service._get_file, share_name=container, - directory_name=fsfile[0], file_name=fsfile[1], - start_range=start_range, end_range=end_range, - timeout=self.args.timeout) - else: - if as_page_blob(self.args, localresource): - blob_service = self.blob_service[1] - else: - blob_service = self.blob_service[0] - _blob = azure_request( - blob_service._get_blob, timeout=self.args.timeout, - container_name=container, blob_name=remoteresource, - start_range=start_range, end_range=end_range) - blobdata = _blob.content - # decrypt block if required - if encparam[0] is not None: - if encparam[3] == _ENCRYPTION_MODE_FULLBLOB: - if offset == 0: - iv = encparam[4][0] - else: - iv = blobdata[:_AES256_BLOCKSIZE_BYTES] - blobdata = blobdata[_AES256_BLOCKSIZE_BYTES:] - unpad = encparam[5] - # update any buffered data to hmac - hmacdict = encparam[4]['hmac'] - if hmacdict['hmac'] is not None: - # grab file lock to manipulate hmac - with flock: - # include iv in first hmac calculation - if offset == 0: - hmacdict['buffered'][blockid] = iv + blobdata - else: - hmacdict['buffered'][blockid] = blobdata - # try to process hmac data - while True: - curr = hmacdict['curr'] - if curr in hmacdict['buffered']: - hmacdict['hmac'].update( - hmacdict['buffered'][curr]) - hmacdict['buffered'].pop(curr) - hmacdict['curr'] = curr + 1 - else: - break - else: - iv = None - unpad = True - blobdata = decrypt_chunk( - encparam[0], encparam[1], blobdata, encparam[3], iv=iv, - unpad=unpad) - if blobdata is not None: - with flock: - closefd = False - if not filedesc: - filedesc = open(localresource, 'r+b') - closefd = True - filedesc.seek(offset - (encparam[2] or 0), 0) - filedesc.write(blobdata) - if closefd: - filedesc.close() - del blobdata - del _blob - - -def pad_pkcs7(buf): - """Appends PKCS7 padding to an input buffer. - Parameters: - buf - buffer to add padding - Returns: - buffer with PKCS7_PADDING - Raises: - No special exception handling - """ - padder = cryptography.hazmat.primitives.padding.PKCS7( - cryptography.hazmat.primitives.ciphers. - algorithms.AES.block_size).padder() - return padder.update(buf) + padder.finalize() - - -def unpad_pkcs7(buf): - """Removes PKCS7 padding a decrypted object. - Parameters: - buf - buffer to remove padding - Returns: - buffer without PKCS7_PADDING - Raises: - No special exception handling - """ - unpadder = cryptography.hazmat.primitives.padding.PKCS7( - cryptography.hazmat.primitives.ciphers. 
- algorithms.AES.block_size).unpadder() - return unpadder.update(buf) + unpadder.finalize() - - -def generate_aes256_keys(): - """Generate AES256 symmetric key and signing key - Parameters: - None - Returns: - Tuple of symmetric key and signing key - Raises: - Nothing - """ - symkey = os.urandom(_AES256_KEYLENGTH_BYTES) - signkey = os.urandom(_AES256_KEYLENGTH_BYTES) - return symkey, signkey - - -def rsa_encrypt_key(rsaprivatekey, rsapublickey, plainkey, asbase64=True): - """Encrypt a plaintext key using RSA and PKCS1_OAEP padding - Parameters: - rsaprivatekey - rsa private key for encryption - rsapublickey - rsa public key for encryption - plainkey - plaintext key - asbase64 - encode as base64 - Returns: - Tuple of encrypted key and signature (if RSA private key is given) - Raises: - Nothing - """ - if rsapublickey is None: - rsapublickey = rsaprivatekey.public_key() - if rsaprivatekey is None: - signature = None - else: - signer = rsaprivatekey.signer( - cryptography.hazmat.primitives.asymmetric.padding.PSS( - mgf=cryptography.hazmat.primitives.asymmetric.padding.MGF1( - cryptography.hazmat.primitives.hashes.SHA256()), - salt_length=cryptography.hazmat.primitives.asymmetric. - padding.PSS.MAX_LENGTH), - cryptography.hazmat.primitives.hashes.SHA256()) - signer.update(plainkey) - signature = signer.finalize() - enckey = rsapublickey.encrypt( - plainkey, cryptography.hazmat.primitives.asymmetric.padding.OAEP( - mgf=cryptography.hazmat.primitives.asymmetric.padding.MGF1( - algorithm=cryptography.hazmat.primitives.hashes.SHA1()), - algorithm=cryptography.hazmat.primitives.hashes.SHA1(), - label=None)) - if asbase64: - return base64encode(enckey), base64encode( - signature) if signature is not None else signature - else: - return enckey, signature - - -def rsa_decrypt_key(rsaprivatekey, enckey, signature, isbase64=True): - """Decrypt an RSA encrypted key and optional signature verification - Parameters: - rsaprivatekey - rsa private key for decryption - enckey - encrypted key - signature - optional signature to verify encrypted data - isbase64 - if keys are base64 encoded - Returns: - Decrypted key - Raises: - RuntimeError if RSA signature validation fails - """ - if isbase64: - enckey = base64.b64decode(enckey) - deckey = rsaprivatekey.decrypt( - enckey, cryptography.hazmat.primitives.asymmetric.padding.OAEP( - mgf=cryptography.hazmat.primitives.asymmetric.padding.MGF1( - algorithm=cryptography.hazmat.primitives.hashes.SHA1()), - algorithm=cryptography.hazmat.primitives.hashes.SHA1(), - label=None)) - if signature is not None and len(signature) > 0: - rsapublickey = rsaprivatekey.public_key() - if isbase64: - signature = base64.b64decode(signature) - verifier = rsapublickey.verifier( - signature, cryptography.hazmat.primitives.asymmetric.padding.PSS( - mgf=cryptography.hazmat.primitives.asymmetric.padding.MGF1( - cryptography.hazmat.primitives.hashes.SHA256()), - salt_length=cryptography.hazmat.primitives.asymmetric. 
- padding.PSS.MAX_LENGTH), - cryptography.hazmat.primitives.hashes.SHA256()) - verifier.update(deckey) - verifier.verify() - return deckey - - -def encrypt_chunk(symkey, signkey, data, encmode, iv=None, pad=False): - """Encrypt a chunk of data - Parameters: - symkey - symmetric key - signkey - signing key - data - data to encrypt - encmode - encryption mode - iv - initialization vector - pad - pad data - Returns: - iv and hmac not specified: iv || encrypted data || signature - else: encrypted data - Raises: - No special exception handling - """ - # create iv - if encmode == _ENCRYPTION_MODE_CHUNKEDBLOB: - iv = os.urandom(_AES256_BLOCKSIZE_BYTES) - # force padding on since this will be an individual encrypted chunk - pad = True - # encrypt data - cipher = cryptography.hazmat.primitives.ciphers.Cipher( - cryptography.hazmat.primitives.ciphers.algorithms.AES(symkey), - cryptography.hazmat.primitives.ciphers.modes.CBC(iv), - backend=cryptography.hazmat.backends.default_backend()).encryptor() - if pad: - encdata = cipher.update(pad_pkcs7(data)) + cipher.finalize() - else: - encdata = cipher.update(data) + cipher.finalize() - # sign encrypted data - if encmode == _ENCRYPTION_MODE_CHUNKEDBLOB: - hmacsha256 = hmac.new(signkey, digestmod=hashlib.sha256) - hmacsha256.update(iv + encdata) - return iv + encdata + hmacsha256.digest() - else: - return encdata - - -def decrypt_chunk( - symkey, signkey, encchunk, encmode, iv=None, unpad=False): - """Decrypt a chunk of data - Parameters: - symkey - symmetric key - signkey - signing key - encchunk - data to decrypt - encmode - encryption mode - blockid - block id - iv - initialization vector - unpad - unpad data - Returns: - decrypted data - Raises: - RuntimeError if signature verification fails - """ - # if chunked blob, then preprocess for iv and signature - if encmode == _ENCRYPTION_MODE_CHUNKEDBLOB: - # retrieve iv - iv = encchunk[:_AES256_BLOCKSIZE_BYTES] - # retrieve encrypted data - encdata = encchunk[ - _AES256_BLOCKSIZE_BYTES:-_HMACSHA256_DIGESTSIZE_BYTES] - # retrieve signature - sig = encchunk[-_HMACSHA256_DIGESTSIZE_BYTES:] - # validate integrity of data - hmacsha256 = hmac.new(signkey, digestmod=hashlib.sha256) - # compute hmac over iv + encdata - hmacsha256.update(encchunk[:-_HMACSHA256_DIGESTSIZE_BYTES]) - if not cryptography.hazmat.primitives.constant_time.bytes_eq( - hmacsha256.digest(), sig): - raise RuntimeError( - 'Encrypted data integrity check failed for chunk') - else: - encdata = encchunk - # decrypt data - cipher = cryptography.hazmat.primitives.ciphers.Cipher( - cryptography.hazmat.primitives.ciphers.algorithms.AES(symkey), - cryptography.hazmat.primitives.ciphers.modes.CBC(iv), - backend=cryptography.hazmat.backends.default_backend()).decryptor() - decrypted = cipher.update(encdata) + cipher.finalize() - if unpad: - return unpad_pkcs7(decrypted) - else: - return decrypted - - -def azure_request(req, timeout=None, *args, **kwargs): - """Wrapper method to issue/retry requests to Azure, works with both - the Azure Python SDK and Requests - Parameters: - req - request to issue - timeout - timeout in seconds - args - positional args to req - kwargs - keyworded args to req - Returns: - result of request - Raises: - Any uncaught exceptions - IOError if timeout - """ - start = time.clock() - lastwait = None - while True: - try: - return req(*args, **kwargs) - except requests.Timeout: - pass - except (requests.ConnectionError, - requests.exceptions.ChunkedEncodingError) as exc: - if (isinstance(exc.args[0], requests.packages.urllib3. 
- exceptions.ProtocolError) and - isinstance(exc.args[0].args[1], socket.error)): - err = exc.args[0].args[1].errno - if (err != errno.ECONNRESET and - err != errno.ECONNREFUSED and - err != errno.ECONNABORTED and - err != errno.ENETRESET and - err != errno.ETIMEDOUT): - raise - except requests.HTTPError as exc: - if (exc.response.status_code < 500 or - exc.response.status_code == 501 or - exc.response.status_code == 505): - raise - except azure.common.AzureHttpError as exc: - if (exc.status_code < 500 or - exc.status_code == 501 or - exc.status_code == 505): - raise - if timeout is not None and time.clock() - start > timeout: - raise IOError( - 'waited {} sec for request {}, exceeded timeout of {}'.format( - time.clock() - start, req.__name__, timeout)) - if lastwait is None or lastwait > 8: - wait = 1 - else: - wait = lastwait << 1 - lastwait = wait - time.sleep(wait) - - -def create_dir_ifnotexists(dirname): - """Create a directory if it doesn't exist - Parameters: - dirname - name of directory to create - Returns: - Nothing - Raises: - Unhandled exceptions - """ - try: - os.makedirs(dirname) - print('created local directory: {}'.format(dirname)) - except OSError as exc: - if exc.errno != errno.EEXIST: - raise # pragma: no cover - - -def get_mime_type(filename): - """Guess the type of a file based on its filename - Parameters: - filename - filename to guess the content-type - Returns: - A string of the form 'type/subtype', - usable for a MIME content-type header - Raises: - Nothing - """ - return (mimetypes.guess_type(filename)[0] or 'application/octet-stream') - - -def encode_blobname(args, blobname): - """Encode blob name: url encode. Due to current Azure Python Storage SDK - limitations, does not apply to non-SAS requests. - Parameters: - args - program arguments - Returns: - urlencoded blob name - Raises: - Nothing - """ - if args.saskey is None or args.fileshare: - return blobname - else: - return urlquote(blobname) - - -def base64encode(obj): - """Encode object to base64 - Parameters: - obj - object to encode - Returns: - base64 encoded string - Raises: - Nothing - """ - if _PY2: - return base64.b64encode(obj) - else: - return str(base64.b64encode(obj), 'ascii') - - -def compute_md5_for_file_asbase64(filename, pagealign=False, blocksize=65536): - """Compute MD5 hash for file and encode as Base64 - Parameters: - filename - filename to compute md5 - pagealign - align bytes for page boundary - blocksize - block size in bytes - Returns: - MD5 for file encoded as Base64 - Raises: - Nothing - """ - hasher = hashlib.md5() - with open(filename, 'rb') as filedesc: - while True: - buf = filedesc.read(blocksize) - if not buf: - break - buflen = len(buf) - if pagealign and buflen < blocksize: - aligned = page_align_content_length(buflen) - if aligned != buflen: - buf = buf.ljust(aligned, b'\0') - hasher.update(buf) - return base64encode(hasher.digest()) - - -def compute_md5_for_data_asbase64(data): - """Compute MD5 hash for bits and encode as Base64 - Parameters: - data - data to compute MD5 hash over - Returns: - MD5 for data encoded as Base64 - Raises: - Nothing - """ - hasher = hashlib.md5() - hasher.update(data) - return base64encode(hasher.digest()) - - -def page_align_content_length(length): - """Compute page boundary alignment - Parameters: - length - content length - Returns: - aligned byte boundary - Raises: - Nothing - """ - mod = length % _PAGEBLOB_BOUNDARY - if mod != 0: - return length + (_PAGEBLOB_BOUNDARY - mod) - return length - - -def as_page_blob(args, name): - """Determines 
if the file should be a pageblob depending upon args - Parameters: - args - program args - name - file name - Returns: - True if file should be a pageblob - Raises: - Nothing - """ - if not args.fileshare and ( - args.pageblob or (args.autovhd and name.lower().endswith('.vhd'))): - return True - return False - - -def get_blob_listing(blob_service, args, metadata=True): - """Convenience method for generating a blob listing of a container - Parameters: - blob_service - blob service - args - program arguments - metadata - include metadata - Returns: - dictionary of blob -> list [content length, content md5, enc metadata] - Raises: - Nothing - """ - marker = None - blobdict = {} - if metadata: - incl = azure.storage.blob.Include.METADATA - else: - incl = None - while True: - try: - result = azure_request( - blob_service.list_blobs, timeout=args.timeout, - container_name=args.container, marker=marker, include=incl) - except azure.common.AzureMissingResourceHttpError: - break - for blob in result: - blobdict[blob.name] = [ - blob.properties.content_length, - blob.properties.content_settings.content_md5, None] - if (blob.metadata is not None and - _ENCRYPTION_METADATA_NAME in blob.metadata): - encmeta = EncryptionMetadataJson( - args, None, None, None, None, None) - encmeta.parse_metadata_json( - blob.name, args.rsaprivatekey, args.rsapublickey, - blob.metadata) - blobdict[blob.name][1] = encmeta.md5 - if (args.rsaprivatekey is not None or - args.rsapublickey is not None): - blobdict[blob.name][2] = encmeta - marker = result.next_marker - if marker is None or len(marker) < 1: - break - return blobdict - - -def get_fileshare_listing(file_service, args, metadata=True): - """Retrieve all files and directories under a file share - Parameters: - file_service - file service - args - program args - metadata - retrieve metadata - Returns: - dictionary of files -> list [content length, content md5, enc metadata] - Raises: - Nothing - """ - blobdict = {} - dirs = [None] - while len(dirs) > 0: - dir = dirs.pop() - fsfiles = file_service.list_directories_and_files( - share_name=args.container, directory_name=dir, - timeout=args.timeout) - if dir is None: - dir = '' - for fsfile in fsfiles: - fspath = os.path.join(dir, fsfile.name) - if isinstance(fsfile, azure.storage.file.File): - fsprop = get_fileshare_file_properties( - file_service, args, fspath) - blobdict[fspath] = fsprop[1] - else: - dirs.append(fspath) - return blobdict - - -def split_fileshare_path_into_parts(remotefname): - """Split fileshare name into parts - Parameters: - remotefname - remote file name - Returns: - tuple of (directory name, file name) - Raises: - Nothing - """ - parts = remotefname.split(os.path.sep) - dirname = os.path.sep.join(parts[:len(parts) - 1]) - return (dirname, parts[-1]) - - -def get_fileshare_file_properties(file_service, args, remotefname): - """Convenience method for retrieving a file share file's properties and - metadata - Parameters: - file_service - file service - args - program arguments - remotefname - remote file name - Returns: - blobdict entry tuple (file name, blobdict value) - Raises: - Nothing - """ - # split directory and file name - dirname, fname = split_fileshare_path_into_parts(remotefname) - try: - fsfile = file_service.get_file_properties( - args.container, dirname, fname, timeout=args.timeout) - except azure.common.AzureMissingResourceHttpError: - return None - fsmeta = file_service.get_file_metadata( - args.container, dirname, fname, timeout=args.timeout) - entry = [ - 
fsfile.properties.content_length, - fsfile.properties.content_settings.content_md5, None] - if fsmeta is not None and _ENCRYPTION_METADATA_NAME in fsmeta: - encmeta = EncryptionMetadataJson( - args, None, None, None, None, None) - encmeta.parse_metadata_json( - fsfile.name, args.rsaprivatekey, args.rsapublickey, - fsmeta) - entry[1] = encmeta.md5 - if (args.rsaprivatekey is not None or - args.rsapublickey is not None): - entry[2] = encmeta - return (fsfile.name, entry) - - -def create_all_parent_directories_fileshare( - file_service, args, fsfile, dirscreated): - """Create all parent directories of a given file share path - Parameters - file_service - file service - args - program args - fsfile - file share path - dirscreated - directories created set - Returns: - Nothing - Raises: - Nothing - """ - dirs = fsfile[0].split(os.path.sep) - for i in xrange(0, len(dirs)): - dir = os.path.join(*(dirs[0:i + 1])) - if dir not in dirscreated: - file_service.create_directory( - share_name=args.container, - directory_name=dir, fail_on_exist=False, - timeout=args.timeout) - dirscreated.add(dir) - - -def generate_xferspec_download( - blob_service, file_service, args, storage_in_queue, localfile, - remoteresource, addfd, blobprop): - """Generate an xferspec for download - Parameters: - blob_service - blob service - file_service - file service - args - program arguments - storage_in_queue - storage input queue - localfile - name of local resource - remoteresource - name of remote resource - addfd - create and add file handle - blobprop - blob properties list [length, md5, metadatadict] - Returns: - xferspec containing instructions - Raises: - ValueError if get_blob_properties returns an invalid result or - contentlength is invalid - """ - contentlength = blobprop[0] - contentmd5 = blobprop[1] - encmeta = blobprop[2] - remoteresource = encode_blobname(args, remoteresource) - # get the blob metadata if missing - if not args.fileshare and ( - contentlength is None or contentmd5 is None or - (args.rsaprivatekey is not None and encmeta is None)): - result = azure_request( - blob_service.get_blob_properties, timeout=args.timeout, - container_name=args.container, blob_name=remoteresource) - if not result: - raise ValueError( - 'unexpected result for get_blob_properties is None') - contentmd5 = result.properties.content_settings.content_md5 - contentlength = result.properties.content_length - if (args.rsaprivatekey is not None and - _ENCRYPTION_METADATA_NAME in result.metadata): - encmeta = EncryptionMetadataJson( - args, None, None, None, None, None) - encmeta.parse_metadata_json( - remoteresource, args.rsaprivatekey, args.rsapublickey, - result.metadata) - if contentlength < 0: - raise ValueError( - 'contentlength is invalid for {}'.format(remoteresource)) - # overwrite content md5 if encryption metadata exists - if encmeta is not None: - contentmd5 = encmeta.md5 - # check if download is needed - if (args.skiponmatch and contentmd5 is not None and - os.path.exists(localfile)): - print('computing file md5 on: {} length: {}'.format( - localfile, contentlength)) - lmd5 = compute_md5_for_file_asbase64(localfile) - print(' >> {} {} {} '.format( - lmd5, contentmd5, remoteresource), end='') - if lmd5 != contentmd5: - print('MISMATCH: re-download') - else: - print('match: skip') - return None, None, None, None - else: - print('remote blob: {} length: {} bytes, md5: {}'.format( - remoteresource, contentlength, contentmd5)) - tmpfilename = localfile + '.blobtmp' - if encmeta is not None: - chunksize = 
encmeta.chunksizebytes - symkey = encmeta.symkey - signkey = encmeta.signkey - if encmeta.encmode == _ENCRYPTION_MODE_FULLBLOB: - ivmap = { - 0: encmeta.iv, - 'hmac': { - 'hmac': None, - 'buffered': {}, - 'curr': 0, - 'sig': encmeta.hmac, - } - } - if signkey is not None: - ivmap['hmac']['hmac'] = hmac.new( - signkey, digestmod=hashlib.sha256) - offset_mod = 0 - elif encmeta.encmode == _ENCRYPTION_MODE_CHUNKEDBLOB: - ivmap = None - offset_mod = _AES256CBC_HMACSHA256_OVERHEAD_BYTES + 1 - else: - raise RuntimeError('Unknown encryption mode: {}'.format( - encmeta.encmode)) - else: - chunksize = args.chunksizebytes - offset_mod = 0 - symkey = None - signkey = None - ivmap = None - nchunks = contentlength // chunksize - # compute allocation size, if encrypted this will be an - # underallocation estimate - if contentlength > 0: - if encmeta is not None: - if encmeta.encmode == _ENCRYPTION_MODE_CHUNKEDBLOB: - allocatesize = contentlength - ((nchunks + 2) * offset_mod) - else: - allocatesize = contentlength - _AES256_BLOCKSIZE_BYTES - else: - allocatesize = contentlength - if allocatesize < 0: - allocatesize = 0 - else: - allocatesize = 0 - currfileoffset = 0 - nstorageops = 0 - flock = threading.Lock() - filedesc = None - # preallocate file - with flock: - filedesc = open(tmpfilename, 'wb') - if allocatesize > 0: - filedesc.seek(allocatesize - 1) - filedesc.write(b'\0') - filedesc.close() - if addfd: - # reopen under r+b mode - filedesc = open(tmpfilename, 'r+b') - else: - filedesc = None - chunktoadd = min(chunksize, contentlength) - for i in xrange(nchunks + 1): - if chunktoadd + currfileoffset > contentlength: - chunktoadd = contentlength - currfileoffset - # on download, chunktoadd must be offset by 1 as the x-ms-range - # header expects it that way. x -> y bytes means first bits of the - # (x+1)th byte to the last bits of the (y+1)th byte. 
for example, - # 0 -> 511 means byte 1 to byte 512 - encparam = [ - symkey, signkey, i * offset_mod, - encmeta.encmode if encmeta is not None else None, ivmap, False] - xferspec = (tmpfilename, args.container, remoteresource, i, - currfileoffset, chunktoadd - 1, encparam, flock, filedesc) - currfileoffset = currfileoffset + chunktoadd - nstorageops = nstorageops + 1 - storage_in_queue.put(PqTupleSort((i, xferspec))) - if currfileoffset >= contentlength: - encparam[5] = True - break - return contentlength, nstorageops, contentmd5, filedesc - - -def generate_xferspec_upload( - args, storage_in_queue, blobskipdict, blockids, localfile, - remoteresource, addfd): - """Generate an xferspec for upload - Parameters: - args - program arguments - storage_in_queue - storage input queue - blobskipdict - blob skip dictionary - blockids - block id dictionary - localfile - name of local resource - remoteresource - name of remote resource - addfd - create and add file handle - Returns: - xferspec containing instructions - Raises: - Nothing - """ - # compute md5 hash - md5digest = None - if args.computefilemd5: - print('computing file md5 on: {}'.format(localfile)) - md5digest = compute_md5_for_file_asbase64( - localfile, as_page_blob(args, localfile)) - # check if upload is needed - if args.skiponmatch and remoteresource in blobskipdict: - print(' >> {} {} {} '.format( - md5digest, blobskipdict[remoteresource][1], - remoteresource), end='') - if md5digest != blobskipdict[remoteresource][1]: - print('MISMATCH: re-upload') - else: - print('match: skip') - return None, 0, None, None - else: - print(' >> md5: {}'.format(md5digest)) - # create blockids entry - if localfile not in blockids: - blockids[localfile] = [] - # partition local file into chunks - filesize = os.path.getsize(localfile) - if as_page_blob(args, localfile) and ( - args.rsaprivatekey is not None or - args.rsapublickey is not None): - chunksizebytes = _MAX_BLOB_CHUNK_SIZE_BYTES - nchunks = filesize // chunksizebytes - if nchunks > 250000: - raise RuntimeError( - '{} chunks for file {} exceeds Azure Storage limits for a ' - 'single page blob'.format(nchunks, localfile)) - else: - chunksizebytes = args.chunksizebytes - nchunks = filesize // chunksizebytes - if nchunks > 50000: - raise RuntimeError( - '{} chunks for file {} exceeds Azure Storage limits for a ' - 'single block blob'.format(nchunks, localfile)) - chunktoadd = min(chunksizebytes, filesize) - currfileoffset = 0 - nstorageops = 0 - flock = threading.Lock() - filedesc = None - if addfd: - with flock: - filedesc = open(localfile, 'rb') - symkey = None - signkey = None - ivmap = None - for i in xrange(nchunks + 1): - if chunktoadd + currfileoffset > filesize: - chunktoadd = filesize - currfileoffset - blockid = '{0:08d}'.format(currfileoffset // chunksizebytes) - # generate the ivmap for the first block - if (not as_page_blob(args, localfile) and - (args.rsaprivatekey is not None or - args.rsapublickey is not None) and currfileoffset == 0): - # generate sym/signing keys - symkey, signkey = generate_aes256_keys() - if args.encmode == _ENCRYPTION_MODE_FULLBLOB: - ivmap = { - i: os.urandom(_AES256_BLOCKSIZE_BYTES), - 'hmac': hmac.new(signkey, digestmod=hashlib.sha256), - } - else: - ivmap = {} - ivmap['md5'] = hashlib.md5() - ivmap['filesize'] = 0 - blockids[localfile].append(blockid) - encparam = [symkey, signkey, ivmap, False] - xferspec = (localfile, args.container, - encode_blobname(args, remoteresource), blockid, - currfileoffset, chunktoadd, encparam, flock, filedesc) - 
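# --- editor's note (annotation, not part of the original patch) ---
# On the '{0:08d}' block id format used in this loop: Azure block blobs
# require every block id within a blob to have the same length, so ids are
# fixed-width, zero-padded decimal strings; this also keeps lexical order
# identical to upload order when the block list is committed. Illustration
# with an assumed 4 MiB chunk size:
chunksizebytes = 4 * 1024 * 1024
offsets = [0, chunksizebytes, 2 * chunksizebytes]
block_ids = ['{0:08d}'.format(off // chunksizebytes) for off in offsets]
assert block_ids == ['00000000', '00000001', '00000002']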
currfileoffset = currfileoffset + chunktoadd - nstorageops = nstorageops + 1 - storage_in_queue.put(PqTupleSort((i, xferspec))) - if currfileoffset >= filesize: - encparam[3] = True - break - return filesize, nstorageops, md5digest, filedesc - - -def apply_file_collation_and_strip(args, fname): - """Apply collation path or component strip to a remote filename - Parameters: - args - arguments - fname - file name - Returns: - remote filename - Raises: - No special exception handling - """ - remotefname = fname.strip(os.path.sep) - if args.collate is not None: - remotefname = remotefname.split(os.path.sep)[-1] - if args.collate != '.': - remotefname = os.path.sep.join((args.collate, remotefname)) - elif args.stripcomponents > 0: - rtmp = remotefname.split(os.path.sep) - nsc = min((len(rtmp) - 1, args.stripcomponents)) - if nsc > 0: - remotefname = os.path.sep.join(rtmp[nsc:]) - return remotefname - - -def main(): - """Main function - Parameters: - None - Returns: - Nothing - Raises: - ValueError for invalid arguments - """ - # get command-line args - args = parseargs() - - # populate args from env vars - if args.storageaccountkey is None: - args.storageaccountkey = os.getenv(_ENVVAR_STORAGEACCOUNTKEY) - if args.saskey is None: - args.saskey = os.getenv(_ENVVAR_SASKEY) - if args.rsakeypassphrase is None: - args.rsakeypassphrase = os.getenv(_ENVVAR_RSAKEYPASSPHRASE) - - # check some parameters - if (len(args.localresource) < 1 or len(args.storageaccount) < 1 or - len(args.container) < 1): - raise ValueError('invalid positional arguments') - if len(args.endpoint) < 1: - raise ValueError('storage endpoint is invalid') - if args.upload and args.download: - raise ValueError( - 'cannot specify both download and upload transfer direction ' - 'within the same invocation') - if args.subscriptionid is not None and args.managementcert is None: - raise ValueError( - 'cannot specify subscription id without a management cert') - if args.subscriptionid is None and args.managementcert is not None: - raise ValueError( - 'cannot specify a management cert without a subscription id') - if args.storageaccountkey is not None and args.saskey is not None: - raise ValueError('cannot use both a sas key and storage account key') - if args.pageblob and args.fileshare: - raise ValueError( - 'cannot specify both page blob and file share destinations') - if args.autovhd and args.fileshare: - raise ValueError( - 'cannot specify both autovhd and file share destination') - if args.pageblob and args.autovhd: - raise ValueError('cannot specify both pageblob and autovhd parameters') - if args.collate is not None and args.stripcomponents is not None: - raise ValueError( - 'cannot specify collate and non-default component ' - 'strip: {}'.format(args.stripcomponents)) - if args.stripcomponents is None: - args.stripcomponents = 1 - if args.stripcomponents < 0: - raise ValueError('invalid component strip number: {}'.format( - args.stripcomponents)) - if args.rsaprivatekey is not None and args.rsapublickey is not None: - raise ValueError('cannot specify both RSA private and public keys') - if args.rsapublickey is not None and args.rsakeypassphrase is not None: - raise ValueError('cannot specify an RSA public key and passphrase') - if args.timeout is not None and args.timeout <= 0: - args.timeout = None - - # get key if we don't have a handle on one - sms = None - if args.saskey is not None: - if len(args.saskey) < 1: - raise ValueError('invalid sas key specified') - elif args.storageaccountkey is None: - if (args.managementcert is not 
None and - args.subscriptionid is not None): - # check to ensure management cert is valid - if len(args.managementcert) == 0 or \ - args.managementcert.split('.')[-1].lower() != 'pem': - raise ValueError('management cert appears to be invalid') - if args.managementep is None or len(args.managementep) == 0: - raise ValueError('management endpoint is invalid') - # expand management cert path out if contains ~ - args.managementcert = os.path.abspath(args.managementcert) - # get sms reference - sms = azure.servicemanagement.ServiceManagementService( - args.subscriptionid, args.managementcert, args.managementep) - # get keys - service_keys = azure_request( - sms.get_storage_account_keys, timeout=args.timeout, - service_name=args.storageaccount) - args.storageaccountkey = service_keys.storage_service_keys.primary - else: - raise ValueError('could not determine authentication to use') - - # check storage account key validity - if args.storageaccountkey is not None and \ - len(args.storageaccountkey) < 1: - raise ValueError('storage account key is invalid') - - # set valid num workers - if args.numworkers < 1: - args.numworkers = 1 - if (args.fileshare and - args.numworkers == _DEFAULT_MAX_STORAGEACCOUNT_WORKERS): - args.numworkers //= 2 - - # expand any paths - args.localresource = os.path.expanduser(args.localresource) - - # sanitize remote file name - if args.remoteresource: - args.remoteresource = args.remoteresource.strip(os.path.sep) - - # set chunk size - if (args.chunksizebytes is None or args.chunksizebytes < 64 or - args.chunksizebytes > _MAX_BLOB_CHUNK_SIZE_BYTES): - args.chunksizebytes = _MAX_BLOB_CHUNK_SIZE_BYTES - - # set storage ep - endpoint = None - if sms: - storage_acct = azure_request( - sms.get_storage_account_properties, timeout=args.timeout, - service_name=args.storageaccount) - if args.fileshare: - endpoint = storage_acct.storage_service_properties.endpoints[3] - else: - endpoint = storage_acct.storage_service_properties.endpoints[0] - else: - if args.fileshare: - endpoint = 'https://{}.file.{}/'.format( - args.storageaccount, args.endpoint) - else: - endpoint = 'https://{}.blob.{}/'.format( - args.storageaccount, args.endpoint) - - # create master block blob, page blob and file service - blob_service = None - if args.storageaccountkey: - if args.endpoint[0] == '.': - args.endpoint = args.endpoint[1:] - block_blob_service = azure.storage.blob.BlockBlobService( - account_name=args.storageaccount, - account_key=args.storageaccountkey, - endpoint_suffix=args.endpoint) - page_blob_service = azure.storage.blob.PageBlobService( - account_name=args.storageaccount, - account_key=args.storageaccountkey, - endpoint_suffix=args.endpoint) - file_service = azure.storage.file.FileService( - account_name=args.storageaccount, - account_key=args.storageaccountkey, - endpoint_suffix=args.endpoint) - blob_service = (block_blob_service, page_blob_service) - elif args.saskey: - _bs = SasBlobService(endpoint, args.saskey, args.timeout) - blob_service = (_bs, _bs) - # normalize sas key for python sdk - if args.saskey[0] == '?': - args.saskey = args.saskey[1:] - file_service = azure.storage.file.FileService( - account_name=args.storageaccount, - sas_token=args.saskey, - endpoint_suffix=args.endpoint) - # disable container/share creation if SAS is not account-level and - # does not contain a signed resource type with container-level access - if args.createcontainer: - args.createcontainer = False - sasparts = args.saskey.split('&') - for part in sasparts: - tmp = part.split('=') - if tmp[0] == 
'srt': - if 'c' in tmp[1]: - args.createcontainer = True - break - del sasparts - if blob_service is None: - raise ValueError('blob_service is invalid') - if args.fileshare and file_service is None: - raise ValueError('file_service is invalid') - - # check which way we're transfering - xfertoazure = False - if (args.upload or - (not args.download and os.path.exists(args.localresource))): - xfertoazure = True - else: - if args.remoteresource is None: - raise ValueError('cannot download remote file if not specified') - - # import rsa key - if args.rsaprivatekey is not None: - rsakeyfile = args.rsaprivatekey - elif args.rsapublickey is not None: - rsakeyfile = args.rsapublickey - else: - rsakeyfile = None - if rsakeyfile is not None: - # check for conflicting options - if args.pageblob: - raise ValueError( - 'cannot operate in page blob mode with encryption enabled') - # check for supported encryption modes - if (args.encmode != _ENCRYPTION_MODE_FULLBLOB and - args.encmode != _ENCRYPTION_MODE_CHUNKEDBLOB): - raise RuntimeError( - 'Unknown encryption mode: {}'.format(args.encmode)) - # only allow full blob encryption mode for now due to - # possible compatibility issues - if args.encmode == _ENCRYPTION_MODE_CHUNKEDBLOB: - raise RuntimeError( - '{} encryption mode not allowed'.format(args.encmode)) - with open(rsakeyfile, 'rb') as keyfile: - if args.rsaprivatekey is not None: - args.rsaprivatekey = cryptography.hazmat.primitives.\ - serialization.load_pem_private_key( - keyfile.read(), args.rsakeypassphrase, - backend=cryptography.hazmat.backends.default_backend()) - else: - args.rsapublickey = cryptography.hazmat.primitives.\ - serialization.load_pem_public_key( - keyfile.read(), - backend=cryptography.hazmat.backends.default_backend()) - if args.rsaprivatekey is None and not xfertoazure: - raise ValueError('imported RSA key does not have a private key') - # adjust chunk size for padding for chunked mode - if xfertoazure: - if args.encmode == _ENCRYPTION_MODE_CHUNKEDBLOB: - args.chunksizebytes -= _AES256CBC_HMACSHA256_OVERHEAD_BYTES + 1 - elif args.encmode == _ENCRYPTION_MODE_FULLBLOB: - nchunks = args.chunksizebytes // \ - _AES256CBC_HMACSHA256_OVERHEAD_BYTES - args.chunksizebytes = (nchunks - 1) * \ - _AES256CBC_HMACSHA256_OVERHEAD_BYTES - del nchunks - # ensure chunk size is greater than overhead - if args.chunksizebytes <= ( - _AES256CBC_HMACSHA256_OVERHEAD_BYTES + 1) << 1: - raise ValueError('chunksizebytes {} <= encryption min {}'.format( - args.chunksizebytes, - (_AES256CBC_HMACSHA256_OVERHEAD_BYTES + 1) << 1)) - - # disable urllib3 warnings if specified - if args.disableurllibwarnings: - print('!!! 
WARNING: DISABLING URLLIB3 WARNINGS !!!') - requests.packages.urllib3.disable_warnings( - requests.packages.urllib3.exceptions.InsecurePlatformWarning) - requests.packages.urllib3.disable_warnings( - requests.packages.urllib3.exceptions.SNIMissingWarning) - - # collect package versions - packages = ['az.common=' + azure.common.__version__] - try: - packages.append('az.sml=' + azure.servicemanagement.__version__) - except Exception: - pass - try: - packages.append('az.stor=' + azure.storage.__version__) - except Exception: - pass - try: - packages.append('crypt=' + cryptography.__version__) - except Exception: - pass - packages.append( - 'req=' + requests.__version__) - - # print all parameters - print('=====================================') - print(' azure blobxfer parameters [v{}]'.format(_SCRIPT_VERSION)) - print('=====================================') - print(' platform: {}'.format(platform.platform())) - print(' python interpreter: {} {}'.format( - platform.python_implementation(), platform.python_version())) - print(' package versions: {}'.format(' '.join(packages))) - del packages - print(' subscription id: {}'.format(args.subscriptionid)) - print(' management cert: {}'.format(args.managementcert)) - print(' transfer direction: {}'.format( - 'local->Azure' if xfertoazure else 'Azure->local')) - print(' local resource: {}'.format(args.localresource)) - print(' include pattern: {}'.format(args.include)) - print(' remote resource: {}'.format(args.remoteresource)) - print(' max num of workers: {}'.format(args.numworkers)) - print(' timeout: {}'.format(args.timeout)) - print(' storage account: {}'.format(args.storageaccount)) - print(' use SAS: {}'.format(True if args.saskey else False)) - print(' upload as page blob: {}'.format(args.pageblob)) - print(' auto vhd->page blob: {}'.format(args.autovhd)) - print(' upload to file share: {}'.format(args.fileshare)) - print(' container/share name: {}'.format(args.container)) - print(' container/share URI: {}'.format(endpoint + args.container)) - print(' compute block MD5: {}'.format(args.computeblockmd5)) - print(' compute file MD5: {}'.format(args.computefilemd5)) - print(' skip on MD5 match: {}'.format(args.skiponmatch)) - print(' chunk size (bytes): {}'.format(args.chunksizebytes)) - print(' create container: {}'.format(args.createcontainer)) - print(' keep mismatched MD5: {}'.format(args.keepmismatchedmd5files)) - print(' recursive if dir: {}'.format(args.recursive)) - print('component strip on up: {}'.format(args.stripcomponents)) - print(' remote delete: {}'.format(args.delete)) - print(' collate to: {}'.format(args.collate or 'disabled')) - print(' local overwrite: {}'.format(args.overwrite)) - print(' encryption mode: {}'.format( - (args.encmode or 'disabled' if xfertoazure else 'file dependent') - if args.rsaprivatekey is not None or args.rsapublickey is not None - else 'disabled')) - print(' RSA key file: {}'.format(rsakeyfile or 'disabled')) - print(' RSA key type: {}'.format( - 'private' if args.rsaprivatekey is not None else 'public' - if args.rsapublickey is not None else 'disabled')) - print('=======================================\n') - - # mark start time after init - print('script start time: {}'.format(time.strftime("%Y-%m-%d %H:%M:%S"))) - start = time.time() - - # populate instruction queues - allfilesize = 0 - storage_in_queue = queue.PriorityQueue() - nstorageops = 0 - blockids = {} - completed_blockids = {} - filemap = {} - filesizes = {} - delblobs = None - md5map = {} - filedesc = None - if xfertoazure: - # if 
skiponmatch is enabled, list blobs first and check - if args.skiponmatch and not args.fileshare: - blobskipdict = get_blob_listing(blob_service[0], args) - else: - blobskipdict = {} - if os.path.isdir(args.localresource): - if args.remoteresource is not None: - print('WARNING: ignorning specified remoteresource {} for ' - 'directory upload'.format(args.remoteresource)) - _remotefiles = set() - # mirror directory - if args.recursive: - for root, _, files in os.walk(args.localresource): - for dirfile in files: - fname = os.path.join(root, dirfile) - if args.include is not None and not fnmatch.fnmatch( - fname, args.include): - continue - remotefname = apply_file_collation_and_strip( - args, fname) - _remotefiles.add(remotefname) - # manually pull file properties for file service - if args.fileshare and args.skiponmatch: - fsfile = get_fileshare_file_properties( - file_service, args, remotefname) - if fsfile is not None: - blobskipdict[fsfile[0]] = fsfile[1] - filesize, ops, md5digest, filedesc = \ - generate_xferspec_upload( - args, storage_in_queue, blobskipdict, - blockids, fname, remotefname, False) - if filesize is not None: - completed_blockids[fname] = 0 - md5map[fname] = md5digest - filemap[fname] = encode_blobname(args, remotefname) - filesizes[fname] = filesize - allfilesize = allfilesize + filesize - nstorageops = nstorageops + ops - else: - # copy just directory contents, non-recursively - for lfile in os.listdir(args.localresource): - fname = os.path.join(args.localresource, lfile) - if os.path.isdir(fname) or ( - args.include is not None and not fnmatch.fnmatch( - fname, args.include)): - continue - remotefname = apply_file_collation_and_strip(args, fname) - _remotefiles.add(remotefname) - # manually pull file properties for file service - if args.fileshare and args.skiponmatch: - fsfile = get_fileshare_file_properties( - file_service, args, remotefname) - if fsfile is not None: - blobskipdict[fsfile[0]] = fsfile[1] - filesize, ops, md5digest, filedesc = \ - generate_xferspec_upload( - args, storage_in_queue, blobskipdict, - blockids, fname, remotefname, False) - if filesize is not None: - completed_blockids[fname] = 0 - md5map[fname] = md5digest - filemap[fname] = encode_blobname(args, remotefname) - filesizes[fname] = filesize - allfilesize = allfilesize + filesize - nstorageops = nstorageops + ops - # fill deletion list - if args.delete: - # get blob skip dict if it hasn't been populated - if len(blobskipdict) == 0: - if args.fileshare: - blobskipdict = get_fileshare_listing( - file_service, args) - else: - blobskipdict = get_blob_listing( - blob_service[0], args, metadata=False) - delblobs = [x for x in blobskipdict if x not in _remotefiles] - del _remotefiles - else: - # upload single file - if args.remoteresource is None: - args.remoteresource = args.localresource - else: - if args.stripcomponents > 0: - args.stripcomponents -= 1 - args.remoteresource = apply_file_collation_and_strip( - args, args.remoteresource) - # manually pull file properties for file service - if args.fileshare and args.skiponmatch: - fsfile = get_fileshare_file_properties( - file_service, args, args.remoteresource) - if fsfile is not None: - blobskipdict[fsfile[0]] = fsfile[1] - filesize, nstorageops, md5digest, filedesc = \ - generate_xferspec_upload( - args, storage_in_queue, blobskipdict, blockids, - args.localresource, args.remoteresource, True) - if filesize is not None: - completed_blockids[args.localresource] = 0 - md5map[args.localresource] = md5digest - filemap[args.localresource] = 
encode_blobname( - args, args.remoteresource) - filesizes[args.localresource] = filesize - allfilesize = allfilesize + filesize - del blobskipdict - # create container/file share if needed - if args.createcontainer: - if args.fileshare: - print('creating file share, if needed: {}'.format( - args.container)) - try: - azure_request( - file_service.create_share, share_name=args.container, - fail_on_exist=False, timeout=args.timeout) - except azure.common.AzureConflictHttpError: - pass - else: - print('creating container, if needed: {}'.format( - args.container)) - try: - azure_request( - blob_service[0].create_container, timeout=args.timeout, - container_name=args.container, fail_on_exist=False) - except azure.common.AzureConflictHttpError: - pass - # initialize page blobs or file share files - if len(filemap) > 0: - if args.pageblob or args.autovhd: - print('initializing page blobs') - for key in filemap: - if as_page_blob(args, key): - blob_service[1].create_blob( - container_name=args.container, - blob_name=filemap[key], - content_length=page_align_content_length( - filesizes[key]), content_settings=None) - elif args.fileshare: - print('initializing files on fileshare') - dirscreated = set() - for key in filemap: - fsfile = split_fileshare_path_into_parts(filemap[key]) - if args.rsaprivatekey or args.rsapublickey: - fspad = _AES256_BLOCKSIZE_BYTES - else: - fspad = 0 - # try to create the file first, if preconditon failure - # then try creating the parent directory - try: - file_service.create_file( - share_name=args.container, - directory_name=fsfile[0], file_name=fsfile[1], - content_length=filesizes[key] + fspad, - content_settings=None, timeout=args.timeout) - except azure.common.AzureMissingResourceHttpError as exc: - create_all_parent_directories_fileshare( - file_service, args, fsfile, dirscreated) - file_service.create_file( - share_name=args.container, - directory_name=fsfile[0], file_name=fsfile[1], - content_length=filesizes[key] + fspad, - content_settings=None, timeout=args.timeout) - del dirscreated - else: - if args.remoteresource == '.': - print('attempting to copy entire {} {} to {}'.format( - 'file share' if args.fileshare else 'container', - args.container, args.localresource)) - if args.fileshare: - blobdict = get_fileshare_listing(file_service, args) - else: - blobdict = get_blob_listing(blob_service[0], args) - else: - if args.fileshare: - fsfile = get_fileshare_file_properties( - file_service, args, args.remoteresource) - if fsfile is None: - raise RuntimeError('file {} not found on share {}'.format( - args.remoteresource, args.container)) - blobdict = {args.remoteresource: fsfile[1]} - else: - blobdict = {args.remoteresource: [None, None, None]} - if len(blobdict) > 0: - print('generating local directory structure and ' - 'pre-allocating space') - # make the localresource directory - created_dirs = set() - create_dir_ifnotexists(args.localresource) - created_dirs.add(args.localresource) - # generate xferspec for all blobs - for blob in blobdict: - # filter results - if args.include is not None and not fnmatch.fnmatch( - blob, args.include): - continue - if args.collate is not None: - localfile = os.path.join( - args.localresource, args.collate, blob) - else: - localfile = os.path.join(args.localresource, blob) - # create any subdirectories if required - localdir = os.path.dirname(localfile) - if localdir not in created_dirs: - create_dir_ifnotexists(localdir) - created_dirs.add(localdir) - # add instructions - filesize, ops, md5digest, filedesc = \ - 
generate_xferspec_download( - blob_service[0], file_service, args, storage_in_queue, - localfile, blob, False, blobdict[blob]) - if filesize is not None: - md5map[localfile] = md5digest - filemap[localfile] = localfile + '.blobtmp' - allfilesize = allfilesize + filesize - nstorageops = nstorageops + ops - if len(blobdict) > 0: - del created_dirs - del blobdict - - # delete any remote blobs if specified - if xfertoazure and delblobs is not None: - if args.fileshare: - print('deleting {} remote files'.format(len(delblobs))) - for blob in delblobs: - fsfile = split_fileshare_path_into_parts(blob) - azure_request( - file_service.delete_file, - share_name=args.container, directory_name=fsfile[0], - file_name=fsfile[1], timeout=args.timeout) - else: - print('deleting {} remote blobs'.format(len(delblobs))) - for blob in delblobs: - azure_request( - blob_service[0].delete_blob, timeout=args.timeout, - container_name=args.container, blob_name=blob) - print('deletion complete.') - - if nstorageops == 0: - print('detected no transfer actions needed to be taken, exiting...') - sys.exit(0) - - if xfertoazure: - # count number of empty files - emptyfiles = 0 - for fsize in filesizes.items(): - if fsize[1] == 0: - emptyfiles += 1 - print('detected {} empty files to upload'.format(emptyfiles)) - if args.fileshare: - print('performing {} put ranges and {} set file properties'.format( - nstorageops, len(blockids) - emptyfiles)) - progress_text = 'ranges' - elif args.pageblob: - print('performing {} put pages/blobs and {} set blob ' - 'properties'.format( - nstorageops, len(blockids) - emptyfiles)) - progress_text = 'pages' - elif args.autovhd: - print('performing {} mixed page/block operations with {} ' - 'finalizing operations'.format( - nstorageops, len(blockids) - emptyfiles)) - progress_text = 'chunks' - else: - print('performing {} put blocks/blobs and {} put block ' - 'lists'.format( - nstorageops, len(blockids) - emptyfiles)) - progress_text = 'blocks' - else: - print('performing {} range-gets'.format(nstorageops)) - progress_text = 'range-gets' - - # spawn workers - storage_out_queue = queue.Queue(nstorageops) - maxworkers = min((args.numworkers, nstorageops)) - print('spawning {} worker threads'.format(maxworkers)) - exc_list = [] - threads = [] - for _ in xrange(maxworkers): - thr = StorageChunkWorker( - exc_list, storage_in_queue, storage_out_queue, args, xfertoazure, - blob_service, file_service) - thr.start() - threads.append(thr) - - done_ops = 0 - hmacs = {} - storage_start = time.time() - progress_bar( - args.progressbar, 'xfer', progress_text, nstorageops, - done_ops, storage_start) - while True: - try: - localresource, encparam = storage_out_queue.get() - except KeyboardInterrupt: - print('\n\nKeyboardInterrupt detected, force terminating ' - 'threads (this may take a while)...') - for thr in threads: - thr.terminate = True - for thr in threads: - thr.join() - raise - if len(exc_list) > 0: - for exc in exc_list: - print(exc) - sys.exit(1) - if xfertoazure: - completed_blockids[localresource] = completed_blockids[ - localresource] + 1 - if completed_blockids[localresource] == len( - blockids[localresource]): - if as_page_blob(args, localresource): - if args.computefilemd5: - azure_request( - blob_service[1].set_blob_properties, - timeout=args.timeout, - container_name=args.container, - blob_name=filemap[localresource], - content_settings=azure.storage.blob. 
- ContentSettings(content_md5=md5map[localresource])) - elif args.fileshare: - fsfile = split_fileshare_path_into_parts( - filemap[localresource]) - # set file metadata for encrypted files - if filesizes[localresource] > 0 and ( - args.rsaprivatekey is not None or - args.rsapublickey is not None): - if args.encmode == _ENCRYPTION_MODE_FULLBLOB: - encmetadata = EncryptionMetadataJson( - args, encparam[0], encparam[1], - encparam[2][0], - encparam[2]['hmac'].digest(), - md5map[localresource] - ).construct_metadata_json() - else: - encmetadata = EncryptionMetadataJson( - args, encparam[0], encparam[1], None, - None, md5map[localresource] - ).construct_metadata_json() - azure_request( - file_service.set_file_metadata, - share_name=args.container, - directory_name=fsfile[0], file_name=fsfile[1], - metadata=encmetadata, - timeout=args.timeout) - # resize file to final encrypted size if required - if (filesizes[localresource] + - _AES256_BLOCKSIZE_BYTES != - encparam[2]['filesize']): - azure_request( - file_service.resize_file, - share_name=args.container, - directory_name=fsfile[0], file_name=fsfile[1], - content_length=encparam[2]['filesize'], - timeout=args.timeout) - if args.computefilemd5: - if (args.rsaprivatekey is not None or - args.rsapublickey is not None): - md5 = base64encode(encparam[2]['md5'].digest()) - else: - md5 = md5map[localresource] - azure_request( - file_service.set_file_properties, - share_name=args.container, - directory_name=fsfile[0], file_name=fsfile[1], - content_settings=azure.storage.file. - ContentSettings(content_md5=md5), - timeout=args.timeout) - else: - # only perform put block list on non-zero byte files - if filesizes[localresource] > 0: - if (args.rsaprivatekey is not None or - args.rsapublickey is not None): - md5 = base64encode(encparam[2]['md5'].digest()) - else: - md5 = md5map[localresource] - block_list = [] - for bid in blockids[localresource]: - block_list.append( - azure.storage.blob.BlobBlock(id=bid)) - azure_request( - blob_service[0].put_block_list, - timeout=args.timeout, - container_name=args.container, - blob_name=filemap[localresource], - block_list=block_list, - content_settings=azure.storage.blob. - ContentSettings( - content_type=get_mime_type(localresource), - content_md5=md5)) - # set blob metadata for encrypted blobs - if (args.rsaprivatekey is not None or - args.rsapublickey is not None): - if args.encmode == _ENCRYPTION_MODE_FULLBLOB: - encmetadata = EncryptionMetadataJson( - args, encparam[0], encparam[1], - encparam[2][0], - encparam[2]['hmac'].digest(), - md5map[localresource] - ).construct_metadata_json() - else: - encmetadata = EncryptionMetadataJson( - args, encparam[0], encparam[1], None, - None, md5map[localresource] - ).construct_metadata_json() - azure_request( - blob_service[0].set_blob_metadata, - timeout=args.timeout, - container_name=args.container, - blob_name=filemap[localresource], - metadata=encmetadata) - else: - if (args.rsaprivatekey is not None and - encparam[3] == _ENCRYPTION_MODE_FULLBLOB and - not as_page_blob(args, localresource) and - encparam[4]['hmac']['hmac'] is not None): - hmacs[localresource] = encparam[4]['hmac'] - done_ops += 1 - progress_bar( - args.progressbar, 'xfer', progress_text, nstorageops, - done_ops, storage_start) - if done_ops == nstorageops: - break - endtime = time.time() - if filedesc is not None: - filedesc.close() - progress_bar( - args.progressbar, 'xfer', progress_text, nstorageops, - done_ops, storage_start) - print('\n\n{} MiB transfered, elapsed {} sec. 
' - 'Throughput = {} Mbit/sec\n'.format( - allfilesize / 1048576.0, endtime - storage_start, - (8.0 * allfilesize / 1048576.0) / (endtime - storage_start))) - - # finalize files/blobs - if not xfertoazure: - print( - 'performing finalization (if applicable): {}: {}, MD5: {}'.format( - _ENCRYPTION_AUTH_ALGORITHM, - args.rsaprivatekey is not None, args.computefilemd5)) - for localfile in filemap: - tmpfilename = filemap[localfile] - finalizefile = True - skipmd5 = False - # check hmac - if (args.rsaprivatekey is not None and - args.encmode == _ENCRYPTION_MODE_FULLBLOB): - if tmpfilename in hmacs: - hmacdict = hmacs[tmpfilename] - # process any remaining hmac data - while len(hmacdict['buffered']) > 0: - curr = hmacdict['curr'] - if curr in hmacdict['buffered']: - hmacdict['hmac'].update(hmacdict['buffered'][curr]) - hmacdict['buffered'].pop(curr) - hmacdict['curr'] = curr + 1 - else: - break - digest = base64encode(hmacdict['hmac'].digest()) - res = 'OK' - if digest != hmacdict['sig']: - res = 'MISMATCH' - finalizefile = False - else: - skipmd5 = True - print('[{}: {}, {}] {} {}'.format( - _ENCRYPTION_AUTH_ALGORITHM, res, localfile, - digest, hmacdict['sig'])) - # compare md5 hash - if args.computefilemd5 and not skipmd5: - lmd5 = compute_md5_for_file_asbase64(tmpfilename) - if md5map[localfile] is None: - print('[MD5: SKIPPED, {}] {} {}'.format( - localfile, lmd5, md5map[localfile])) - else: - if lmd5 != md5map[localfile]: - res = 'MISMATCH' - if not args.keepmismatchedmd5files: - finalizefile = False - else: - res = 'OK' - print('[MD5: {}, {}] {} {}'.format( - res, localfile, lmd5, md5map[localfile])) - if finalizefile: - # check for existing file first - if os.path.exists(localfile): - if args.overwrite: - os.remove(localfile) - else: - raise IOError( - 'cannot overwrite existing file: {}'.format( - localfile)) - # move tmp file to real file - os.rename(tmpfilename, localfile) - else: - os.remove(tmpfilename) - print('finalization complete.') - - # output final log lines - print('\nscript elapsed time: {} sec'.format(time.time() - start)) - print('script end time: {}'.format(time.strftime("%Y-%m-%d %H:%M:%S"))) - - -def progress_bar(display, sprefix, rtext, value, qsize, start): - """Display a progress bar - Parameters: - display - display bar - sprefix - progress prefix - rtext - rate text - value - value input value - qsize - queue size - start - start time - Returns: - Nothing - Raises: - Nothing - """ - if not display: - return - done = float(qsize) / value - diff = time.time() - start - if diff <= 0: - # arbitrarily give a small delta - diff = 1e-6 - rate = float(qsize) / (diff / 60) - sys.stdout.write( - '\r{0} progress: [{1:30s}] {2:.2f}% {3:10.2f} {4}/min '.format( - sprefix, '>' * int(done * 30), done * 100, rate, rtext)) - sys.stdout.flush() - - -def parseargs(): # pragma: no cover - """Sets up command-line arguments and parser - Parameters: - Nothing - Returns: - Parsed command line arguments - Raises: - Nothing - """ - parser = argparse.ArgumentParser( - description='Transfer files/blobs to/from Azure blob or file storage') - parser.set_defaults( - autovhd=False, endpoint=_DEFAULT_STORAGE_ENDPOINT, - chunksizebytes=_MAX_BLOB_CHUNK_SIZE_BYTES, collate=None, - computeblockmd5=False, computefilemd5=True, createcontainer=True, - delete=False, disableurllibwarnings=False, - encmode=_DEFAULT_ENCRYPTION_MODE, fileshare=False, include=None, - managementep=_DEFAULT_MANAGEMENT_ENDPOINT, - numworkers=_DEFAULT_MAX_STORAGEACCOUNT_WORKERS, overwrite=True, - pageblob=False, progressbar=True, 
recursive=True, rsaprivatekey=None, - rsapublickey=None, rsakeypassphrase=None, skiponmatch=True, - stripcomponents=None, timeout=None) - parser.add_argument('storageaccount', help='name of storage account') - parser.add_argument( - 'container', - help='name of blob container or file share') - parser.add_argument( - 'localresource', - help='name of the local file or directory, if mirroring. "."=use ' - 'current directory') - parser.add_argument( - '--autovhd', action='store_true', - help='automatically upload files ending in .vhd as page blobs') - parser.add_argument( - '--collate', nargs='?', - help='collate all files into a specified path') - parser.add_argument( - '--computeblockmd5', dest='computeblockmd5', action='store_true', - help='compute block/page level MD5 during upload') - parser.add_argument( - '--chunksizebytes', type=int, - help='maximum chunk size to transfer in bytes [{}]'.format( - _MAX_BLOB_CHUNK_SIZE_BYTES)) - parser.add_argument( - '--delete', action='store_true', - help='delete extraneous remote blobs that have no corresponding ' - 'local file when uploading directories') - parser.add_argument( - '--disable-urllib-warnings', action='store_true', - dest='disableurllibwarnings', - help='disable urllib warnings (not recommended)') - parser.add_argument( - '--download', action='store_true', - help='force transfer direction to download from Azure') - parser.add_argument( - '--encmode', - help='encryption mode [{}]'.format(_DEFAULT_ENCRYPTION_MODE)) - parser.add_argument( - '--endpoint', - help='storage endpoint [{}]'.format(_DEFAULT_STORAGE_ENDPOINT)) - parser.add_argument( - '--fileshare', action='store_true', - help='transfer to a file share rather than block/page blob') - parser.add_argument( - '--include', type=str, - help='include pattern (Unix shell-style wildcards)') - parser.add_argument( - '--keepmismatchedmd5files', action='store_true', - help='keep files with MD5 mismatches') - parser.add_argument( - '--managementcert', - help='path to management certificate .pem file') - parser.add_argument( - '--managementep', - help='management endpoint [{}]'.format(_DEFAULT_MANAGEMENT_ENDPOINT)) - parser.add_argument( - '--no-computefilemd5', dest='computefilemd5', action='store_false', - help='do not compute file MD5 and either upload as metadata ' - 'or validate on download') - parser.add_argument( - '--no-createcontainer', dest='createcontainer', action='store_false', - help='do not create container if it does not exist') - parser.add_argument( - '--no-overwrite', dest='overwrite', action='store_false', - help='do not overwrite local files on download') - parser.add_argument( - '--no-progressbar', dest='progressbar', action='store_false', - help='disable progress bar') - parser.add_argument( - '--no-recursive', dest='recursive', action='store_false', - help='do not mirror local directory recursively') - parser.add_argument( - '--no-skiponmatch', dest='skiponmatch', action='store_false', - help='do not skip upload/download on MD5 match') - parser.add_argument( - '--numworkers', type=int, - help='max number of workers [{}]'.format( - _DEFAULT_MAX_STORAGEACCOUNT_WORKERS)) - parser.add_argument( - '--pageblob', action='store_true', - help='upload as page blob rather than block blob, blobs will ' - 'be page-aligned in Azure storage') - parser.add_argument( - '--rsaprivatekey', - help='RSA private key file in PEM format. Specifying an RSA private ' - 'key will turn on decryption (or encryption). 
An RSA private key is ' - 'required for downloading and decrypting blobs and may be specified ' - 'for encrypting and uploading blobs.') - parser.add_argument( - '--rsapublickey', - help='RSA public key file in PEM format. Specifying an RSA public ' - 'key will turn on encryption. An RSA public key can only be used ' - 'for encrypting and uploading blobs.') - parser.add_argument( - '--rsakeypassphrase', - help='Optional passphrase for decrypting an RSA private key; can be ' - 'specified as {} environment variable instead'.format( - _ENVVAR_RSAKEYPASSPHRASE)) - parser.add_argument( - '--remoteresource', - help='name of remote resource on Azure storage. "."=container ' - 'copy recursive implied') - parser.add_argument( - '--saskey', - help='SAS key to use, if recursive upload or container download, ' - 'this must be a container SAS; can be specified as ' - '{} environment variable instead'.format(_ENVVAR_SASKEY)) - parser.add_argument( - '--storageaccountkey', - help='storage account shared key; can be specified as ' - '{} environment variable instead'.format(_ENVVAR_STORAGEACCOUNTKEY)) - parser.add_argument( - '--strip-components', dest='stripcomponents', type=int, - help='strip N leading components from path on upload [1]') - parser.add_argument('--subscriptionid', help='subscription id') - parser.add_argument( - '--timeout', type=float, - help='timeout in seconds for any operation to complete') - parser.add_argument( - '--upload', action='store_true', - help='force transfer direction to upload to Azure') - parser.add_argument('--version', action='version', version=_SCRIPT_VERSION) - return parser.parse_args() - - -if __name__ == '__main__': - main() diff --git a/blobxfer/__init__.py b/blobxfer/__init__.py new file mode 100644 index 0000000..157d59f --- /dev/null +++ b/blobxfer/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +from .version import __version__ # noqa diff --git a/blobxfer/util.py b/blobxfer/util.py new file mode 100644 index 0000000..bf3a9a8 --- /dev/null +++ b/blobxfer/util.py @@ -0,0 +1,213 @@ +# Copyright (c) Microsoft Corporation +# +# All rights reserved. 
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# compat imports
+from __future__ import absolute_import, division, print_function
+from builtins import (  # noqa
+    bytes, dict, int, list, object, range, str, ascii, chr, hex, input,
+    next, oct, open, pow, round, super, filter, map, zip
+)
+# stdlib imports
+import base64
+import copy
+import hashlib
+import logging
+import logging.handlers
+import mimetypes
+try:
+    from os import scandir as scandir
+except ImportError:  # noqa
+    from scandir import scandir as scandir
+import sys
+# non-stdlib imports
+# local imports
+
+# global defines
+_PY2 = sys.version_info.major == 2
+_PAGEBLOB_BOUNDARY = 512
+
+
+def on_python2():
+    # type: (None) -> bool
+    """Execution on python2
+    :rtype: bool
+    :return: if on Python2
+    """
+    return _PY2
+
+
+def setup_logger(logger):  # noqa
+    # type: (logger) -> None
+    """Set up logger"""
+    logger.setLevel(logging.DEBUG)
+    handler = logging.StreamHandler()
+    formatter = logging.Formatter(
+        '%(asctime)sZ %(levelname)s %(name)s:%(funcName)s:%(lineno)d '
+        '%(message)s')
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+
+
+def is_none_or_empty(obj):
+    # type: (any) -> bool
+    """Determine if object is None or empty
+    :type any obj: object
+    :rtype: bool
+    :return: if object is None or empty
+    """
+    if obj is None or len(obj) == 0:
+        return True
+    return False
+
+
+def is_not_empty(obj):
+    # type: (any) -> bool
+    """Determine if object is not None and its length is > 0
+    :type any obj: object
+    :rtype: bool
+    :return: if object is not None and length is > 0
+    """
+    if obj is not None and len(obj) > 0:
+        return True
+    return False
+
+
+def merge_dict(dict1, dict2):
+    # type: (dict, dict) -> dict
+    """Recursively merge dictionaries: dict2 on to dict1. This differs
+    from dict.update() in that values that are dicts are recursively merged.
+    Note that only dict value types are merged, not lists, etc.
+
+    :param dict dict1: dictionary to merge to
+    :param dict dict2: dictionary to merge with
+    :rtype: dict
+    :return: merged dictionary
+    """
+    if not isinstance(dict1, dict) or not isinstance(dict2, dict):
+        raise ValueError('dict1 or dict2 is not a dictionary')
+    result = copy.deepcopy(dict1)
+    for k, v in dict2.items():
+        if k in result and isinstance(result[k], dict):
+            result[k] = merge_dict(result[k], v)
+        else:
+            result[k] = copy.deepcopy(v)
+    return result
+
+
+def scantree(path):
+    # type: (str) -> os.DirEntry
+    """Recursively scan a directory tree
+    :param str path: path to scan
+    :rtype: DirEntry
+    :return: DirEntry via generator
+    """
+    for entry in scandir(path):
+        if entry.is_dir(follow_symlinks=True):
+            # due to python2 compat, cannot use yield from here
+            for t in scantree(entry.path):
+                yield t
+        else:
+            yield entry
+
+
+def get_mime_type(filename):
+    # type: (str) -> str
+    """Guess the type of a file based on its filename
+    :param str filename: filename to guess the content-type
+    :rtype: str
+    :return: string of form 'class/type' for MIME content-type header
+    """
+    return (mimetypes.guess_type(filename)[0] or 'application/octet-stream')
+
+
+def base64_encode_as_string(obj):  # noqa
+    # type: (any) -> str
+    """Encode object to base64
+    :param any obj: object to encode
+    :rtype: str
+    :return: base64 encoded string
+    """
+    if _PY2:
+        return base64.b64encode(obj)
+    else:
+        return str(base64.b64encode(obj), 'ascii')
+
+
+def base64_decode_string(string):
+    # type: (str) -> str
+    """Base64 decode a string
+    :param str string: string to decode
+    :rtype: str
+    :return: decoded string
+    """
+    return base64.b64decode(string)
+
+
+def compute_md5_for_file_asbase64(filename, pagealign=False, blocksize=65536):
+    # type: (str, bool, int) -> str
+    """Compute MD5 hash for file and encode as Base64
+    :param str filename: file to compute MD5 for
+    :param bool pagealign: page align data
+    :param int blocksize: block size
+    :rtype: str
+    :return: MD5 for file encoded as Base64
+    """
+    hasher = hashlib.md5()
+    with open(filename, 'rb') as filedesc:
+        while True:
+            buf = filedesc.read(blocksize)
+            if not buf:
+                break
+            buflen = len(buf)
+            if pagealign and buflen < blocksize:
+                aligned = page_align_content_length(buflen)
+                if aligned != buflen:
+                    buf = buf.ljust(aligned, b'\0')
+            hasher.update(buf)
+        return base64_encode_as_string(hasher.digest())
+
+
+def compute_md5_for_data_asbase64(data):
+    # type: (obj) -> str
+    """Compute MD5 hash for bits and encode as Base64
+    :param any data: data to compute MD5 for
+    :rtype: str
+    :return: MD5 for data
+    """
+    hasher = hashlib.md5()
+    hasher.update(data)
+    return base64_encode_as_string(hasher.digest())
+
+
+def page_align_content_length(length):
+    # type: (int) -> int
+    """Compute page boundary alignment
+    :param int length: content length
+    :rtype: int
+    :return: aligned byte boundary
+    """
+    mod = length % _PAGEBLOB_BOUNDARY
+    if mod != 0:
+        return length + (_PAGEBLOB_BOUNDARY - mod)
+    return length
diff --git a/blobxfer/version.py b/blobxfer/version.py
new file mode 100644
index 0000000..9e8b65b
--- /dev/null
+++ b/blobxfer/version.py
@@ -0,0 +1,25 @@
+# Copyright (c) Microsoft Corporation
+#
+# All rights reserved.
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +__version__ = '1.0.0a1' diff --git a/setup.py b/setup.py index 7709e0e..83d5abb 100644 --- a/setup.py +++ b/setup.py @@ -1,41 +1,76 @@ +from codecs import open +import os import re try: from setuptools import setup except ImportError: from distutils.core import setup +import sys -with open('blobxfer.py', 'r') as fd: +if sys.argv[-1] == 'publish': + os.system('rm -rf blobxfer.egg-info/ build dist __pycache__/') + os.system('python setup.py sdist bdist_wheel') + os.unlink('README.rst') + sys.exit() +elif sys.argv[-1] == 'upload': + os.system('twine upload dist/*') + sys.exit() +elif sys.argv[-1] == 'sdist' or sys.argv[-1] == 'bdist_wheel': + import pypandoc + long_description = pypandoc.convert('README.md', 'rst') +else: + long_description = '' + +with open('blobxfer/version.py', 'r', 'utf-8') as fd: version = re.search( - r'^_SCRIPT_VERSION\s*=\s*[\'"]([^\'"]*)[\'"]', + r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', fd.read(), re.MULTILINE).group(1) -with open('README.rst') as readme: - long_description = ''.join(readme).strip() +if not version: + raise RuntimeError('Cannot find version') + +packages = [ + 'blobxfer', + 'blobxfer.blob', + 'blobxfer.blob.block', + 'blobxfer_cli', +] + +install_requires = [ + 'azure-common==1.1.4', + 'azure-storage==0.33.0', + 'click==6.6', + 'cryptography>=1.7.1', + 'future==0.16.0', + 'ruamel.yaml==0.13.11', +] + +if sys.version_info < (3, 5): + install_requires.append('pathlib2') + install_requires.append('scandir') setup( name='blobxfer', version=version, author='Microsoft Corporation, Azure Batch and HPC Team', author_email='', - description='Azure storage transfer tool with AzCopy-like features', + description=( + 'Azure storage transfer tool and library with AzCopy-like features'), long_description=long_description, platforms='any', url='https://github.com/Azure/blobxfer', license='MIT', - py_modules=['blobxfer'], + packages=packages, + package_data={'blobxfer': ['LICENSE']}, + package_dir={'blobxfer': 'blobxfer', 'blobxfer_cli': 'cli'}, entry_points={ - 'console_scripts': 'blobxfer=blobxfer:main', + 'console_scripts': 'blobxfer=blobxfer_cli.cli:cli', }, - install_requires=[ - 'azure-common==1.1.4', - 'azure-storage==0.33.0', - 'azure-servicemanagement-legacy==0.20.5', - 'cryptography>=1.6', - 'requests==2.12.3' - ], + zip_safe=False, + install_requires=install_requires, tests_require=['pytest'], classifiers=[ - 'Development Status :: 4 - Beta', + 'Development Status :: 3 - Alpha', 
'Environment :: Console', 'Intended Audience :: Developers', 'Intended Audience :: System Administrators', @@ -47,7 +82,8 @@ setup( 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', 'Topic :: Utilities', ], - keywords='azcopy azure storage blob files transfer copy smb', + keywords='azcopy azure storage blob files transfer copy smb cifs', ) diff --git a/test/test_blobxfer.py b/test/test_blobxfer.py deleted file mode 100644 index 28208af..0000000 --- a/test/test_blobxfer.py +++ /dev/null @@ -1,1436 +0,0 @@ -# coding=utf-8 -"""Tests for blobxfer""" - -# stdlib imports -import base64 -import copy -import errno -import json -import math -import os -try: - import queue -except ImportError: - import Queue as queue -import socket -import sys -import threading -import uuid -# non-stdlib imports -import azure.common -import azure.storage.blob -import cryptography.exceptions -import cryptography.hazmat.backends -import cryptography.hazmat.primitives.asymmetric.rsa -import cryptography.hazmat.primitives.serialization -from mock import (MagicMock, Mock, patch) -import pytest -import requests -import requests_mock -# module under test -sys.path.append('..') -import blobxfer # noqa - - -# global defines -_RSAKEY = cryptography.hazmat.primitives.asymmetric.rsa.generate_private_key( - public_exponent=65537, key_size=2048, - backend=cryptography.hazmat.backends.default_backend()) - - -def test_encrypt_decrypt_chunk(): - enckey, signkey = blobxfer.generate_aes256_keys() - assert len(enckey) == blobxfer._AES256_KEYLENGTH_BYTES - assert len(signkey) == blobxfer._AES256_KEYLENGTH_BYTES - - # test random binary data, unaligned - iv = os.urandom(16) - plaindata = os.urandom(31) - encdata = blobxfer.encrypt_chunk( - enckey, signkey, plaindata, blobxfer._ENCRYPTION_MODE_CHUNKEDBLOB, - pad=True) - assert encdata != plaindata - decdata = blobxfer.decrypt_chunk( - enckey, signkey, encdata, blobxfer._ENCRYPTION_MODE_CHUNKEDBLOB, - unpad=True) - assert decdata == plaindata - with pytest.raises(RuntimeError): - badsig = base64.b64encode(b'0') - blobxfer.decrypt_chunk( - enckey, badsig, encdata, blobxfer._ENCRYPTION_MODE_CHUNKEDBLOB, - unpad=True) - - encdata = blobxfer.encrypt_chunk( - enckey, signkey, plaindata, blobxfer._ENCRYPTION_MODE_FULLBLOB, - iv=iv, pad=True) - decdata = blobxfer.decrypt_chunk( - enckey, signkey, encdata, blobxfer._ENCRYPTION_MODE_FULLBLOB, - iv=iv, unpad=True) - assert decdata == plaindata - - # test random binary data aligned on boundary - plaindata = os.urandom(32) - encdata = blobxfer.encrypt_chunk( - enckey, signkey, plaindata, blobxfer._ENCRYPTION_MODE_FULLBLOB, - iv=iv, pad=True) - assert encdata != plaindata - decdata = blobxfer.decrypt_chunk( - enckey, signkey, encdata, blobxfer._ENCRYPTION_MODE_FULLBLOB, - iv=iv, unpad=True) - assert decdata == plaindata - - # test text data - plaindata = b'attack at dawn!' 
- encdata = blobxfer.encrypt_chunk( - enckey, signkey, plaindata, blobxfer._ENCRYPTION_MODE_FULLBLOB, - iv, pad=True) - assert encdata != plaindata - decdata = blobxfer.decrypt_chunk( - enckey, signkey, encdata, blobxfer._ENCRYPTION_MODE_FULLBLOB, - iv, unpad=True) - assert decdata == plaindata - - -def test_rsa_keys(): - symkey = os.urandom(32) - enckey, sig = blobxfer.rsa_encrypt_key( - _RSAKEY, None, symkey, asbase64=False) - assert enckey is not None - assert sig is not None - plainkey = blobxfer.rsa_decrypt_key(_RSAKEY, enckey, sig, isbase64=False) - assert symkey == plainkey - - with pytest.raises(cryptography.exceptions.InvalidSignature): - badsig = base64.b64encode(b'0') - blobxfer.rsa_decrypt_key(_RSAKEY, enckey, badsig, isbase64=False) - - enckey, sig = blobxfer.rsa_encrypt_key( - _RSAKEY, None, symkey, asbase64=True) - assert enckey is not None - assert sig is not None - plainkey = blobxfer.rsa_decrypt_key(_RSAKEY, enckey, sig, isbase64=True) - assert symkey == plainkey - - with pytest.raises(cryptography.exceptions.InvalidSignature): - badsig = base64.b64encode(b'0') - blobxfer.rsa_decrypt_key(_RSAKEY, enckey, badsig, isbase64=True) - - -def test_compute_md5(tmpdir): - lpath = str(tmpdir.join('test.tmp')) - testdata = str(uuid.uuid4()) - with open(lpath, 'wt') as f: - f.write(testdata) - md5_file = blobxfer.compute_md5_for_file_asbase64(lpath) - md5_data = blobxfer.compute_md5_for_data_asbase64(testdata.encode('utf8')) - assert md5_file == md5_data - - # test non-existent file - with pytest.raises(IOError): - blobxfer.compute_md5_for_file_asbase64(testdata) - - -def test_page_align_content_length(): - assert 0 == blobxfer.page_align_content_length(0) - assert 512 == blobxfer.page_align_content_length(511) - assert 512 == blobxfer.page_align_content_length(512) - assert 1024 == blobxfer.page_align_content_length(513) - - -def _func_successful_requests_call(timeout=None): - response = MagicMock() - response.raise_for_status = lambda: None - return response - - -def _func_raise_requests_exception_once(val, timeout=None): - if len(val) > 0: - response = MagicMock() - response.raise_for_status = lambda: None - return response - val.append(0) - ex = requests.Timeout() - raise ex - - -def _func_raise_requests_connection_error_once(val, timeout=None): - if len(val) > 0: - response = MagicMock() - response.raise_for_status = lambda: None - return response - val.append(0) - ex = requests.ConnectionError( - requests.packages.urllib3.exceptions.ProtocolError( - 'Connection aborted.', - socket.error(errno.ECONNRESET, 'Connection reset by peer'))) - raise ex - - -def _func_raise_requests_chunked_encoding_error_once(val, timeout=None): - if len(val) > 0: - response = MagicMock() - response.raise_for_status = lambda: None - return response - val.append(0) - ex = requests.exceptions.ChunkedEncodingError( - requests.packages.urllib3.exceptions.ProtocolError( - 'Connection aborted.', - socket.error(errno.ECONNRESET, 'Connection reset by peer'))) - raise ex - - -def _func_raise_azurehttperror_once(val, timeout=None): - if len(val) > 0: - response = MagicMock() - return response - val.append(0) - ex = azure.common.AzureHttpError('ServerBusy', 503) - raise ex - - -@patch('time.sleep', return_value=None) -def test_azure_request(patched_time_sleep): - azcomerr = azure.common.AzureHttpError('ServerBusy', 503) - - with pytest.raises(IOError): - mock = Mock(side_effect=azcomerr) - mock.__name__ = 'name' - blobxfer.azure_request(mock, timeout=0.001) - - with pytest.raises(Exception): - ex = Exception() - 
ex.message = 'Uncaught' - blobxfer.azure_request(Mock(side_effect=ex)) - - with pytest.raises(Exception): - ex = Exception() - ex.__delattr__('message') - blobxfer.azure_request(Mock(side_effect=ex)) - - blobxfer.azure_request( - _func_raise_requests_connection_error_once, val=[], timeout=1) - - blobxfer.azure_request( - _func_raise_requests_chunked_encoding_error_once, val=[], timeout=1) - - blobxfer.azure_request( - _func_raise_azurehttperror_once, val=[], timeout=1) - - with pytest.raises(requests.HTTPError): - exc = requests.HTTPError() - exc.response = MagicMock() - exc.response.status_code = 404 - mock = Mock(side_effect=exc) - blobxfer.azure_request(mock) - - try: - blobxfer.azure_request( - _func_raise_requests_exception_once, val=[], timeout=1) - except Exception: - pytest.fail('unexpected Exception raised') - - try: - blobxfer.azure_request(_func_successful_requests_call) - except Exception: - pytest.fail('unexpected Exception raised') - - -def test_sasblobservice_listblobs(): - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - content = b'string-value' + \ - b'string-valueint-value' + \ - b'string-valueblob-name' + \ - b'date-time-value' + \ - b'date-time-valueetag' + \ - b'2147483648' + \ - b'blob-content-type' + \ - b'abc' + \ - b'sequence-number' + \ - b'BlockBlob' + \ - b'locked|unlocked' + \ - b'available | leased | expired | breaking | broken' + \ - b'infinite | fixedid' + \ - b'pending | success | aborted | failed' + \ - b'source url' + \ - b'bytes copied/bytes total' + \ - b'datetime' + \ - b'error string' + \ - b'value' + \ - b'blob-prefixnm' + \ - b'' - - with requests_mock.mock() as m: - m.get('mock://blobepcontainer?saskey', content=content) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - metamock = MagicMock() - metamock.metadata = True - result = sbs.list_blobs('container', 'marker', include=metamock) - assert len(result) == 1 - assert result[0].name == 'blob-name' - assert result[0].properties.content_length == 2147483648 - assert result[0].properties.content_settings.content_md5 == 'abc' - assert result[0].properties.blobtype == 'BlockBlob' - assert result[0].metadata['Name'] == 'value' - assert result.next_marker == 'nm' - - m.get('mock://blobepcontainer?saskey', content=b'', status_code=201) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - with pytest.raises(IOError): - sbs.list_blobs('container', 'marker') - - -def test_sasblobservice_setblobmetadata(): - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with requests_mock.mock() as m: - m.put('mock://blobepcontainer/blob?saskey') - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - sbs.set_blob_metadata('container', 'blob', None) - sbs.set_blob_metadata('container', 'blob', {'name': 'value'}) - - m.put('mock://blobepcontainer/blob?saskey', status_code=201) - with pytest.raises(IOError): - sbs.set_blob_metadata('container', 'blob', {'name': 'value'}) - - -def test_sasblobservice_getblob(): - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with requests_mock.mock() as m: - m.get('mock://blobepcontainer/blob?saskey', content=b'data') - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - results = sbs._get_blob('container', 'blob', 0, 1) - assert results.content == b'data' - - m.get('mock://blobepcontainer/blob?saskey', status_code=201) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', 
None) - with pytest.raises(IOError): - sbs._get_blob('container', 'blob', 0, 1) - - -def test_sasblobservice_getblobproperties(): - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with requests_mock.mock() as m: - m.head('mock://blobepcontainer/blob?saskey', - headers={'x-ms-meta-hello': 'world', 'content-length': '1'}) - sbs = blobxfer.SasBlobService('mock://blobep', '?saskey', None) - results = sbs.get_blob_properties('container', 'blob') - assert results.metadata['hello'] == 'world' - - m.head('mock://blobepcontainer/blob?saskey', text='', status_code=201) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - with pytest.raises(IOError): - sbs.get_blob_properties('container', 'blob') - - -def test_sasblobservice_putblock(): - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with requests_mock.mock() as m: - m.put('mock://blobepcontainer/blob?saskey', status_code=201) - sbs = blobxfer.SasBlobService('mock://blobep', '?saskey', None) - try: - sbs.put_block( - 'container', 'blob', 'block', 'blockid', - validate_content=False) - except Exception: - pytest.fail('unexpected Exception raised') - - m.put('mock://blobepcontainer/blob?saskey', text='', status_code=200) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - with pytest.raises(IOError): - sbs.put_block( - 'container', 'blob', 'block', 'blockid', - validate_content=False) - - -def test_sasblobservice_putblocklist(): - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with requests_mock.mock() as m: - m.put('mock://blobepcontainer/blob?saskey', status_code=201) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - block_list = [ - azure.storage.blob.BlobBlock(id='1'), - azure.storage.blob.BlobBlock(id='2') - ] - cs = azure.storage.blob.ContentSettings(content_md5='md5') - sbs.put_block_list('container', 'blob', block_list, cs) - - m.put('mock://blobepcontainer/blob?saskey', text='', status_code=200) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - with pytest.raises(IOError): - sbs.put_block_list('container', 'blob', block_list, cs) - - -def test_sasblobservice_setblobproperties(): - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with requests_mock.mock() as m: - m.put('mock://blobepcontainer/blob?saskey', status_code=200) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - cs = azure.storage.blob.ContentSettings(content_md5='md5') - sbs.set_blob_properties('container', 'blob', cs) - - m.put('mock://blobepcontainer/blob?saskey', text='', status_code=201) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - with pytest.raises(IOError): - sbs.set_blob_properties('container', 'blob', cs) - - -def test_sasblobservice_putblob(): - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with requests_mock.mock() as m: - m.put('mock://blobepcontainer/blob?saskey', status_code=201) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - cs = azure.storage.blob.ContentSettings( - content_type='a', content_md5='md5') - sbs._put_blob('container', 'blob', None, cs) - - m.put('mock://blobepcontainer/blob?saskey', content=b'', - status_code=200) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - with pytest.raises(IOError): - sbs._put_blob('container', 'blob', None, cs) - - -def 
test_sasblobservice_createblob(): - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with requests_mock.mock() as m: - m.put('mock://blobepcontainer/blob?saskey', content=b'', - status_code=201) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - cs = azure.storage.blob.ContentSettings(content_md5='md5') - sbs.create_blob('container', 'blob', 0, cs) - - m.put('mock://blobepcontainer/blob?saskey', content=b'', - status_code=200) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - with pytest.raises(IOError): - sbs.create_blob('container', 'blob', 0, cs) - - -def test_sasblobservice_createcontainer(): - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with requests_mock.mock() as m: - m.put('mock://blobepcontainer?saskey', status_code=201) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - sbs.create_container('container', fail_on_exist=False) - - m.put('mock://blobepcontainer?saskey', status_code=409) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - with pytest.raises(requests.exceptions.HTTPError): - sbs.create_container('container', fail_on_exist=True) - - -def test_storagechunkworker_run(tmpdir): - lpath = str(tmpdir.join('test.tmp')) - with open(lpath, 'wt') as f: - f.write(str(uuid.uuid4())) - args = MagicMock() - args.rsakey = None - args.pageblob = True - args.autovhd = False - args.timeout = None - args.fileshare = False - - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - exc_list = [] - flock = threading.Lock() - sa_in_queue = queue.PriorityQueue() - sa_out_queue = queue.Queue() - with requests_mock.mock() as m: - m.put('mock://blobepcontainer/blob?saskey', status_code=200) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - bcw = blobxfer.StorageChunkWorker( - exc_list, sa_in_queue, sa_out_queue, args, True, (sbs, sbs), None) - with pytest.raises(IOError): - bcw.put_storage_data( - lpath, 'container', 'blob', 'blockid', 0, 4, None, flock, None) - - args.pageblob = False - with requests_mock.mock() as m: - m.put('mock://blobepcontainer/blob?saskey', status_code=201) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - bcw = blobxfer.StorageChunkWorker( - exc_list, sa_in_queue, sa_out_queue, args, True, (sbs, sbs), None) - bcw.put_storage_data( - lpath, 'container', 'blob', 'blockid', 0, 4, None, flock, None) - - m.get('mock://blobepcontainer/blob?saskey', status_code=200) - bcw.get_storage_range( - lpath, 'container', 'blob', 0, 0, 4, - [None, None, None, None, None, False], flock, None) - - # test zero-length putblob - bcw.put_storage_data( - lpath, 'container', 'blob', 'blockid', 0, 0, None, flock, None) - bcw._pageblob = True - bcw.put_storage_data( - lpath, 'container', 'blob', 'blockid', 0, 0, None, flock, None) - - # test empty page - with open(lpath, 'wb') as f: - f.write(b'\0' * 4 * 1024 * 1024) - bcw.put_storage_data( - lpath, 'container', 'blob', 'blockid', 0, 4 * 1024 * 1024, - None, flock, None) - with open(lpath, 'wb') as f: - f.write(b'\0' * 4 * 1024) - bcw.put_storage_data( - lpath, 'container', 'blob', 'blockid', 0, 4 * 1024, - None, flock, None) - - sa_in_queue.put((0, (lpath, 'container', 'blob', 'blockid', 0, 4, - [None, None, None, None], flock, None))) - with requests_mock.mock() as m: - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - bcw = blobxfer.StorageChunkWorker( - exc_list, sa_in_queue, 
sa_out_queue, args, False, (sbs, sbs), None) - m.get('mock://blobepcontainer/blob?saskey', status_code=201) - bcw.run() - assert len(exc_list) > 0 - - -@patch('azure.storage.file.FileService.update_range') -@patch('azure.storage.file.FileService._get_file') -def test_storagechunkworker_files_run( - patched_get_file, patched_update_range, tmpdir): - lpath = str(tmpdir.join('test.tmp')) - with open(lpath, 'wt') as f: - f.write(str(uuid.uuid4())) - args = MagicMock() - args.rsakey = None - args.pageblob = False - args.autovhd = False - args.timeout = None - args.fileshare = True - - exc_list = [] - flock = threading.Lock() - sa_in_queue = queue.PriorityQueue() - sa_out_queue = queue.Queue() - fs = azure.storage.file.FileService(account_name='sa', account_key='key') - bcw = blobxfer.StorageChunkWorker( - exc_list, sa_in_queue, sa_out_queue, args, True, None, fs) - patched_update_range.return_value = MagicMock() - bcw.put_storage_data( - lpath, 'container', 'blob', 'blockid', 0, 4, None, flock, None) - - bcw = blobxfer.StorageChunkWorker( - exc_list, sa_in_queue, sa_out_queue, args, False, None, fs) - patched_get_file.return_value = MagicMock() - patched_get_file.return_value.content = b'' - bcw.get_storage_range( - lpath, 'container', 'blob', 0, 0, 4, - [None, None, None, None, None, False], flock, None) - - -@patch('blobxfer.azure_request', return_value=None) -def test_generate_xferspec_download_invalid(patched_azure_request): - args = MagicMock() - args.storageaccount = 'blobep' - args.container = 'container' - args.storageaccountkey = 'saskey' - args.chunksizebytes = 5 - args.timeout = None - args.fileshare = False - sa_in_queue = queue.PriorityQueue() - - with requests_mock.mock() as m: - m.head('mock://blobepcontainer/blob?saskey', headers={ - 'content-length': '-1', 'content-md5': 'md5'}) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - with pytest.raises(ValueError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, 'tmppath', 'blob', True, - [None, None, None]) - - -def test_generate_xferspec_download(tmpdir): - lpath = str(tmpdir.join('test.tmp')) - args = MagicMock() - args.rsakey = None - args.storageaccount = 'blobep' - args.container = 'container' - args.storageaccountkey = 'saskey' - args.chunksizebytes = 5 - args.timeout = None - args.fileshare = False - sa_in_queue = queue.PriorityQueue() - - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with requests_mock.mock() as m: - m.head('mock://blobepcontainer/blob?saskey', headers={ - 'content-length': '-1', 'content-md5': 'md5'}) - sbs = blobxfer.SasBlobService('mock://blobep', 'saskey', None) - with pytest.raises(ValueError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', True, - [None, None, None]) - assert sa_in_queue.qsize() == 0 - m.head('mock://blobepcontainer/blob?saskey', headers={ - 'content-length': '6', 'content-md5': 'md5'}) - cl, nsops, md5, fd = blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', True, - [None, None, None]) - assert sa_in_queue.qsize() == 2 - assert 2 == nsops - assert 6 == cl - assert 2 == nsops - assert 'md5' == md5 - assert fd is not None - fd.close() - cl, nsops, md5, fd = blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [None, None, None]) - assert 2 == nsops - assert fd is None - assert sa_in_queue.qsize() == 4 - with open(lpath, 'wt') as f: - f.write('012345') - 
m.head('mock://blobepcontainer/blob?saskey', headers={ - 'content-length': '6', 'content-md5': '1qmpM8iq/FHlWsBmK25NSg=='}) - cl, nsops, md5, fd = blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', True, - [None, None, None]) - assert nsops is None - assert cl is None - assert sa_in_queue.qsize() == 4 - - sa_in_queue = queue.PriorityQueue() - args.rsaprivatekey = _RSAKEY - args.rsapublickey = None - symkey, signkey = blobxfer.generate_aes256_keys() - args.encmode = blobxfer._ENCRYPTION_MODE_CHUNKEDBLOB - metajson = blobxfer.EncryptionMetadataJson( - args, symkey, signkey, iv=b'0', encdata_signature=b'0', - preencrypted_md5=None) - encmeta = metajson.construct_metadata_json() - goodencjson = json.loads(encmeta[blobxfer._ENCRYPTION_METADATA_NAME]) - goodauthjson = json.loads( - encmeta[blobxfer._ENCRYPTION_METADATA_AUTH_NAME]) - metajson2 = blobxfer.EncryptionMetadataJson( - args, None, None, None, None, None) - metajson2.parse_metadata_json( - 'blob', args.rsaprivatekey, args.rsapublickey, encmeta) - assert metajson2.symkey == symkey - assert metajson2.signkey == signkey - assert metajson2.encmode == args.encmode - assert metajson2.chunksizebytes == args.chunksizebytes + \ - blobxfer._AES256CBC_HMACSHA256_OVERHEAD_BYTES + 1 - encjson = json.loads(encmeta[blobxfer._ENCRYPTION_METADATA_NAME]) - encjson[blobxfer._ENCRYPTION_METADATA_LAYOUT][ - blobxfer._ENCRYPTION_METADATA_CHUNKSTRUCTURE] = 'X' - headers = { - 'content-length': '64', - 'content-md5': 'md5', - 'x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME: - json.dumps(encjson), - 'x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_AUTH_NAME: - json.dumps(goodauthjson), - } - m.head('mock://blobepcontainer/blob?saskey', headers=headers) - with pytest.raises(RuntimeError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [None, None, None]) - - # switch to full blob mode tests - args.encmode = blobxfer._ENCRYPTION_MODE_FULLBLOB - metajson = blobxfer.EncryptionMetadataJson( - args, symkey, signkey, iv=b'0', encdata_signature=b'0', - preencrypted_md5=None) - encmeta = metajson.construct_metadata_json() - goodencjson = json.loads(encmeta[blobxfer._ENCRYPTION_METADATA_NAME]) - goodauthjson = json.loads( - encmeta[blobxfer._ENCRYPTION_METADATA_AUTH_NAME]) - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME] = \ - json.dumps(goodencjson) - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_AUTH_NAME] = \ - json.dumps(goodauthjson) - - encjson = copy.deepcopy(goodencjson) - encjson[blobxfer._ENCRYPTION_METADATA_AGENT][ - blobxfer._ENCRYPTION_METADATA_PROTOCOL] = 'X' - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME] = \ - json.dumps(encjson) - m.head('mock://blobepcontainer/blob?saskey', headers=headers) - with pytest.raises(RuntimeError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [None, None, None]) - - encjson = copy.deepcopy(goodencjson) - encjson[blobxfer._ENCRYPTION_METADATA_AGENT][ - blobxfer._ENCRYPTION_METADATA_ENCRYPTION_ALGORITHM] = 'X' - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME] = \ - json.dumps(encjson) - m.head('mock://blobepcontainer/blob?saskey', headers=headers) - with pytest.raises(RuntimeError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [None, None, None]) - - encjson = copy.deepcopy(goodencjson) - encjson[blobxfer._ENCRYPTION_METADATA_INTEGRITY_AUTH][ - blobxfer._ENCRYPTION_METADATA_ALGORITHM] = 'X' - 
headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME] = \ - json.dumps(encjson) - m.head('mock://blobepcontainer/blob?saskey', headers=headers) - with pytest.raises(RuntimeError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [None, None, None]) - - encjson = copy.deepcopy(goodencjson) - encjson[blobxfer._ENCRYPTION_METADATA_WRAPPEDCONTENTKEY][ - blobxfer._ENCRYPTION_METADATA_ALGORITHM] = 'X' - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME] = \ - json.dumps(encjson) - m.head('mock://blobepcontainer/blob?saskey', headers=headers) - with pytest.raises(RuntimeError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [None, None, None]) - - authjson = copy.deepcopy(goodauthjson) - authjson.pop(blobxfer._ENCRYPTION_METADATA_AUTH_METAAUTH, None) - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME] = \ - json.dumps(goodencjson) - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_AUTH_NAME] = \ - json.dumps(authjson) - m.head('mock://blobepcontainer/blob?saskey', headers=headers) - with pytest.raises(RuntimeError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [None, None, None]) - - authjson = copy.deepcopy(goodauthjson) - authjson[blobxfer._ENCRYPTION_METADATA_AUTH_METAAUTH].pop( - blobxfer._ENCRYPTION_METADATA_AUTH_ENCODING, None) - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME] = \ - json.dumps(goodencjson) - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_AUTH_NAME] = \ - json.dumps(authjson) - m.head('mock://blobepcontainer/blob?saskey', headers=headers) - with pytest.raises(RuntimeError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [None, None, None]) - - authjson = copy.deepcopy(goodauthjson) - authjson[blobxfer._ENCRYPTION_METADATA_AUTH_METAAUTH][ - blobxfer._ENCRYPTION_METADATA_ALGORITHM] = 'X' - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME] = \ - json.dumps(goodencjson) - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_AUTH_NAME] = \ - json.dumps(authjson) - m.head('mock://blobepcontainer/blob?saskey', headers=headers) - with pytest.raises(RuntimeError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [None, None, None]) - - authjson = copy.deepcopy(goodauthjson) - authjson[blobxfer._ENCRYPTION_METADATA_AUTH_METAAUTH][ - blobxfer._ENCRYPTION_METADATA_MAC] = blobxfer.base64encode(b'X') - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME] = \ - json.dumps(goodencjson) - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_AUTH_NAME] = \ - json.dumps(authjson) - m.head('mock://blobepcontainer/blob?saskey', headers=headers) - with pytest.raises(RuntimeError): - blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [None, None, None]) - - args.chunksizebytes = 5 - metajson.chunksizebytes = args.chunksizebytes - metajson.md5 = headers['content-md5'] - args.encmode = blobxfer._ENCRYPTION_MODE_FULLBLOB - encjson = copy.deepcopy(goodencjson) - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_NAME] = \ - json.dumps(encjson) - headers['x-ms-meta-' + blobxfer._ENCRYPTION_METADATA_AUTH_NAME] = \ - json.dumps(goodauthjson) - hcl = int(headers['content-length']) - cl, nsops, md5, fd = blobxfer.generate_xferspec_download( - sbs, None, args, sa_in_queue, lpath, 'blob', False, - [hcl, headers['content-md5'], metajson]) - assert hcl == cl - calcops = hcl // 
args.chunksizebytes - hclmod = hcl % args.chunksizebytes - if hclmod > 0: - calcops += 1 - assert calcops == nsops - assert headers['content-md5'] == md5 - assert fd is None - assert sa_in_queue.qsize() == nsops - data = sa_in_queue.get() - assert data is not None - - -def test_generate_xferspec_upload(tmpdir): - lpath = str(tmpdir.join('test.tmp')) - with open(lpath, 'wt') as f: - f.write(str(uuid.uuid4())) - args = MagicMock() - args.storageaccount = 'sa' - args.container = 'container' - args.storageaccountkey = 'key' - args.chunksizebytes = 5 - args.skiponmatch = False - args.pageblob = False - args.autovhd = False - sa_in_queue = queue.PriorityQueue() - fs, nsops, md5, fd = blobxfer.generate_xferspec_upload( - args, sa_in_queue, {}, {}, lpath, 'rr', True) - stat = os.stat(lpath) - assert stat.st_size == fs - assert math.ceil(stat.st_size / 5.0) == nsops - assert fd is not None - fd.close() - args.skiponmatch = True - with open(lpath, 'wt') as f: - f.write('012345') - sd = {} - sd['rr'] = [6, '1qmpM8iq/FHlWsBmK25NSg=='] - fs, nsops, md5, fd = blobxfer.generate_xferspec_upload( - args, sa_in_queue, sd, {}, lpath, 'rr', False) - assert fs is None - - -def test_apply_file_collation_and_strip(): - args = MagicMock() - args.collate = 'collatedir' - rfname = blobxfer.apply_file_collation_and_strip( - args, 'tmpdir/file0') - assert rfname == 'collatedir/file0' - - args.collate = None - args.stripcomponents = 0 - rfname = blobxfer.apply_file_collation_and_strip( - args, 'tmpdir/file0') - assert rfname == 'tmpdir/file0' - args.stripcomponents = 1 - rfname = blobxfer.apply_file_collation_and_strip( - args, 'tmpdir/file0') - assert rfname == 'file0' - args.stripcomponents = 2 - rfname = blobxfer.apply_file_collation_and_strip( - args, 'tmpdir/file0') - assert rfname == 'file0' - args.stripcomponents = 1 - rfname = blobxfer.apply_file_collation_and_strip( - args, '/tmpdir/tmpdir2/file0') - assert rfname == 'tmpdir2/file0' - args.stripcomponents = 2 - rfname = blobxfer.apply_file_collation_and_strip( - args, 'tmpdir/tmpdir2/file0') - assert rfname == 'file0' - - -@patch('azure.storage.file.FileService.create_directory') -def test_create_all_parent_directories_fileshare(patched_cd): - patched_cd.return_value = MagicMock() - fsfile = ['tmp/a/b', None] - file_service = MagicMock() - args = MagicMock() - args.container = 'fshare' - args.timeout = None - dirscreated = set() - blobxfer.create_all_parent_directories_fileshare( - file_service, args, fsfile, dirscreated) - assert len(dirscreated) == 3 - assert 'tmp' in dirscreated - assert 'tmp/a' in dirscreated - assert 'tmp/a/b' in dirscreated - fsfile = ['tmp/a/b/c', None] - blobxfer.create_all_parent_directories_fileshare( - file_service, args, fsfile, dirscreated) - assert len(dirscreated) == 4 - assert 'tmp/a/b/c' in dirscreated - fsfile = ['x/a/b/c', None] - blobxfer.create_all_parent_directories_fileshare( - file_service, args, fsfile, dirscreated) - assert len(dirscreated) == 8 - assert 'x/a/b/c' in dirscreated - - -def _mock_get_storage_account_keys(timeout=None, service_name=None): - ret = MagicMock() - ret.storage_service_keys.primary = 'mmkey' - return ret - - -def _mock_get_storage_account_properties(timeout=None, service_name=None): - ret = MagicMock() - ret.storage_service_properties.endpoints = [None] - return ret - - -def _mock_blobservice_create_container(timeout=None, container_name=None, - fail_on_exist=None): - raise azure.common.AzureConflictHttpError('conflict', 409) - - -@patch('blobxfer.parseargs') 
-@patch('azure.servicemanagement.ServiceManagementService.' - 'get_storage_account_keys') -@patch('azure.servicemanagement.ServiceManagementService.' - 'get_storage_account_properties') -def test_main1( - patched_sms_saprops, patched_sms_sakeys, patched_parseargs, tmpdir): - lpath = str(tmpdir.join('test.tmp')) - args = MagicMock() - args.include = None - args.stripcomponents = 0 - args.delete = False - args.rsaprivatekey = None - args.rsapublickey = None - args.rsakeypassphrase = None - args.numworkers = 0 - args.localresource = '' - args.storageaccount = 'blobep' - args.container = 'container' - args.storageaccountkey = None - os.environ[blobxfer._ENVVAR_STORAGEACCOUNTKEY] = 'saskey' - args.chunksizebytes = 5 - args.pageblob = False - args.autovhd = False - args.fileshare = False - patched_parseargs.return_value = args - with pytest.raises(ValueError): - blobxfer.main() - args.localresource = lpath - args.endpoint = '' - with pytest.raises(ValueError): - blobxfer.main() - args.endpoint = 'blobep' - args.upload = True - args.download = True - with pytest.raises(ValueError): - blobxfer.main() - args.upload = None - args.download = None - with pytest.raises(ValueError): - blobxfer.main() - os.environ.pop(blobxfer._ENVVAR_STORAGEACCOUNTKEY) - args.storageaccountkey = None - args.timeout = -1 - args.saskey = '' - with pytest.raises(ValueError): - blobxfer.main() - args.saskey = None - args.storageaccountkey = None - args.managementcert = 'cert.spam' - args.subscriptionid = '1234' - with pytest.raises(ValueError): - blobxfer.main() - args.managementcert = 'cert.pem' - args.managementep = None - with pytest.raises(ValueError): - blobxfer.main() - args.managementep = 'mep' - args.subscriptionid = None - with pytest.raises(ValueError): - blobxfer.main() - args.subscriptionid = '1234' - args.pageblob = True - args.autovhd = True - with pytest.raises(ValueError): - blobxfer.main() - args.autovhd = False - args.fileshare = True - with pytest.raises(ValueError): - blobxfer.main() - args.pageblob = False - args.autovhd = True - with pytest.raises(ValueError): - blobxfer.main() - args.autovhd = False - args.fileshare = False - with patch('azure.servicemanagement.ServiceManagementService') as mock: - mock.return_value = MagicMock() - mock.return_value.get_storage_account_keys = \ - _mock_get_storage_account_keys - mock.return_value.get_storage_account_properties = \ - _mock_get_storage_account_properties - with pytest.raises(ValueError): - blobxfer.main() - args.managementep = None - args.managementcert = None - args.subscriptionid = None - args.remoteresource = 'blob' - args.chunksizebytes = None - with patch('azure.storage.blob.BlockBlobService') as mock: - mock.return_value = None - with pytest.raises(ValueError): - blobxfer.main() - args.storageaccountkey = None - args.saskey = None - os.environ[blobxfer._ENVVAR_SASKEY] = 'saskey' - args.remoteresource = None - args.download = True - with pytest.raises(ValueError): - blobxfer.main() - - args.download = False - args.upload = True - args.remoteresource = None - args.storageaccountkey = '' - args.saskey = None - with pytest.raises(ValueError): - blobxfer.main() - - args.collate = 'collatetmp' - with pytest.raises(ValueError): - blobxfer.main() - - args.collate = None - args.storageaccountkey = None - args.saskey = '' - with pytest.raises(ValueError): - blobxfer.main() - - args.saskey = None - os.environ.pop(blobxfer._ENVVAR_SASKEY) - with pytest.raises(ValueError): - blobxfer.main() - args.managementcert = '0' - args.managementep = '' - 
args.subscriptionid = '0' - with pytest.raises(ValueError): - blobxfer.main() - args.managementcert = 'test.pem' - with pytest.raises(ValueError): - blobxfer.main() - args.managementep = 'mep.mep' - ssk = MagicMock() - ssk.storage_service_keys = MagicMock() - ssk.storage_service_keys.primary = '' - patched_sms_sakeys.return_value = ssk - ssp = MagicMock() - ssp.storage_service_properties = MagicMock() - ssp.storage_service_properties.endpoints = ['blobep'] - patched_sms_saprops.return_value = ssp - with pytest.raises(ValueError): - blobxfer.main() - ssk.storage_service_keys.primary = 'key1' - args.storageaccountkey = None - args.rsaprivatekey = '' - args.rsapublickey = '' - with pytest.raises(ValueError): - blobxfer.main() - args.rsaprivatekey = '' - args.rsapublickey = None - args.encmode = blobxfer._ENCRYPTION_MODE_FULLBLOB - with pytest.raises(IOError): - blobxfer.main() - - args.rsaprivatekey = None - args.storageaccountkey = None - args.managementcert = None - args.managementep = None - args.subscriptionid = None - - args.upload = False - args.download = True - args.remoteresource = None - args.saskey = 'saskey&srt=c' - with pytest.raises(ValueError): - blobxfer.main() - args.upload = True - args.download = False - args.saskey = None - - os.environ[blobxfer._ENVVAR_SASKEY] = 'saskey' - with open(lpath, 'wt') as f: - f.write(str(uuid.uuid4())) - - session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - with requests_mock.mock() as m: - m.put('https://blobep.blob.blobep/container/blob?saskey' - '&comp=block&blockid=00000000', status_code=201) - m.put('https://blobep.blob.blobep/container' + lpath + - '?saskey&blockid=00000000&comp=block', status_code=201) - m.put('https://blobep.blob.blobep/container' + lpath + - '?saskey&comp=blocklist', status_code=201) - m.put('https://blobep.blob.blobep/container' + lpath + - '?saskey&comp=block&blockid=00000000', status_code=201) - m.put('https://blobep.blob.blobep/container' + lpath + - '?saskey&comp=metadata', status_code=200) - m.get('https://blobep.blob.blobep/container?saskey&comp=list' - '&restype=container&maxresults=1000', - text='' - '' + lpath + '' - '6' - 'md5BlockBlob' - '') - args.progressbar = False - args.skiponmatch = True - blobxfer.main() - - args.progressbar = True - args.download = True - args.upload = False - args.remoteresource = None - with pytest.raises(ValueError): - blobxfer.main() - - args.remoteresource = 'blob' - args.localresource = str(tmpdir) - m.head('https://blobep.blob.blobep/container/blob?saskey', headers={ - 'content-length': '6', 'content-md5': '1qmpM8iq/FHlWsBmK25NSg=='}) - m.get('https://blobep.blob.blobep/container/blob?saskey', - content=b'012345') - blobxfer.main() - - args.pageblob = False - args.autovhd = False - args.skiponmatch = False - pemcontents = _RSAKEY.private_bytes( - encoding=cryptography.hazmat.primitives.serialization. - Encoding.PEM, - format=cryptography.hazmat.primitives.serialization. - PrivateFormat.PKCS8, - encryption_algorithm=cryptography.hazmat.primitives. - serialization.NoEncryption()) - pempath = str(tmpdir.join('rsa.pem')) - with open(pempath, 'wb') as f: - f.write(pemcontents) - args.rsaprivatekey = pempath - blobxfer.main() - os.remove(pempath) - - args.rsaprivatekey = None - args.skiponmatch = True - args.remoteresource = '.' 
- args.keepmismatchedmd5files = False - m.get('https://blobep.blob.blobep/container?saskey&comp=list' - '&restype=container&maxresults=1000', - text='' - 'blob' - '6' - 'BlockBlob' - '') - m.get('https://blobep.blob.blobep/container/?saskey') - with pytest.raises(SystemExit): - blobxfer.main() - - m.get('https://blobep.blob.blobep/container?saskey&comp=list' - '&restype=container&maxresults=1000', - text='' - 'blob' - '6md5' - 'BlockBlob' - '') - blobxfer.main() - - tmplpath = str(tmpdir.join('test', 'test2', 'test3')) - args.localresource = tmplpath - blobxfer.main() - - args.localresource = str(tmpdir) - notmp_lpath = '/'.join(lpath.strip('/').split('/')[1:]) - - with requests_mock.mock() as m: - args.delete = True - args.download = False - args.upload = True - args.remoteresource = None - args.skiponmatch = False - m.put('https://blobep.blob.blobep/container/test.tmp?saskey' - '&comp=block&blockid=00000000', status_code=200) - m.put('https://blobep.blob.blobep/container/test.tmp?saskey' - '&comp=blocklist', status_code=201) - m.put('https://blobep.blob.blobep/container' + lpath + - '?saskey&comp=block&blockid=00000000', status_code=200) - m.put('https://blobep.blob.blobep/container' + lpath + - '?saskey&comp=blocklist', status_code=201) - m.put('https://blobep.blob.blobep/container/' + notmp_lpath + - '?saskey&comp=block&blockid=00000000', status_code=200) - m.put('https://blobep.blob.blobep/container/' + notmp_lpath + - '?saskey&comp=blocklist', status_code=201) - m.get('https://blobep.blob.blobep/container?saskey&comp=list' - '&restype=container&maxresults=1000', - text='' - 'blob' - '6md5' - 'BlockBlob' - '') - m.delete('https://blobep.blob.blobep/container/blob?saskey', - status_code=202) - with pytest.raises(SystemExit): - blobxfer.main() - - args.recursive = False - m.put('https://blobep.blob.blobep/container/blob.blobtmp?saskey' - '&comp=blocklist', status_code=201) - m.put('https://blobep.blob.blobep/container/test.tmp.blobtmp?saskey' - '&comp=blocklist', status_code=201) - m.put('https://blobep.blob.blobep/container/blob.blobtmp?saskey' - '&comp=block&blockid=00000000', status_code=200) - m.put('https://blobep.blob.blobep/container/blob?saskey' - '&comp=blocklist', status_code=201) - with pytest.raises(SystemExit): - blobxfer.main() - - args.stripcomponents = None - args.collate = '.' 
- args.pageblob = True - args.upload = True - args.download = False - m.put('https://blobep.blob.blobep/container/blob.blobtmp?saskey', - status_code=201) - m.put('https://blobep.blob.blobep/container/test.tmp?saskey', - status_code=201) - m.put('https://blobep.blob.blobep/container/blob.blobtmp?saskey' - '&comp=properties', status_code=200) - m.put('https://blobep.blob.blobep/container/test.tmp?saskey' - '&comp=properties', status_code=200) - m.put('https://blobep.blob.blobep/container/blob?saskey', - status_code=201) - with pytest.raises(IOError): - blobxfer.main() - - args.stripcomponents = None - m.put('https://blobep.blob.blobep/container/blobsaskey', - status_code=200) - with pytest.raises(IOError): - blobxfer.main() - - args.stripcomponents = None - args.pageblob = False - m.put('https://blobep.blob.blobep/container/' + notmp_lpath + - '?saskey&comp=blocklist', status_code=201) - m.put('https://blobep.blob.blobep/container/blob?saskey', - status_code=201) - blobxfer.main() - - args.stripcomponents = None - args.autovhd = True - blobxfer.main() - - args.stripcomponents = None - args.include = 'nofiles' - with pytest.raises(SystemExit): - blobxfer.main() - - args.stripcomponents = None - args.include = '*' - blobxfer.main() - - args.include = None - args.stripcomponents = None - args.pageblob = False - args.autovhd = False - pempath = str(tmpdir.join('rsa.pem')) - with open(pempath, 'wb') as f: - f.write(pemcontents) - args.rsaprivatekey = pempath - m.put('https://blobep.blob.blobep/container/rsa.pem?saskey&comp=block' - '&blockid=00000000', status_code=201) - m.put('https://blobep.blob.blobep/container/rsa.pem?saskey' - '&comp=blocklist', status_code=201) - m.put('https://blobep.blob.blobep/container/rsa.pem?saskey' - '&comp=metadata', status_code=200) - m.put('https://blobep.blob.blobep/container/blob?saskey' - '&comp=metadata', status_code=200) - m.put('https://blobep.blob.blobep/container/blob.blobtmp?saskey' - '&comp=metadata', status_code=200) - m.put('https://blobep.blob.blobep/container/test.tmp.blobtmp?saskey' - '&comp=metadata', status_code=200) - m.put('https://blobep.blob.blobep/container/test.tmp?saskey' - '&comp=metadata', status_code=200) - blobxfer.main() - - args.stripcomponents = None - args.download = True - args.upload = False - args.rsaprivatekey = pempath - args.remoteresource = 'blob' - args.localresource = str(tmpdir) - m.head('https://blobep.blob.blobep/container/blob?saskey', headers={ - 'content-length': '6', 'content-md5': '1qmpM8iq/FHlWsBmK25NSg=='}) - m.get('https://blobep.blob.blobep/container/blob?saskey', - content=b'012345') - # TODO add encrypted data json - blobxfer.main() - - os.environ.pop(blobxfer._ENVVAR_SASKEY) - - -@patch('blobxfer.parseargs') -def test_main2(patched_parseargs, tmpdir): - lpath = str(tmpdir.join('test.tmp')) - args = MagicMock() - patched_parseargs.return_value = args - args.include = None - args.stripcomponents = 1 - args.delete = False - args.rsaprivatekey = None - args.rsapublickey = None - args.numworkers = 64 - args.storageaccount = 'blobep' - args.container = 'container' - args.chunksizebytes = 5 - args.localresource = lpath - args.endpoint = '.blobep' - args.timeout = 10 - args.managementep = None - args.managementcert = None - args.subscriptionid = None - args.chunksizebytes = None - args.download = False - args.upload = True - args.remoteresource = None - args.collate = None - args.saskey = None - args.storageaccountkey = 'key' - args.fileshare = False - with open(lpath, 'wt') as f: - f.write(str(uuid.uuid4())) - - 
session = requests.Session() - adapter = requests_mock.Adapter() - session.mount('mock', adapter) - - with patch('azure.storage.blob.BlockBlobService') as mock: - args.createcontainer = True - args.pageblob = False - args.autovhd = False - mock.return_value = MagicMock() - mock.return_value.create_container = _mock_blobservice_create_container - blobxfer.main() - - -@patch('azure.storage.file.FileService.create_share') -@patch('azure.storage.file.FileService.create_file') -@patch('azure.storage.file.FileService.create_directory') -@patch('azure.storage.file.FileService.get_file_properties') -@patch('azure.storage.file.FileService.get_file_metadata') -@patch('azure.storage.file.FileService.list_directories_and_files') -@patch('azure.storage.file.FileService.update_range') -@patch('azure.storage.file.FileService._get_file') -@patch('azure.storage.file.FileService.set_file_properties') -@patch('azure.storage.file.FileService.set_file_metadata') -@patch('azure.storage.file.FileService.resize_file') -@patch('blobxfer.parseargs') -def test_main3( - patched_parseargs, patched_rf, patched_sfm, patched_sfp, - patched_get_file, patched_update_range, patched_ldaf, patched_gfm, - patched_gfp, patched_cd, patched_cf, patched_cs, tmpdir): - lpath = str(tmpdir.join('test.tmp')) - args = MagicMock() - patched_parseargs.return_value = args - args.include = None - args.stripcomponents = 1 - args.delete = False - args.rsaprivatekey = None - args.rsapublickey = None - args.numworkers = 64 - args.storageaccount = 'sa' - args.container = 'myshare' - args.chunksizebytes = 5 - args.localresource = lpath - args.endpoint = 'core.windows.net' - args.timeout = 10 - args.managementep = None - args.managementcert = None - args.subscriptionid = None - args.chunksizebytes = None - args.download = False - args.upload = True - args.remoteresource = None - args.collate = None - args.saskey = None - args.storageaccountkey = 'key' - args.pageblob = False - args.autovhd = False - args.fileshare = True - args.computefilemd5 = True - args.skiponmatch = True - with open(lpath, 'wt') as f: - f.write(str(uuid.uuid4())) - - patched_cs.return_value = MagicMock() - patched_cf.return_value = MagicMock() - patched_gfp.return_value = MagicMock() - patched_update_range.return_value = MagicMock() - patched_get_file.return_value = MagicMock() - patched_get_file.return_value.content = b'\0' * 8 - - pemcontents = _RSAKEY.private_bytes( - encoding=cryptography.hazmat.primitives.serialization. - Encoding.PEM, - format=cryptography.hazmat.primitives.serialization. - PrivateFormat.PKCS8, - encryption_algorithm=cryptography.hazmat.primitives. - serialization.NoEncryption()) - pempath = str(tmpdir.join('rsa.pem')) - with open(pempath, 'wb') as f: - f.write(pemcontents) - - args.rsaprivatekey = pempath - args.rsakeypassphrase = None - args.encmode = blobxfer._ENCRYPTION_MODE_FULLBLOB - blobxfer.main() - - args.download = True - args.upload = False - args.rsaprivatekey = pempath - args.remoteresource = '.' 
- with pytest.raises(SystemExit):
- blobxfer.main()
-
- patched_ldaf.return_value = [azure.storage.file.File(name='test.tmp')]
- patched_gfp.return_value = MagicMock()
- patched_gfp.return_value.properties = MagicMock()
- patched_gfp.return_value.properties.content_length = 1
- patched_gfp.return_value.properties.content_settings = MagicMock()
- patched_gfp.return_value.properties.content_settings.content_md5 = 'md5'
- args.rsaprivatekey = pempath
- args.localresource = lpath.rstrip(os.path.sep + 'test.tmp')
- blobxfer.main()
-
- os.remove(pempath)
diff --git a/test_requirements.txt b/test_requirements.txt
new file mode 100644
index 0000000..925320c
--- /dev/null
+++ b/test_requirements.txt
@@ -0,0 +1,5 @@
+flake8>=3.2.1
+mock>=2.0.0
+pypandoc>=1.3.3
+pytest>=3.0.5
+pytest-cov>=2.4.0
diff --git a/tests/test_blobxfer_util.py b/tests/test_blobxfer_util.py
new file mode 100644
index 0000000..bc17d06
--- /dev/null
+++ b/tests/test_blobxfer_util.py
@@ -0,0 +1,133 @@
+# coding=utf-8
+"""Tests for util"""
+
+# stdlib imports
+import sys
+import uuid
+# non-stdlib imports
+import pytest
+# module under test
+import blobxfer.util
+
+
+def test_on_python2():
+    py2 = sys.version_info.major == 2
+    assert py2 == blobxfer.util.on_python2()
+
+
+def test_is_none_or_empty():
+    a = None
+    assert blobxfer.util.is_none_or_empty(a)
+    a = []
+    assert blobxfer.util.is_none_or_empty(a)
+    a = {}
+    assert blobxfer.util.is_none_or_empty(a)
+    a = ''
+    assert blobxfer.util.is_none_or_empty(a)
+    a = 'asdf'
+    assert not blobxfer.util.is_none_or_empty(a)
+    a = ['asdf']
+    assert not blobxfer.util.is_none_or_empty(a)
+    a = {'asdf': 0}
+    assert not blobxfer.util.is_none_or_empty(a)
+    a = [None]
+    assert not blobxfer.util.is_none_or_empty(a)
+
+
+def test_is_not_empty():
+    a = None
+    assert not blobxfer.util.is_not_empty(a)
+    a = []
+    assert not blobxfer.util.is_not_empty(a)
+    a = {}
+    assert not blobxfer.util.is_not_empty(a)
+    a = ''
+    assert not blobxfer.util.is_not_empty(a)
+    a = 'asdf'
+    assert blobxfer.util.is_not_empty(a)
+    a = ['asdf']
+    assert blobxfer.util.is_not_empty(a)
+    a = {'asdf': 0}
+    assert blobxfer.util.is_not_empty(a)
+    a = [None]
+    assert blobxfer.util.is_not_empty(a)
+
+
+def test_merge_dict():
+    with pytest.raises(ValueError):
+        blobxfer.util.merge_dict(1, 2)
+
+    a = {'a_only': 42, 'a_and_b': 43,
+         'a_only_dict': {'a': 44}, 'a_and_b_dict': {'a_o': 45, 'a_a_b': 46}}
+    b = {'b_only': 45, 'a_and_b': 46,
+         'b_only_dict': {'a': 47}, 'a_and_b_dict': {'b_o': 48, 'a_a_b': 49}}
+    c = blobxfer.util.merge_dict(a, b)
+    assert c['a_only'] == 42
+    assert c['b_only'] == 45
+    assert c['a_and_b_dict']['a_o'] == 45
+    assert c['a_and_b_dict']['b_o'] == 48
+    assert c['a_and_b_dict']['a_a_b'] == 49
+    assert c['b_only_dict']['a'] == 47
+    assert c['a_and_b'] == 46
+    assert a['a_only'] == 42
+    assert a['a_and_b'] == 43
+    assert b['b_only'] == 45
+    assert b['a_and_b'] == 46
+
+
+def test_scantree(tmpdir):
+    tmpdir.mkdir('abc')
+    abcpath = tmpdir.join('abc')
+    abcpath.join('hello.txt').write('hello')
+    abcpath.mkdir('def')
+    defpath = abcpath.join('def')
+    defpath.join('world.txt').write('world')
+    found = set()
+    for de in blobxfer.util.scantree(str(tmpdir.dirpath())):
+        if de.name != '.lock':
+            found.add(de.name)
+    assert 'hello.txt' in found
+    assert 'world.txt' in found
+    assert len(found) == 2
+
+
+def test_get_mime_type():
+    a = 'b.txt'
+    mt = blobxfer.util.get_mime_type(a)
+    assert mt == 'text/plain'
+    a = 'c.probably_cant_determine_this'
+    mt = blobxfer.util.get_mime_type(a)
+    assert mt == 'application/octet-stream'
+
+
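# ----------------------------------------------------------------------
# Editor's aside (illustrative only, not part of the patch): a minimal
# sketch of a mime-type helper consistent with test_get_mime_type above.
# It assumes the stdlib mimetypes module; the actual implementation in
# blobxfer.util may differ.
import mimetypes


def get_mime_type(filename):
    # guess_type() returns (None, None) for unknown extensions, so fall
    # back to the generic binary content type
    return mimetypes.guess_type(filename)[0] or 'application/octet-stream'


assert get_mime_type('b.txt') == 'text/plain'
assert get_mime_type('c.unknown_ext') == 'application/octet-stream'
# ----------------------------------------------------------------------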
+def test_base64_encode_as_string():
+    a = b'abc'
+    enc = blobxfer.util.base64_encode_as_string(a)
+    assert type(enc) != bytes
+    dec = blobxfer.util.base64_decode_string(enc)
+    assert a == dec
+
+
+def test_compute_md5(tmpdir):
+    lpath = str(tmpdir.join('test.tmp'))
+    testdata = str(uuid.uuid4())
+    with open(lpath, 'wt') as f:
+        f.write(testdata)
+    md5_file = blobxfer.util.compute_md5_for_file_asbase64(lpath)
+    md5_data = blobxfer.util.compute_md5_for_data_asbase64(
+        testdata.encode('utf8'))
+    assert md5_file == md5_data
+
+    md5_file_page = blobxfer.util.compute_md5_for_file_asbase64(lpath, True)
+    assert md5_file != md5_file_page
+
+    # test non-existent file
+    with pytest.raises(IOError):
+        blobxfer.util.compute_md5_for_file_asbase64(testdata)
+
+
+def test_page_align_content_length():
+    assert 0 == blobxfer.util.page_align_content_length(0)
+    assert 512 == blobxfer.util.page_align_content_length(511)
+    assert 512 == blobxfer.util.page_align_content_length(512)
+    assert 1024 == blobxfer.util.page_align_content_length(513)
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..f2b110d
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,18 @@
+[tox]
+envlist = py35
+
+[testenv]
+deps = -rtest_requirements.txt
+commands =
+    #flake8 {envsitepackagesdir}/blobxfer_cli/
+    #flake8 {envsitepackagesdir}/blobxfer/
+    py.test \
+        -x -l -s \
+        --ignore venv/ \
+        --cov-config .coveragerc \
+        --cov-report term-missing \
+        --cov {envsitepackagesdir}/blobxfer
+
+[flake8]
+max-line-length = 79
+select = F,E,W
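Editor's note: the test_page_align_content_length cases added above imply that blobxfer.util.page_align_content_length rounds a byte length up to the next 512-byte page boundary used by Azure page blobs. The following is a minimal, illustrative sketch consistent with those assertions only; the function name comes from the test, but the body and the _PAGE_SIZE constant shown here are assumptions, not the project's implementation.

# Illustrative sketch (assumption): round a content length up to the
# 512-byte page boundary expected by Azure page blobs.
_PAGE_SIZE = 512  # hypothetical constant; not taken from the source


def page_align_content_length(length):
    mod = length % _PAGE_SIZE
    if mod != 0:
        return length + (_PAGE_SIZE - mod)
    return length


# mirrors the assertions in tests/test_blobxfer_util.py
assert page_align_content_length(0) == 0
assert page_align_content_length(511) == 512
assert page_align_content_length(512) == 512
assert page_align_content_length(513) == 1024

With the packages from test_requirements.txt installed, the py35 environment declared in the tox.ini above would typically be exercised with a plain tox invocation from the repository root.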