From 15ecff9e0b26740b4e3c5192de5794341ea7bb4c Mon Sep 17 00:00:00 2001 From: Peter Barmettler Date: Thu, 1 Dec 2022 16:43:46 +0100 Subject: [PATCH 1/5] Squashed commit of the following: commit ab4ca54c755774be2673cdc76cbed2f4927684f4 Author: Peter Barmettler Date: Thu Dec 1 16:31:40 2022 +0100 auto use storage commit d5bdd8c119cb25574bb5c372730993c6ae6935ca Author: Peter Barmettler Date: Thu Dec 1 16:26:32 2022 +0100 azure sdk docstring | pass on parameters | basic test commit 8fdb60bedbc9210d6a25ee27f65148367f8374ac Author: Peter Barmettler Date: Thu Dec 1 15:19:03 2022 +0100 Revert "client and upload kwargs" This reverts commit cca3c5eeaa00c328bdf00e25d0092b6892859eb4. commit 8a71e55ef50f616731f36f9bef0c78db9117d55d Author: Peter Barmettler Date: Thu Dec 1 15:17:58 2022 +0100 add azure sdk docstring commit cca3c5eeaa00c328bdf00e25d0092b6892859eb4 Author: Peter Barmettler Date: Thu Oct 27 12:49:12 2022 +0200 client and upload kwargs --- adlfs/spec.py | 644 +++++++++++++++++++++++++++++++++++++- adlfs/tests/test_spec.py | 17 + requirements/dev.txt | 3 +- tools/azure_sdk_kwargs.py | 150 +++++++++ 4 files changed, 799 insertions(+), 15 deletions(-) create mode 100644 tools/azure_sdk_kwargs.py diff --git a/adlfs/spec.py b/adlfs/spec.py index db9d9b9a..2308c1b2 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -151,6 +151,544 @@ class AzureBlobFileSystem(AsyncFileSystem): skip_instance_cache: to control reuse of instances use_listings_cache, listings_expiry_time, max_paths: to control reuse of directory listings + Pass on to azure-sdk: + + timeout: int + Directly passed to azure-sdk calls: + - BlobClient.download_blob + - BlobClient.get_blob_properties + - BlobClient.start_copy_from_url + - BlobClient.upload_blob + - BlobServiceClient.create_container + - BlobServiceClient.delete_container + - BlobServiceClient.list_containers + - ContainerClient.delete_blob + - ContainerClient.get_container_properties + - ContainerClient.walk_blobs. 
+ Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + The timeout parameter is expressed in seconds. + tags + Directly passed to azure-sdk calls: + - BlobClient.start_copy_from_url + - BlobClient.upload_blob. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Name-value pairs associated with the blob as tag. Tags are case-sensitive. + The tag set may contain at most 10 tags. Tag keys must be between 1 and 128 characters, + and tag values must be between 0 and 256 characters. + Valid tag key and value characters include: lowercase and uppercase letters, digits (0-9), + space (` `), plus (+), minus (-), period (.), solidus (/), colon (:), equals (=), underscore (_) + + .. versionadded:: 12.4.0 + api_version: str + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobServiceClient. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + The Storage API version to use for requests. Default value is the most recent service version that is + compatible with the current SDK. Setting to an older version may result in reduced feature compatibility. + + .. versionadded:: 12.2.0 + max_block_size: int + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobServiceClient. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + The maximum chunk size for uploading a block blob in chunks. + Defaults to 4*1024*1024, or 4MB. + max_chunk_get_size: int + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobServiceClient. 
+ Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + The maximum chunk size used for downloading a blob. Defaults to 4*1024*1024, + or 4MB. + + .. admonition:: Example: + + .. literalinclude:: ../samples/blob_samples_authentication_async.py + :start-after: [START create_blob_service_client] + :end-before: [END create_blob_service_client] + :language: python + :dedent: 8 + :caption: Creating the BlobServiceClient with account url and credential. + + .. literalinclude:: ../samples/blob_samples_authentication_async.py + :start-after: [START create_blob_service_client_oauth] + :end-before: [END create_blob_service_client_oauth] + :language: python + :dedent: 8 + :caption: Creating the BlobServiceClient with Azure Identity credentials. + max_page_size: int + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobServiceClient. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + The maximum chunk size for uploading a page blob. Defaults to 4*1024*1024, or 4MB. + max_single_get_size: int + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobServiceClient. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + The maximum size for a blob to be downloaded in a single call, + the exceeded part will be downloaded in chunks (could be parallel). Defaults to 32*1024*1024, or 32MB. + max_single_put_size: int + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobServiceClient. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + If the blob size is less than or equal max_single_put_size, then the blob will be + uploaded with only one http PUT request. 
If the blob size is larger than max_single_put_size, + the blob will be uploaded in chunks. Defaults to 64*1024*1024, or 64MB. + min_large_block_upload_threshold: int + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobServiceClient. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + The minimum chunk size required to use the memory efficient + algorithm when uploading a block blob. Defaults to 4*1024*1024+1. + secondary_hostname: str + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobServiceClient. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + The hostname of the secondary endpoint. + use_byte_buffer: bool + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobServiceClient. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Use a byte buffer for block blob uploads. Defaults to False. + authority: str + Advanced settings parameter. + Directly passed to azure-sdk calls: + - DefaultAzureCredential. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Authority of an Azure Active Directory endpoint, for example 'login.microsoftonline.com', + the authority for Azure Public Cloud (which is the default). :class:`~azure.identity.AzureAuthorityHosts` + defines authorities for other clouds. Managed identities ignore this because they reside in a single cloud. + exclude_cli_credential: bool + Advanced settings parameter. + Directly passed to azure-sdk calls: + - DefaultAzureCredential. 
+ Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Whether to exclude the Azure CLI from the credential. Defaults to **False**. + exclude_environment_credential: bool + Advanced settings parameter. + Directly passed to azure-sdk calls: + - DefaultAzureCredential. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Whether to exclude a service principal configured by environment + variables from the credential. Defaults to **False**. + exclude_managed_identity_credential: bool + Advanced settings parameter. + Directly passed to azure-sdk calls: + - DefaultAzureCredential. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Whether to exclude managed identity from the credential. + Defaults to **False**. + exclude_powershell_credential: bool + Advanced settings parameter. + Directly passed to azure-sdk calls: + - DefaultAzureCredential. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Whether to exclude Azure PowerShell. Defaults to **False**. + exclude_shared_token_cache_credential: bool + Advanced settings parameter. + Directly passed to azure-sdk calls: + - DefaultAzureCredential. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Whether to exclude the shared token cache. Defaults to + **False**. + exclude_visual_studio_code_credential: bool + Advanced settings parameter. + Directly passed to azure-sdk calls: + - DefaultAzureCredential. 
+ Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Whether to exclude stored credential from VS Code. + Defaults to **True**. + managed_identity_client_id: str + Advanced settings parameter. + Directly passed to azure-sdk calls: + - DefaultAzureCredential. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + The client ID of a user-assigned managed identity. Defaults to the value + of the environment variable AZURE_CLIENT_ID, if any. If not specified, a system-assigned identity will be used. + shared_cache_tenant_id: str + Advanced settings parameter. + Directly passed to azure-sdk calls: + - DefaultAzureCredential. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Preferred tenant for :class:`~azure.identity.aio.SharedTokenCacheCredential`. + Defaults to the value of environment variable AZURE_TENANT_ID, if any. + shared_cache_username: str + Advanced settings parameter. + Directly passed to azure-sdk calls: + - DefaultAzureCredential. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Preferred username for :class:`~azure.identity.aio.SharedTokenCacheCredential`. + Defaults to the value of environment variable AZURE_USERNAME, if any. + visual_studio_code_tenant_id: str + Advanced settings parameter. + Directly passed to azure-sdk calls: + - DefaultAzureCredential. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Tenant ID to use when authenticating with + :class:`~azure.identity.aio.VisualStudioCodeCredential`. 
Defaults to the "Azure: Tenant" setting in VS Code's + user settings or, when that setting has no value, the "organizations" tenant, which supports only Azure Active + Directory work or school accounts. + blob_type: azure.storage.blob.BlobType + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.upload_blob. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + The type of the blob. This can be + either BlockBlob, PageBlob or AppendBlob. The default value is BlockBlob. + container_encryption_scope + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobServiceClient.create_container. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Specifies the default encryption scope to set on the container and use for + all future writes. + + .. versionadded:: 12.2.0 + content_settings: azure.storage.blob.ContentSettings + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.upload_blob. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + ContentSettings object used to set blob properties. Used to set content type, encoding, + language, disposition, md5, and cache control. + cpk: azure.storage.blob.CustomerProvidedEncryptionKey + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.download_blob + - BlobClient.get_blob_properties + - BlobClient.upload_blob. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Encrypts the data on the service-side with the given key. + Use of customer-provided keys must be done over HTTPS. 
+ As the encryption key itself is provided in the request, + a secure connection must be established to transfer the key. + delete_snapshots: str + Advanced settings parameter. + Directly passed to azure-sdk calls: + - ContainerClient.delete_blob. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Required if the blob has associated snapshots. Values include: + - "only": Deletes only the blobs snapshots. + - "include": Deletes the blob along with all snapshots. + delimiter: str + Advanced settings parameter. + Directly passed to azure-sdk calls: + - ContainerClient.walk_blobs. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + When the request includes this parameter, the operation returns a BlobPrefix + element in the response body that acts as a placeholder for all blobs whose + names begin with the same substring up to the appearance of the delimiter + character. The delimiter may be a single character or a string. + destination_lease + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.start_copy_from_url. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + The lease ID specified for this header must match the lease ID of the + destination blob. If the request does not include the lease ID or it is not + valid, the operation fails with status code 412 (Precondition Failed). + encoding: str + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.download_blob + - BlobClient.upload_blob. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Encoding to decode the downloaded bytes. Default is None, i.e. no decoding. 
+ encryption_scope: str + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.start_copy_from_url + - BlobClient.upload_blob. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + A predefined encryption scope used to encrypt the data on the service. An encryption + scope can be created using the Management API and referenced here by name. If a default + encryption scope has been defined at the container, this value will override it if the + container-level scope is configured to allow overrides. Otherwise an error will be raised. + + .. versionadded:: 12.2.0 + etag: str + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.download_blob + - BlobClient.get_blob_properties + - BlobClient.start_copy_from_url + - BlobClient.upload_blob + - BlobServiceClient.delete_container + - ContainerClient.delete_blob. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + An ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + if_modified_since: datetime.datetime + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.download_blob + - BlobClient.get_blob_properties + - BlobClient.start_copy_from_url + - BlobClient.upload_blob + - BlobServiceClient.delete_container + - ContainerClient.delete_blob. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. 
+ Specify this header to perform the operation only + if the resource has been modified since the specified time. + if_tags_match_condition: str + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.download_blob + - BlobClient.get_blob_properties + - BlobClient.upload_blob + - ContainerClient.delete_blob. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Specify a SQL where clause on blob tags to operate only on blob with a matching value. + eg. ``"\"tagname\"='my tag'"`` + + .. versionadded:: 12.4.0 + if_unmodified_since: datetime.datetime + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.download_blob + - BlobClient.get_blob_properties + - BlobClient.start_copy_from_url + - BlobClient.upload_blob + - BlobServiceClient.delete_container + - ContainerClient.delete_blob. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this header to perform the operation only if + the resource has not been modified since the specified date/time. + immutability_policy: azure.storage.blob.ImmutabilityPolicy + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.start_copy_from_url + - BlobClient.upload_blob. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Specifies the immutability policy of a blob, blob snapshot or blob version. + Currently this parameter of upload_blob() API is for BlockBlob only. + + .. versionadded:: 12.10.0 + This was introduced in API version '2020-10-02'. 
+ include_deleted: bool + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobServiceClient.list_containers. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Specifies that deleted containers to be returned in the response. This is for container restore enabled + account. The default value is `False`. + .. versionadded:: 12.4.0 + include_system: bool + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobServiceClient.list_containers. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Flag specifying that system containers should be included. + .. versionadded:: 12.10.0 + incremental_copy: bool + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.start_copy_from_url. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Copies the snapshot of the source page blob to a destination page blob. + The snapshot is copied such that only the differential changes between + the previously copied snapshot are transferred to the destination. + The copied snapshots are complete copies of the original snapshot and + can be read or copied from as usual. Defaults to False. + lease + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.download_blob + - BlobClient.get_blob_properties + - BlobClient.upload_blob + - BlobServiceClient.delete_container + - ContainerClient.delete_blob + - ContainerClient.get_container_properties. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + If specified, get_container_properties only succeeds if the + container's lease is active and matches this ID. 
+ legal_hold: bool + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.start_copy_from_url + - BlobClient.upload_blob. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Specified if a legal hold should be set on the blob. + Currently this parameter of upload_blob() API is for BlockBlob only. + + .. versionadded:: 12.10.0 + This was introduced in API version '2020-10-02'. + length: int + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.upload_blob. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Number of bytes to read from the stream. This is optional, but + should be supplied for optimal performance. + max_concurrency: int + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.download_blob + - BlobClient.upload_blob. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + The number of parallel connections with which to download. + maxsize_condition: int + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.upload_blob. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Optional conditional header. The max length in bytes permitted for + the append blob. If the Append Block operation would cause the blob + to exceed that limit or if the blob size is already greater than the + value specified in this header, the request will fail with + MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed). + metadata: dict(str, str) + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.start_copy_from_url + - BlobServiceClient.create_container. 
+ Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + A dict with name-value pairs to associate with the + container as metadata. Example: `{'Category':'test'}` + name_starts_with: str + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobServiceClient.list_containers. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Filters the results to return only containers whose names + begin with the specified prefix. + premium_page_blob_tier: azure.storage.blob.PremiumPageBlobTier + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.start_copy_from_url + - BlobClient.upload_blob. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + A page blob tier value to set the blob to. The tier correlates to the size of the + blob and number of allowed IOPS. This is only applicable to page blobs on + premium storage accounts. + progress_hook + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.download_blob + - BlobClient.upload_blob. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + A callback to track the progress of a long running download. The signature is + function(current: int, total: int) where current is the number of bytes transfered + so far, and total is the total size of the download. + public_access: str or azure.storage.blob.PublicAccess + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobServiceClient.create_container. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Possible values include: 'container', 'blob'. 
+ rehydrate_priority: azure.storage.blob.RehydratePriority + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.start_copy_from_url. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Indicates the priority with which to rehydrate an archived blob + requires_sync: bool + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.start_copy_from_url. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Enforces that the service will not return a response until the copy is complete. + results_per_page: int + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobServiceClient.list_containers. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + The maximum number of container names to retrieve per API + call. If the request does not specify the server will return up to 5,000 items. + seal_destination_blob: bool + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.start_copy_from_url. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Seal the destination append blob. This operation is only for append blob. + .. versionadded:: 12.4.0 + source_authorization: str + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.start_copy_from_url. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Authenticate as a service principal using a client secret to access a source blob. Ensure "bearer " is + the prefix of the source_authorization string. 
This option is only available when `incremental_copy` is + set to False and `requires_sync` is set to True. + + .. versionadded:: 12.9.0 + source_etag: str + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.start_copy_from_url. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + The source ETag value, or the wildcard character (*). Used to check if the resource has changed, + and act according to the condition specified by the `match_condition` parameter. + source_if_modified_since: datetime.datetime + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.start_copy_from_url. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this conditional header to copy the blob only if the source + blob has been modified since the specified date/time. + source_if_unmodified_since: datetime.datetime + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.start_copy_from_url. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + A DateTime value. Azure expects the date value passed in to be UTC. + If timezone is included, any non-UTC datetimes will be converted to UTC. + If a date is passed in without timezone info, it is assumed to be UTC. + Specify this conditional header to copy the blob only if the source blob + has not been modified since the specified date/time. + source_lease + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.start_copy_from_url. 
+ Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Specify this to perform the Copy Blob operation only if + the lease ID given matches the active lease ID of the source blob. + source_match_condition: azure.core.MatchConditions + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.start_copy_from_url. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + The source match condition to use upon the etag. + standard_blob_tier: azure.storage.blob.StandardBlobTier + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.start_copy_from_url + - BlobClient.upload_blob. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + A standard blob tier value to set the blob to. For this version of the library, + this is only applicable to block blobs on standard storage accounts. + validate_content: bool + Advanced settings parameter. + Directly passed to azure-sdk calls: + - BlobClient.download_blob + - BlobClient.upload_blob. + Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + If true, calculates an MD5 hash for each chunk of the blob. The storage + service checks the hash of the content that has arrived with the hash + that was sent. This is primarily valuable for detecting bitflips on + the wire if using http instead of https, as https (the default), will + already validate. Note that this MD5 hash is not stored with the + blob. Also note that if enabled, the memory-efficient upload algorithm + will not be used because computing the MD5 hash requires buffering + entire blocks, and doing so defeats the purpose of the memory-efficient algorithm. 
+ Examples -------- @@ -252,6 +790,75 @@ def __init__( else: self.sync_credential = None + # generated from adlfs.tools.azure_sdk_kwargs + self._azure_sdk_kwargs = { + k: v + for k, v in kwargs.items() + if k + in { + "timeout", + "tags", + "api_version", + "max_block_size", + "max_chunk_get_size", + "max_page_size", + "max_single_get_size", + "max_single_put_size", + "min_large_block_upload_threshold", + "secondary_hostname", + "use_byte_buffer", + "authority", + "exclude_cli_credential", + "exclude_environment_credential", + "exclude_managed_identity_credential", + "exclude_powershell_credential", + "exclude_shared_token_cache_credential", + "exclude_visual_studio_code_credential", + "managed_identity_client_id", + "shared_cache_tenant_id", + "shared_cache_username", + "visual_studio_code_tenant_id", + "blob_type", + "container_encryption_scope", + "content_settings", + "cpk", + "delete_snapshots", + "delimiter", + "destination_lease", + "encoding", + "encryption_scope", + "etag", + "if_modified_since", + "if_tags_match_condition", + "if_unmodified_since", + "immutability_policy", + "include_deleted", + "include_system", + "incremental_copy", + "lease", + "legal_hold", + "length", + "max_concurrency", + "maxsize_condition", + "metadata", + "name_starts_with", + "premium_page_blob_tier", + "progress_hook", + "public_access", + "rehydrate_priority", + "requires_sync", + "seal_destination_blob", + "source_authorization", + "source_etag", + "source_if_modified_since", + "source_if_unmodified_since", + "source_lease", + "source_match_condition", + "standard_blob_tier", + "validate_content", + } + } + # Solving issue in https://github.com/fsspec/adlfs/issues/270 if ( self.credential is None @@ -263,7 +870,7 @@ def __init__( ( self.credential, self.sync_credential, - ) = self._get_default_azure_credential(**kwargs) + ) = self._get_default_azure_credential(**self._azure_sdk_kwargs) self.do_connect() weakref.finalize(self, sync, self.loop, close_service_client, self) @@ 
-919,12 +1526,11 @@ async def _find(self, path, withdirs=False, prefix="", with_parent=False, **kwar target_path = f"{full_path}/" container, path, _ = self.split_path(target_path) - async with self.service_client.get_container_client( container=container ) as container_client: blobs = container_client.list_blobs( - include=["metadata"], name_starts_with=path + include=["metadata"], name_starts_with=path, **self._azure_sdk_kwargs ) files = {} dir_set = set() @@ -1077,7 +1683,7 @@ async def _container_exists(self, container_name): async with self.service_client.get_container_client( container_name ) as client: - await client.get_container_properties() + await client.get_container_properties(**self._azure_sdk_kwargs) except ResourceNotFoundError: return False except Exception as e: @@ -1119,7 +1725,9 @@ async def _mkdir(self, path, create_parents=True, delimiter="/", **kwargs): if not container_exists: try: - await self.service_client.create_container(container_name) + await self.service_client.create_container( + container_name, **self._azure_sdk_kwargs + ) self.invalidate_cache(_ROOT_PATH) except Exception as e: @@ -1200,7 +1808,9 @@ async def _rm_file(self, path, delimiter="/", **kwargs): async with self.service_client.get_container_client( container=container_name ) as cc: - await cc.delete_blob(path.rstrip(delimiter)) + await cc.delete_blob( + path.rstrip(delimiter), **self._azure_sdk_kwargs + ) elif kind == "directory": await self._rmdir(container_name) else: @@ -1236,7 +1846,9 @@ async def _rmdir(self, path: str, delimiter="/", **kwargs): container_name, path, _ = self.split_path(path, delimiter=delimiter) container_exists = await self._container_exists(container_name) if container_exists and not path: - await self.service_client.delete_container(container_name) + await self.service_client.delete_container( + container_name, **self._azure_sdk_kwargs + ) self.invalidate_cache(_ROOT_PATH) def size(self, path): @@ -1274,7 +1886,9 @@ async def _isfile(self, path): 
async with self.service_client.get_blob_client( container_name, path ) as bc: - props = await bc.get_blob_properties(version_id=version_id) + props = await bc.get_blob_properties( + version_id=version_id, **self._azure_sdk_kwargs + ) if props["metadata"]["is_directory"] == "false": return True @@ -1336,10 +1950,9 @@ async def _exists(self, path): else: # Empty paths exist by definition return True - async with self.service_client.get_blob_client(container_name, path) as bc: try: - if await bc.exists(version_id=version_id): + if await bc.exists(version_id=version_id, **self._azure_sdk_kwargs): return True except HttpResponseError: if version_id is not None: @@ -1351,7 +1964,7 @@ async def _exists(self, path): container=container_name ) as container_client: async for blob in container_client.list_blobs( - results_per_page=1, name_starts_with=dir_path + results_per_page=1, name_starts_with=dir_path, **self._azure_sdk_kwargs ): return True else: @@ -1384,7 +1997,10 @@ async def _cat_file(self, path, start=None, end=None, **kwargs): ) as bc: try: stream = await bc.download_blob( - offset=start, length=length, version_id=version_id + offset=start, + length=length, + version_id=version_id, + **self._azure_sdk_kwargs, ) except ResourceNotFoundError as e: raise FileNotFoundError from e @@ -1449,7 +2065,6 @@ async def _url(self, path, expires=3600, **kwargs): expiry=datetime.utcnow() + timedelta(seconds=expires), version_id=version_id, ) - async with self.service_client.get_blob_client(container_name, blob) as bc: url = f"{bc.url}?{sas_token}" return url @@ -1549,6 +2164,7 @@ async def _put_file( raw_response_hook=make_callback( "upload_stream_current", callback ), + **self._azure_sdk_kwargs, ) self.invalidate_cache() except ResourceExistsError: @@ -1579,7 +2195,7 @@ async def _cp_file(self, path1, path2, **kwargs): else f"{blobclient1.url}?versionid={version_id}" ) try: - await blobclient2.start_copy_from_url(url) + await blobclient2.start_copy_from_url(url, 
**self._azure_sdk_kwargs) except ResourceNotFoundError as e: raise FileNotFoundError from e self.invalidate_cache(container1) diff --git a/adlfs/tests/test_spec.py b/adlfs/tests/test_spec.py index 4cbbb5fb..10c33a7f 100644 --- a/adlfs/tests/test_spec.py +++ b/adlfs/tests/test_spec.py @@ -1687,3 +1687,20 @@ async def test_get_file_versioned(storage, mocker): download_blob.side_effect = ResourceNotFoundError with pytest.raises(FileNotFoundError): await fs._get_file("data/root/a/file.txt?versionid=invalid_version", "file.txt") + + +def test_azure_sdk_pass_on(storage, mocker): + from azure.storage.blob.aio import BlobClient + + fs = AzureBlobFileSystem( + account_name=ACCOUNT_NAME, + connection_string=CONN_STR, + timeout=1, + version_id="foo", # excluded from pass on parameters + ) + assert "timeout" in fs._azure_sdk_kwargs + assert "version_id" not in fs._azure_sdk_kwargs + + download_blob = mocker.patch.object(BlobClient, "download_blob") + fs.cat("data/top_file.txt") + assert download_blob.call_args.kwargs["timeout"] == 1 diff --git a/requirements/dev.txt b/requirements/dev.txt index f22829c3..75e3f6e8 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -6,4 +6,5 @@ azure-storage-blob black flake8 isort -pre-commit \ No newline at end of file +pre-commit +docstring-parser \ No newline at end of file diff --git a/tools/azure_sdk_kwargs.py b/tools/azure_sdk_kwargs.py new file mode 100644 index 00000000..1fe45677 --- /dev/null +++ b/tools/azure_sdk_kwargs.py @@ -0,0 +1,150 @@ +""" +Generate keyword argument docstring for pass on parameters to azure sdk +""" + +import azure.storage.blob +import azure.storage.blob.aio +import azure.identity.aio +import io + +import docstring_parser + + +if __name__ == "__main__": + + # List of all invocations from spec.py of azure.storage.blob methods. Explicitly provided arguments to be excluded + # as second item in tuple. 
+ + methods = [ + (azure.identity.aio.DefaultAzureCredential, []), + (azure.storage.blob.aio.BlobServiceClient, ["account_url"]), + ( + azure.storage.blob.ContainerClient.get_container_properties, + [], + ), # via container exists + ( + azure.storage.blob.BlobServiceClient.list_containers, + ["include_metadata"], + ), # via ls + ( + azure.storage.blob.ContainerClient.walk_blobs, + ["include", "name_starts_with"], + ), # via ls + ( + azure.storage.blob.ContainerClient.list_blobs, + ["include", "name_starts_with", "results_per_page"], + ), # via ls + (azure.storage.blob.BlobServiceClient.create_container, ["name"]), # via mkdir + (azure.storage.blob.ContainerClient.delete_blob, ["blob"]), # via rm + ( + azure.storage.blob.BlobServiceClient.delete_container, + ["container"], + ), # via rmdir + ( + azure.storage.blob.BlobClient.get_blob_properties, + ["version_id"], + ), # via isfile + (azure.storage.blob.BlobClient.exists, ["version_id"]), + ( + azure.storage.blob.BlobClient.download_blob, + ["offset", "length", "version_id"], + ), + ( + azure.storage.blob.BlobClient.upload_blob, + ["data", "overwrite", "metadata", "raw_response_hook"], + ), + (azure.storage.blob.BlobClient.start_copy_from_url, ["source_url"]), + ] + + method_params = list( + map(lambda _: (*_, docstring_parser.parse(_[0].__doc__).params), methods) + ) + + # Check correctly listed explicitly provided arguments + # Manually check that missing arguments are due to incorrect docstring or docstring parser. 
+ unkown_args = {} + for _ in method_params: + arg_names = [*map(lambda param: param.arg_name, _[-1])] + for arg_name in _[1]: + if arg_name not in arg_names: + unkown_args[_[0]] = arg_name + + lookup = {} + + # parameters interfering with explicitly provided arguments + excludes = ["version_id", "match_condition", "credential", "results_per_page"] + + basic = ["timeout", "tags"] + + order = {v: n for n, v in enumerate(basic)} + + for _ in method_params: + for param in _[-1]: + if param.arg_name not in _[1] and param.arg_name not in excludes: + lookup.setdefault( + param.arg_name, + {"methods": [], "types": [], "defaults": [], "descriptions": []}, + ) + lookup[param.arg_name]["methods"].append(_[0].__qualname__) + if param.type_name not in lookup[param.arg_name]["types"]: + lookup[param.arg_name]["types"].append( + (param.type_name or "Any").replace("~", "") + ) + if param.default not in lookup[param.arg_name]["defaults"]: + lookup[param.arg_name]["defaults"].append(param.default) + description = param.description + if description not in lookup[param.arg_name]["descriptions"]: + lookup[param.arg_name]["descriptions"].append(description) + + def sort_rule(kv): + prefix1 = str(order.get(kv[0], 999999)).zfill(6) + prefix2 = "10000".zfill(6) + if "BlobServiceClient" in kv[1]["methods"]: + prefix2 = "0".zfill(6) + if "DefaultAzureCredential" in kv[1]["methods"]: + prefix2 = "1".zfill(6) + return f"{prefix1}-{prefix2}-{kv[0]}" + + indent = " " * 4 + + data = { + key: { + "methods": "".join( + [f"\n{2 * indent}- {v}" for v in sorted(value["methods"])] + ), + "type": value["types"][0], + "description": value["descriptions"][0], + } + for key, value in sorted(lookup.items(), key=sort_rule) + } + + def doc_reference(key): + if key == "DefaultAzureCredential": + return "https://learn.microsoft.com/en-us/python/api/azure-identity/?view=azure-python (version 1.12.0)" + return ( + "Description from 
https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob" + "?view=azure-python (version 12.13.1) " + ) + + docstring = io.StringIO() + for key, value in data.items(): + docstring.write(f"{indent}{key}") + if value["type"] != "Any": + docstring.write(f": {value['type']}") + docstring.write("\n") + if key not in basic: + docstring.write(2 * indent + "Advanced settings parameter.\n") + docstring.write( + 2 * indent + f"Directly passed to azure-sdk calls: {value['methods']}.\n" + ) + docstring.write(2 * indent + f"{doc_reference(key)}:\n") + description = value["description"].replace("\n", f"\n{2 * indent}") + docstring.write(f"{2 * indent}{description}" + "\n") + + print("Use this in to filter azure_sdk_kwargs in AzureBlobFileSystem\n") + + print(list(data.keys())) + + print("\nAppend this to AzureBlobFileSystem docstring\n") + + print(docstring.getvalue()) From 132a30e44c1345f94937e0eafb35432b6862835c Mon Sep 17 00:00:00 2001 From: Peter Barmettler Date: Thu, 1 Dec 2022 16:55:39 +0100 Subject: [PATCH 2/5] exclude instead of include kwargs --- adlfs/spec.py | 71 ++++----------------------------------- tools/azure_sdk_kwargs.py | 2 +- 2 files changed, 8 insertions(+), 65 deletions(-) diff --git a/adlfs/spec.py b/adlfs/spec.py index 2308c1b2..1a0c53ea 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -795,68 +795,7 @@ def __init__( k: v for k, v in kwargs.items() if k - in { - "timeout", - "tags", - "api_version", - "max_block_size", - "max_chunk_get_size", - "max_page_size", - "max_single_get_size", - "max_single_put_size", - "min_large_block_upload_threshold", - "secondary_hostname", - "use_byte_buffer", - "authority", - "exclude_cli_credential", - "exclude_environment_credential", - "exclude_managed_identity_credential", - "exclude_powershell_credential", - "exclude_shared_token_cache_credential", - "exclude_visual_studio_code_credential", - "managed_identity_client_id", - "shared_cache_tenant_id", - "shared_cache_username", - 
"visual_studio_code_tenant_id", - "blob_type", - "container_encryption_scope", - "content_settings", - "cpk", - "delete_snapshots", - "delimiter", - "destination_lease", - "encoding", - "encryption_scope", - "etag", - "if_modified_since", - "if_tags_match_condition", - "if_unmodified_since", - "immutability_policy", - "include_deleted", - "include_system", - "incremental_copy", - "lease", - "legal_hold", - "length", - "max_concurrency", - "maxsize_condition", - "metadata", - "name_starts_with", - "premium_page_blob_tier", - "progress_hook", - "public_access", - "rehydrate_priority", - "requires_sync", - "seal_destination_blob", - "source_authorization", - "source_etag", - "source_if_modified_since", - "source_if_unmodified_since", - "source_lease", - "source_match_condition", - "standard_blob_tier", - "validate_content", - } + not in {"version_id", "match_condition", "credential", "results_per_page"} } # Solving issue in https://github.com/fsspec/adlfs/issues/270 @@ -1242,7 +1181,9 @@ async def _ls_containers(self, return_glob: bool = False): logger.info( "Returning a list of containers in the azure blob storage account" ) - contents = self.service_client.list_containers(include_metadata=True) + contents = self.service_client.list_containers( + include_metadata=True, **self._azure_sdk_kwargs + ) containers = [c async for c in contents] files = await self._details(containers) self.dircache[_ROOT_PATH] = files @@ -1280,7 +1221,9 @@ async def _ls_blobs( "filesystem is not version aware" ) include.append("versions") - blobs = cc.walk_blobs(include=include, name_starts_with=path) + blobs = cc.walk_blobs( + include=include, name_starts_with=path, **self._azure_sdk_kwargs + ) # Check the depth that needs to be screened depth = target_path.count("/") diff --git a/tools/azure_sdk_kwargs.py b/tools/azure_sdk_kwargs.py index 1fe45677..07002abc 100644 --- a/tools/azure_sdk_kwargs.py +++ b/tools/azure_sdk_kwargs.py @@ -143,7 +143,7 @@ def doc_reference(key): print("Use this in 
to filter azure_sdk_kwargs in AzureBlobFileSystem\n") - print(list(data.keys())) + print(excludes) print("\nAppend this to AzureBlobFileSystem docstring\n") From aae9fd88a418d6bf5a7e587874b7177c7ca5c3ef Mon Sep 17 00:00:00 2001 From: Peter Barmettler Date: Thu, 1 Dec 2022 20:31:36 +0100 Subject: [PATCH 3/5] apply isort --- tools/azure_sdk_kwargs.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/azure_sdk_kwargs.py b/tools/azure_sdk_kwargs.py index 07002abc..bba19766 100644 --- a/tools/azure_sdk_kwargs.py +++ b/tools/azure_sdk_kwargs.py @@ -2,14 +2,13 @@ Generate keyword argument docstring for pass on parameters to azure sdk """ -import azure.storage.blob -import azure.storage.blob.aio -import azure.identity.aio import io +import azure.identity.aio +import azure.storage.blob +import azure.storage.blob.aio import docstring_parser - if __name__ == "__main__": # List of all invocations from spec.py of azure.storage.blob methods. Explicitly provided arguments to be excluded From 120f4aa4cad1daa945fee5164b072789584c31ce Mon Sep 17 00:00:00 2001 From: Peter Barmettler Date: Thu, 1 Dec 2022 21:06:20 +0100 Subject: [PATCH 4/5] wrap docstring text --- adlfs/spec.py | 474 +++++++++++++++++++------------------- tools/azure_sdk_kwargs.py | 16 +- 2 files changed, 245 insertions(+), 245 deletions(-) diff --git a/adlfs/spec.py b/adlfs/spec.py index 1a0c53ea..d9a082e8 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -165,255 +165,257 @@ class AzureBlobFileSystem(AsyncFileSystem): - ContainerClient.delete_blob - ContainerClient.get_container_properties - ContainerClient.walk_blobs. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : The timeout parameter is expressed in seconds. 
tags Directly passed to azure-sdk calls: - BlobClient.start_copy_from_url - BlobClient.upload_blob. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Name-value pairs associated with the blob as tag. Tags are case-sensitive. - The tag set may contain at most 10 tags. Tag keys must be between 1 and 128 characters, - and tag values must be between 0 and 256 characters. - Valid tag key and value characters include: lowercase and uppercase letters, digits (0-9), - space (` `), plus (+), minus (-), period (.), solidus (/), colon (:), equals (=), underscore (_) - - .. versionadded:: 12.4.0 + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Name-value pairs associated with the blob as tag. Tags are case-sensitive.The tag set may contain at most 10 + tags. Tag keys must be between 1 and 128 characters,and tag values must be between 0 and 256 characters.Valid + tag key and value characters include: lowercase and uppercase letters, digits (0-9),space (` `), plus (+), minus + (-), period (.), solidus (/), colon (:), equals (=), underscore (_).. versionadded:: 12.4.0 api_version: str Advanced settings parameter. Directly passed to azure-sdk calls: - BlobServiceClient. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - The Storage API version to use for requests. Default value is the most recent service version that is - compatible with the current SDK. Setting to an older version may result in reduced feature compatibility. - - .. versionadded:: 12.2.0 + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + The Storage API version to use for requests. 
Default value is the most recent service version that iscompatible + with the current SDK. Setting to an older version may result in reduced feature compatibility... versionadded:: + 12.2.0 max_block_size: int Advanced settings parameter. Directly passed to azure-sdk calls: - BlobServiceClient. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - The maximum chunk size for uploading a block blob in chunks. - Defaults to 4*1024*1024, or 4MB. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + The maximum chunk size for uploading a block blob in chunks.Defaults to 4*1024*1024, or 4MB. max_chunk_get_size: int Advanced settings parameter. Directly passed to azure-sdk calls: - BlobServiceClient. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - The maximum chunk size used for downloading a blob. Defaults to 4*1024*1024, - or 4MB. - - .. admonition:: Example: - - .. literalinclude:: ../samples/blob_samples_authentication_async.py - :start-after: [START create_blob_service_client] - :end-before: [END create_blob_service_client] - :language: python - :dedent: 8 - :caption: Creating the BlobServiceClient with account url and credential. - - .. literalinclude:: ../samples/blob_samples_authentication_async.py - :start-after: [START create_blob_service_client_oauth] - :end-before: [END create_blob_service_client_oauth] - :language: python - :dedent: 8 - :caption: Creating the BlobServiceClient with Azure Identity credentials. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + The maximum chunk size used for downloading a blob. Defaults to 4*1024*1024,or 4MB... admonition:: Example: + .. 
literalinclude:: ../samples/blob_samples_authentication_async.py :start-after: [START + create_blob_service_client] :end-before: [END create_blob_service_client] :language: python + :dedent: 8 :caption: Creating the BlobServiceClient with account url and credential. .. + literalinclude:: ../samples/blob_samples_authentication_async.py :start-after: [START + create_blob_service_client_oauth] :end-before: [END create_blob_service_client_oauth] :language: + python :dedent: 8 :caption: Creating the BlobServiceClient with Azure Identity credentials. max_page_size: int Advanced settings parameter. Directly passed to azure-sdk calls: - BlobServiceClient. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : The maximum chunk size for uploading a page blob. Defaults to 4*1024*1024, or 4MB. max_single_get_size: int Advanced settings parameter. Directly passed to azure-sdk calls: - BlobServiceClient. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - The maximum size for a blob to be downloaded in a single call, - the exceeded part will be downloaded in chunks (could be parallel). Defaults to 32*1024*1024, or 32MB. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + The maximum size for a blob to be downloaded in a single call,the exceeded part will be downloaded in chunks + (could be parallel). Defaults to 32*1024*1024, or 32MB. max_single_put_size: int Advanced settings parameter. Directly passed to azure-sdk calls: - BlobServiceClient. 
- Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - If the blob size is less than or equal max_single_put_size, then the blob will be - uploaded with only one http PUT request. If the blob size is larger than max_single_put_size, - the blob will be uploaded in chunks. Defaults to 64*1024*1024, or 64MB. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + If the blob size is less than or equal max_single_put_size, then the blob will beuploaded with only one http PUT + request. If the blob size is larger than max_single_put_size,the blob will be uploaded in chunks. Defaults to + 64*1024*1024, or 64MB. min_large_block_upload_threshold: int Advanced settings parameter. Directly passed to azure-sdk calls: - BlobServiceClient. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - The minimum chunk size required to use the memory efficient - algorithm when uploading a block blob. Defaults to 4*1024*1024+1. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + The minimum chunk size required to use the memory efficientalgorithm when uploading a block blob. Defaults to + 4*1024*1024+1. secondary_hostname: str Advanced settings parameter. Directly passed to azure-sdk calls: - BlobServiceClient. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : The hostname of the secondary endpoint. use_byte_buffer: bool Advanced settings parameter. Directly passed to azure-sdk calls: - BlobServiceClient. 
- Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : Use a byte buffer for block blob uploads. Defaults to False. authority: str Advanced settings parameter. Directly passed to azure-sdk calls: - DefaultAzureCredential. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Authority of an Azure Active Directory endpoint, for example 'login.microsoftonline.com', - the authority for Azure Public Cloud (which is the default). :class:`~azure.identity.AzureAuthorityHosts` - defines authorities for other clouds. Managed identities ignore this because they reside in a single cloud. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Authority of an Azure Active Directory endpoint, for example 'login.microsoftonline.com',the authority for Azure + Public Cloud (which is the default). :class:`~azure.identity.AzureAuthorityHosts`defines authorities for other + clouds. Managed identities ignore this because they reside in a single cloud. exclude_cli_credential: bool Advanced settings parameter. Directly passed to azure-sdk calls: - DefaultAzureCredential. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : Whether to exclude the Azure CLI from the credential. Defaults to **False**. exclude_environment_credential: bool Advanced settings parameter. Directly passed to azure-sdk calls: - DefaultAzureCredential. 
- Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Whether to exclude a service principal configured by environment - variables from the credential. Defaults to **False**. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Whether to exclude a service principal configured by environmentvariables from the credential. Defaults to + **False**. exclude_managed_identity_credential: bool Advanced settings parameter. Directly passed to azure-sdk calls: - DefaultAzureCredential. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Whether to exclude managed identity from the credential. - Defaults to **False**. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Whether to exclude managed identity from the credential.Defaults to **False**. exclude_powershell_credential: bool Advanced settings parameter. Directly passed to azure-sdk calls: - DefaultAzureCredential. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : Whether to exclude Azure PowerShell. Defaults to **False**. exclude_shared_token_cache_credential: bool Advanced settings parameter. Directly passed to azure-sdk calls: - DefaultAzureCredential. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Whether to exclude the shared token cache. Defaults to - **False**. 
+ Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Whether to exclude the shared token cache. Defaults to**False**. exclude_visual_studio_code_credential: bool Advanced settings parameter. Directly passed to azure-sdk calls: - DefaultAzureCredential. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Whether to exclude stored credential from VS Code. - Defaults to **True**. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Whether to exclude stored credential from VS Code.Defaults to **True**. managed_identity_client_id: str Advanced settings parameter. Directly passed to azure-sdk calls: - DefaultAzureCredential. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - The client ID of a user-assigned managed identity. Defaults to the value - of the environment variable AZURE_CLIENT_ID, if any. If not specified, a system-assigned identity will be used. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + The client ID of a user-assigned managed identity. Defaults to the valueof the environment variable + AZURE_CLIENT_ID, if any. If not specified, a system-assigned identity will be used. shared_cache_tenant_id: str Advanced settings parameter. Directly passed to azure-sdk calls: - DefaultAzureCredential. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Preferred tenant for :class:`~azure.identity.aio.SharedTokenCacheCredential`. - Defaults to the value of environment variable AZURE_TENANT_ID, if any. 
+ Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Preferred tenant for :class:`~azure.identity.aio.SharedTokenCacheCredential`.Defaults to the value of + environment variable AZURE_TENANT_ID, if any. shared_cache_username: str Advanced settings parameter. Directly passed to azure-sdk calls: - DefaultAzureCredential. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Preferred username for :class:`~azure.identity.aio.SharedTokenCacheCredential`. - Defaults to the value of environment variable AZURE_USERNAME, if any. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Preferred username for :class:`~azure.identity.aio.SharedTokenCacheCredential`.Defaults to the value of + environment variable AZURE_USERNAME, if any. visual_studio_code_tenant_id: str Advanced settings parameter. Directly passed to azure-sdk calls: - DefaultAzureCredential. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Tenant ID to use when authenticating with - :class:`~azure.identity.aio.VisualStudioCodeCredential`. Defaults to the "Azure: Tenant" setting in VS Code's - user settings or, when that setting has no value, the "organizations" tenant, which supports only Azure Active - Directory work or school accounts. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Tenant ID to use when authenticating with:class:`~azure.identity.aio.VisualStudioCodeCredential`. 
Defaults to + the "Azure: Tenant" setting in VS Code'suser settings or, when that setting has no value, the "organizations" + tenant, which supports only Azure ActiveDirectory work or school accounts. blob_type: azure.storage.blob.BlobType Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.upload_blob. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - The type of the blob. This can be - either BlockBlob, PageBlob or AppendBlob. The default value is BlockBlob. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + The type of the blob. This can beeither BlockBlob, PageBlob or AppendBlob. The default value is BlockBlob. container_encryption_scope Advanced settings parameter. Directly passed to azure-sdk calls: - BlobServiceClient.create_container. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Specifies the default encryption scope to set on the container and use for - all future writes. - - .. versionadded:: 12.2.0 + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Specifies the default encryption scope to set on the container and use forall future writes... versionadded:: + 12.2.0 content_settings: azure.storage.blob.ContentSettings Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.upload_blob. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - ContentSettings object used to set blob properties. Used to set content type, encoding, - language, disposition, md5, and cache control. 
+ Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + ContentSettings object used to set blob properties. Used to set content type, encoding,language, disposition, + md5, and cache control. cpk: azure.storage.blob.CustomerProvidedEncryptionKey Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.download_blob - BlobClient.get_blob_properties - BlobClient.upload_blob. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Encrypts the data on the service-side with the given key. - Use of customer-provided keys must be done over HTTPS. - As the encryption key itself is provided in the request, - a secure connection must be established to transfer the key. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Encrypts the data on the service-side with the given key.Use of customer-provided keys must be done over + HTTPS.As the encryption key itself is provided in the request,a secure connection must be established to + transfer the key. delete_snapshots: str Advanced settings parameter. Directly passed to azure-sdk calls: - ContainerClient.delete_blob. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Required if the blob has associated snapshots. Values include: - - "only": Deletes only the blobs snapshots. - - "include": Deletes the blob along with all snapshots. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Required if the blob has associated snapshots. Values include:- "only": Deletes only the blobs snapshots.- + "include": Deletes the blob along with all snapshots. 
delimiter: str Advanced settings parameter. Directly passed to azure-sdk calls: - ContainerClient.walk_blobs. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - When the request includes this parameter, the operation returns a BlobPrefix - element in the response body that acts as a placeholder for all blobs whose - names begin with the same substring up to the appearance of the delimiter - character. The delimiter may be a single character or a string. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + When the request includes this parameter, the operation returns a BlobPrefixelement in the response body that + acts as a placeholder for all blobs whosenames begin with the same substring up to the appearance of the + delimitercharacter. The delimiter may be a single character or a string. destination_lease Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.start_copy_from_url. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - The lease ID specified for this header must match the lease ID of the - destination blob. If the request does not include the lease ID or it is not - valid, the operation fails with status code 412 (Precondition Failed). + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + The lease ID specified for this header must match the lease ID of thedestination blob. If the request does not + include the lease ID or it is notvalid, the operation fails with status code 412 (Precondition Failed). encoding: str Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.download_blob - BlobClient.upload_blob. 
- Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : Encoding to decode the downloaded bytes. Default is None, i.e. no decoding. encryption_scope: str Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.start_copy_from_url - BlobClient.upload_blob. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - A predefined encryption scope used to encrypt the data on the service. An encryption - scope can be created using the Management API and referenced here by name. If a default - encryption scope has been defined at the container, this value will override it if the - container-level scope is configured to allow overrides. Otherwise an error will be raised. - - .. versionadded:: 12.2.0 + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + A predefined encryption scope used to encrypt the data on the service. An encryptionscope can be created using + the Management API and referenced here by name. If a defaultencryption scope has been defined at the container, + this value will override it if thecontainer-level scope is configured to allow overrides. Otherwise an error + will be raised... versionadded:: 12.2.0 etag: str Advanced settings parameter. Directly passed to azure-sdk calls: @@ -423,9 +425,10 @@ class AzureBlobFileSystem(AsyncFileSystem): - BlobClient.upload_blob - BlobServiceClient.delete_container - ContainerClient.delete_blob. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - An ETag value, or the wildcard character (*). 
Used to check if the resource has changed, - and act according to the condition specified by the `match_condition` parameter. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + An ETag value, or the wildcard character (*). Used to check if the resource has changed,and act according to the + condition specified by the `match_condition` parameter. if_modified_since: datetime.datetime Advanced settings parameter. Directly passed to azure-sdk calls: @@ -435,12 +438,11 @@ class AzureBlobFileSystem(AsyncFileSystem): - BlobClient.upload_blob - BlobServiceClient.delete_container - ContainerClient.delete_blob. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only - if the resource has been modified since the specified time. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + A DateTime value. Azure expects the date value passed in to be UTC.If timezone is included, any non-UTC + datetimes will be converted to UTC.If a date is passed in without timezone info, it is assumed to be UTC.Specify + this header to perform the operation onlyif the resource has been modified since the specified time. if_tags_match_condition: str Advanced settings parameter. Directly passed to azure-sdk calls: @@ -448,11 +450,10 @@ class AzureBlobFileSystem(AsyncFileSystem): - BlobClient.get_blob_properties - BlobClient.upload_blob - ContainerClient.delete_blob. 
- Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Specify a SQL where clause on blob tags to operate only on blob with a matching value. - eg. ``"\"tagname\"='my tag'"`` - - .. versionadded:: 12.4.0 + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Specify a SQL where clause on blob tags to operate only on blob with a matching value.eg. ``"\"tagname\"='my + tag'"``.. versionadded:: 12.4.0 if_unmodified_since: datetime.datetime Advanced settings parameter. Directly passed to azure-sdk calls: @@ -462,48 +463,46 @@ class AzureBlobFileSystem(AsyncFileSystem): - BlobClient.upload_blob - BlobServiceClient.delete_container - ContainerClient.delete_blob. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this header to perform the operation only if - the resource has not been modified since the specified date/time. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + A DateTime value. Azure expects the date value passed in to be UTC.If timezone is included, any non-UTC + datetimes will be converted to UTC.If a date is passed in without timezone info, it is assumed to be UTC.Specify + this header to perform the operation only ifthe resource has not been modified since the specified date/time. immutability_policy: azure.storage.blob.ImmutabilityPolicy Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.start_copy_from_url - BlobClient.upload_blob. 
- Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Specifies the immutability policy of a blob, blob snapshot or blob version. - Currently this parameter of upload_blob() API is for BlockBlob only. - - .. versionadded:: 12.10.0 - This was introduced in API version '2020-10-02'. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Specifies the immutability policy of a blob, blob snapshot or blob version.Currently this parameter of + upload_blob() API is for BlockBlob only... versionadded:: 12.10.0 This was introduced in API version + '2020-10-02'. include_deleted: bool Advanced settings parameter. Directly passed to azure-sdk calls: - BlobServiceClient.list_containers. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Specifies that deleted containers to be returned in the response. This is for container restore enabled - account. The default value is `False`. - .. versionadded:: 12.4.0 + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Specifies that deleted containers to be returned in the response. This is for container restore enabledaccount. + The default value is `False`... versionadded:: 12.4.0 include_system: bool Advanced settings parameter. Directly passed to azure-sdk calls: - BlobServiceClient.list_containers. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Flag specifying that system containers should be included. - .. 
versionadded:: 12.10.0 + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Flag specifying that system containers should be included. .. versionadded:: 12.10.0 incremental_copy: bool Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.start_copy_from_url. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Copies the snapshot of the source page blob to a destination page blob. - The snapshot is copied such that only the differential changes between - the previously copied snapshot are transferred to the destination. - The copied snapshots are complete copies of the original snapshot and - can be read or copied from as usual. Defaults to False. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Copies the snapshot of the source page blob to a destination page blob. The snapshot is copied such that only the + differential changes between the previously copied snapshot are transferred to the destination. The copied + snapshots are complete copies of the original snapshot and can be read or copied from as usual. Defaults to + False. lease Advanced settings parameter. Directly passed to azure-sdk calls: @@ -513,181 +512,176 @@ class AzureBlobFileSystem(AsyncFileSystem): - BlobServiceClient.delete_container - ContainerClient.delete_blob - ContainerClient.get_container_properties. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - If specified, get_container_properties only succeeds if the - container's lease is active and matches this ID.
+ Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + If specified, get_container_properties only succeeds if the container's lease is active and matches this ID. legal_hold: bool Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.start_copy_from_url - BlobClient.upload_blob. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Specified if a legal hold should be set on the blob. - Currently this parameter of upload_blob() API is for BlockBlob only. - - .. versionadded:: 12.10.0 - This was introduced in API version '2020-10-02'. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Specified if a legal hold should be set on the blob. Currently this parameter of upload_blob() API is for + BlockBlob only. .. versionadded:: 12.10.0 This was introduced in API version '2020-10-02'. length: int Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.upload_blob. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Number of bytes to read from the stream. This is optional, but - should be supplied for optimal performance. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Number of bytes to read from the stream. This is optional, but should be supplied for optimal performance. max_concurrency: int Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.download_blob - BlobClient.upload_blob.
- Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : The number of parallel connections with which to download. maxsize_condition: int Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.upload_blob. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Optional conditional header. The max length in bytes permitted for - the append blob. If the Append Block operation would cause the blob - to exceed that limit or if the blob size is already greater than the - value specified in this header, the request will fail with - MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed). + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Optional conditional header. The max length in bytes permitted for the append blob. If the Append Block operation + would cause the blob to exceed that limit or if the blob size is already greater than the value specified in this + header, the request will fail with MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed). metadata: dict(str, str) Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.start_copy_from_url - BlobServiceClient.create_container. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - A dict with name-value pairs to associate with the - container as metadata.
Example: `{'Category':'test'}` + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + A dict with name-value pairs to associate with the container as metadata. Example: `{'Category':'test'}` name_starts_with: str Advanced settings parameter. Directly passed to azure-sdk calls: - BlobServiceClient.list_containers. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Filters the results to return only containers whose names - begin with the specified prefix. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Filters the results to return only containers whose names begin with the specified prefix. premium_page_blob_tier: azure.storage.blob.PremiumPageBlobTier Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.start_copy_from_url - BlobClient.upload_blob. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - A page blob tier value to set the blob to. The tier correlates to the size of the - blob and number of allowed IOPS. This is only applicable to page blobs on - premium storage accounts. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + A page blob tier value to set the blob to. The tier correlates to the size of the blob and number of allowed + IOPS. This is only applicable to page blobs on premium storage accounts. progress_hook Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.download_blob - BlobClient.upload_blob.
- Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - A callback to track the progress of a long running download. The signature is - function(current: int, total: int) where current is the number of bytes transfered - so far, and total is the total size of the download. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + A callback to track the progress of a long running download. The signature is function(current: int, total: int) + where current is the number of bytes transferred so far, and total is the total size of the download. public_access: str or azure.storage.blob.PublicAccess Advanced settings parameter. Directly passed to azure-sdk calls: - BlobServiceClient.create_container. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : Possible values include: 'container', 'blob'. rehydrate_priority: azure.storage.blob.RehydratePriority Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.start_copy_from_url. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : Indicates the priority with which to rehydrate an archived blob requires_sync: bool Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.start_copy_from_url.
- Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : Enforces that the service will not return a response until the copy is complete. - results_per_page: int - Advanced settings parameter. - Directly passed to azure-sdk calls: - - BlobServiceClient.list_containers. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - The maximum number of container names to retrieve per API - call. If the request does not specify the server will return up to 5,000 items. seal_destination_blob: bool Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.start_copy_from_url. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Seal the destination append blob. This operation is only for append blob. - .. versionadded:: 12.4.0 + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Seal the destination append blob. This operation is only for append blob... versionadded:: 12.4.0 source_authorization: str Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.start_copy_from_url. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Authenticate as a service principal using a client secret to access a source blob. Ensure "bearer " is - the prefix of the source_authorization string. This option is only available when `incremental_copy` is - set to False and `requires_sync` is set to True. - - .. 
versionadded:: 12.9.0 + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Authenticate as a service principal using a client secret to access a source blob. Ensure "bearer " isthe prefix + of the source_authorization string. This option is only available when `incremental_copy` isset to False and + `requires_sync` is set to True... versionadded:: 12.9.0 source_etag: str Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.start_copy_from_url. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - The source ETag value, or the wildcard character (*). Used to check if the resource has changed, - and act according to the condition specified by the `match_condition` parameter. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + The source ETag value, or the wildcard character (*). Used to check if the resource has changed,and act + according to the condition specified by the `match_condition` parameter. source_if_modified_since: datetime.datetime Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.start_copy_from_url. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this conditional header to copy the blob only if the source - blob has been modified since the specified date/time. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + A DateTime value. 
Azure expects the date value passed in to be UTC.If timezone is included, any non-UTC + datetimes will be converted to UTC.If a date is passed in without timezone info, it is assumed to be UTC.Specify + this conditional header to copy the blob only if the sourceblob has been modified since the specified date/time. source_if_unmodified_since: datetime.datetime Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.start_copy_from_url. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - A DateTime value. Azure expects the date value passed in to be UTC. - If timezone is included, any non-UTC datetimes will be converted to UTC. - If a date is passed in without timezone info, it is assumed to be UTC. - Specify this conditional header to copy the blob only if the source blob - has not been modified since the specified date/time. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + A DateTime value. Azure expects the date value passed in to be UTC.If timezone is included, any non-UTC + datetimes will be converted to UTC.If a date is passed in without timezone info, it is assumed to be UTC.Specify + this conditional header to copy the blob only if the source blobhas not been modified since the specified + date/time. source_lease Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.start_copy_from_url. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - Specify this to perform the Copy Blob operation only if - the lease ID given matches the active lease ID of the source blob. 
+ Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + Specify this to perform the Copy Blob operation only if the lease ID given matches the active lease ID of the + source blob. source_match_condition: azure.core.MatchConditions Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.start_copy_from_url. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : The source match condition to use upon the etag. standard_blob_tier: azure.storage.blob.StandardBlobTier Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.start_copy_from_url - BlobClient.upload_blob. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - A standard blob tier value to set the blob to. For this version of the library, - this is only applicable to block blobs on standard storage accounts. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + A standard blob tier value to set the blob to. For this version of the library, this is only applicable to block + blobs on standard storage accounts. validate_content: bool Advanced settings parameter. Directly passed to azure-sdk calls: - BlobClient.download_blob - BlobClient.upload_blob. - Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python (version 12.13.1): - If true, calculates an MD5 hash for each chunk of the blob. The storage - service checks the hash of the content that has arrived with the hash - that was sent.
This is primarily valuable for detecting bitflips on - the wire if using http instead of https, as https (the default), will - already validate. Note that this MD5 hash is not stored with the - blob. Also note that if enabled, the memory-efficient upload algorithm - will not be used because computing the MD5 hash requires buffering - entire blocks, and doing so defeats the purpose of the memory-efficient algorithm. + Description from version 12.13.1 + https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python : + If true, calculates an MD5 hash for each chunk of the blob. The storageservice checks the hash of the content + that has arrived with the hashthat was sent. This is primarily valuable for detecting bitflips onthe wire if + using http instead of https, as https (the default), willalready validate. Note that this MD5 hash is not stored + with theblob. Also note that if enabled, the memory-efficient upload algorithmwill not be used because computing + the MD5 hash requires bufferingentire blocks, and doing so defeats the purpose of the memory-efficient + algorithm. 
Examples -------- diff --git a/tools/azure_sdk_kwargs.py b/tools/azure_sdk_kwargs.py index bba19766..fc13e3d8 100644 --- a/tools/azure_sdk_kwargs.py +++ b/tools/azure_sdk_kwargs.py @@ -8,6 +8,7 @@ import azure.storage.blob import azure.storage.blob.aio import docstring_parser +import textwrap if __name__ == "__main__": @@ -119,10 +120,13 @@ def sort_rule(kv): def doc_reference(key): if key == "DefaultAzureCredential": - return "https://learn.microsoft.com/en-us/python/api/azure-identity/?view=azure-python (version 1.12.0)" + return ( + f"Description from version 1.12.0\n{2*indent}https://learn.microsoft.com/en-us/python/api/azure" + f"-identity/?view=azure-python" + ) return ( - "Description from https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob" - "?view=azure-python (version 12.13.1) " + f"Description from version 12.13.1\n{2*indent}https://learn.microsoft.com/en-us/python/api/azure" + f"-storage-blob/azure.storage.blob?view=azure-python " ) docstring = io.StringIO() @@ -137,8 +141,10 @@ def doc_reference(key): 2 * indent + f"Directly passed to azure-sdk calls: {value['methods']}.\n" ) docstring.write(2 * indent + f"{doc_reference(key)}:\n") - description = value["description"].replace("\n", f"\n{2 * indent}") - docstring.write(f"{2 * indent}{description}" + "\n") + description = value["description"].replace("\n", "") + max_len = 144 - 8 * 4 + for line in textwrap.wrap(description, max_len): + docstring.write(f"{2 * indent}{line}" + "\n") print("Use this in to filter azure_sdk_kwargs in AzureBlobFileSystem\n") From 938be17e77e704602bb04660a54911688db36853 Mon Sep 17 00:00:00 2001 From: Peter Barmettler Date: Thu, 1 Dec 2022 21:13:55 +0100 Subject: [PATCH 5/5] isort --- tools/azure_sdk_kwargs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/azure_sdk_kwargs.py b/tools/azure_sdk_kwargs.py index fc13e3d8..5ea22007 100644 --- a/tools/azure_sdk_kwargs.py +++ b/tools/azure_sdk_kwargs.py @@ -3,12 +3,12 @@ """ 
import io +import textwrap import azure.identity.aio import azure.storage.blob import azure.storage.blob.aio import docstring_parser -import textwrap if __name__ == "__main__":