Skip to content

Commit df76154

Browse files
updates
1 parent 21fb656 commit df76154

File tree

3 files changed

+29
-21
lines changed

3 files changed

+29
-21
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ Unreleased
1010
- The block size is now used for partitioned uploads. Previously, 1 GiB was used for each uploaded block irrespective of the block size
1111
- Updated default block size to be 50 MiB. Set `blocksize` for `AzureBlobFileSystem` or `block_size` when opening `AzureBlobFile` to revert to the previous 5 MiB default.
1212
- `AzureBlobFile` now inherits the block size from `AzureBlobFileSystem` when `fs.open()` is called and a `block_size` is not passed in.
13+
- Added concurrency for `_async_upload_chunk`. Can be set using `max_concurrency` for `AzureBlobFileSystem`.
1314

1415

1516
2024.12.0

adlfs/spec.py

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2156,6 +2156,15 @@ def _get_chunks(self, data):
21562156
yield data[start:end]
21572157
start = end
21582158

2159+
async def _upload(self, chunk, block_id, semaphore):
2160+
async with semaphore:
2161+
async with self.container_client.get_blob_client(blob=self.blob) as bc:
2162+
await bc.stage_block(
2163+
block_id=block_id,
2164+
data=chunk,
2165+
length=len(chunk),
2166+
)
2167+
21592168
async def _async_upload_chunk(
21602169
self, final: bool = False, max_concurrency=None, **kwargs
21612170
):
@@ -2180,28 +2189,20 @@ async def _async_upload_chunk(
21802189
max_concurrency = max_concurrency or self.fs.max_concurrency or 1
21812190
semaphore = asyncio.Semaphore(max_concurrency)
21822191
tasks = []
2183-
block_ids = []
2192+
block_ids = self._block_list or []
2193+
start_idx = len(block_ids)
21842194
chunks = list(self._get_chunks(data))
21852195
for _ in range(len(chunks)):
21862196
block_ids.append(block_id)
21872197
block_id = self._get_block_id(block_ids)
2198+
21882199
if chunks:
21892200
self._block_list = block_ids
2190-
for chunk, block_id in zip(chunks, block_ids):
2191-
2192-
async def _upload_chunk(chunk=chunk, block_id=block_id):
2193-
async with semaphore:
2194-
async with self.container_client.get_blob_client(
2195-
blob=self.blob
2196-
) as bc:
2197-
await bc.stage_block(
2198-
block_id=block_id,
2199-
data=chunk,
2200-
length=len(chunk),
2201-
)
2202-
2203-
tasks.append(_upload_chunk())
2201+
for chunk, block_id in zip(chunks, block_ids[start_idx:]):
2202+
tasks.append(self._upload(chunk, block_id, semaphore))
2203+
22042204
await asyncio.gather(*tasks)
2205+
22052206
if final:
22062207
block_list = [BlobBlock(_id) for _id in self._block_list]
22072208
async with self.container_client.get_blob_client(

adlfs/tests/test_spec.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2142,20 +2142,26 @@ def test_blobfile_default_blocksize(storage):
21422142
assert f.blocksize == 50 * 2**20
21432143

21442144

2145-
@pytest.mark.parametrize("max_concurrency", [1, 2, 4, 8])
2146-
def test_large_blob_max_concurrency(storage, max_concurrency):
2145+
@pytest.mark.parametrize(
2146+
"max_concurrency, blob_size",
2147+
[
2148+
(1, 51 * 2**20),
2149+
(4, 200 * 2**20),
2150+
(4, 49 * 2**20),
2151+
],
2152+
)
2153+
def test_max_concurrency(storage, max_concurrency, blob_size):
21472154
fs = AzureBlobFileSystem(
21482155
account_name=storage.account_name,
21492156
connection_string=CONN_STR,
21502157
max_concurrency=max_concurrency,
21512158
)
2152-
blob_size = 1_120_000_000
21532159
data = os.urandom(blob_size)
21542160
fs.mkdir("large-file-container")
2155-
path = "large-file-container/blob.bin"
2161+
path = "large-file-container/blob.txt"
21562162

2157-
with fs.open(path, "wb") as dst:
2158-
dst.write(data)
2163+
with fs.open(path, "wb") as f:
2164+
f.write(data)
21592165

21602166
assert fs.exists(path)
21612167
assert fs.size(path) == blob_size

0 commit comments

Comments
 (0)