Skip to content

Commit a4e2baf

Browse files
authored
Merge pull request #86 from pinellolab/v218
Fix complete-test to reflect latest updates
2 parents 5a42185 + 7ddc78b commit a4e2baf

File tree

1 file changed

+38
-13
lines changed

1 file changed

+38
-13
lines changed

PostProcess/complete_test.py

Lines changed: 38 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -286,34 +286,59 @@ def download_annotation_data() -> Tuple[str, str]:
286286
)
287287
return gencode, encode
288288

289+
def _bgzip_ann_data(ann_fname: str) -> str:
    """
    Compress an annotation file using bgzip and verify the output.

    This function runs the bgzip utility on the specified annotation file
    (``-f`` is passed to bgzip), checks that the compressed file exists, and
    returns its path. It raises an error if the compression fails.

    Args:
        ann_fname (str): The path to the annotation file to be compressed.

    Returns:
        str: The path to the compressed annotation file (``ann_fname`` with a
            ``.gz`` suffix appended).

    Raises:
        subprocess.SubprocessError: If bgzip cannot be started, exits with a
            non-zero status, or does not produce the expected output file.
    """

    try:
        # Pass an argument list (no shell) so that paths containing spaces or
        # shell metacharacters reach bgzip verbatim. check=True converts a
        # non-zero exit status into CalledProcessError instead of ignoring it.
        subprocess.run(["bgzip", "-f", ann_fname], check=True)
    except (subprocess.SubprocessError, OSError) as e:
        # OSError covers a missing or non-executable bgzip binary
        raise subprocess.SubprocessError(
            f"Bgzip compression failed on {ann_fname}"
        ) from e
    ann_fname_gz = f"{ann_fname}.gz"
    # Explicit check instead of assert: assert statements are removed when
    # Python runs with -O, which would silently skip this validation.
    if not os.path.isfile(ann_fname_gz):
        raise subprocess.SubprocessError(
            f"Bgzip compression failed on {ann_fname}: {ann_fname_gz} not found"
        )
    return ann_fname_gz
314+
315+
289316

290317
def _retrieve_ann_data(annotation_dir: str, url: str, fname: str) -> str:
    """
    Fetch an annotation archive, unpack it, and bgzip the requested file.

    The archive is downloaded, its MD5 digest is compared with the expected
    value stored in MD5ANNOTATION, the archive is extracted, and the requested
    file is compressed through _bgzip_ann_data.

    Args:
        annotation_dir (str): The directory used to store the annotation data.
        url (str): The archive location, joined onto TESTDATAURL.
        fname (str): The name of the extracted file to compress.

    Returns:
        str: The path to the compressed annotation file.

    Raises:
        ValueError: If the MD5 digest of the downloaded archive does not match
            the expected value.
    """

    # fetch the annotation archive
    archive_url = os.path.join(TESTDATAURL, url)
    archive = download(annotation_dir, http_url=archive_url)
    # compare the expected digest with the one computed on the download
    expected_md5 = MD5ANNOTATION[os.path.basename(archive)]
    observed_md5 = compute_md5(archive)
    if expected_md5 != observed_md5:
        raise ValueError(f"Download for {os.path.basename(archive)} failed")
    # unpack the archive, then bgzip the requested file
    extracted_dir = untar(archive, annotation_dir)
    annotation_path = os.path.join(extracted_dir, fname)
    return _bgzip_ann_data(annotation_path)
317342

318343

319344
def ensure_pams_directory(dest: str) -> str:

0 commit comments

Comments
 (0)