From dbd38f9cc62d680ec4b4067a6090e618dc0872a6 Mon Sep 17 00:00:00 2001 From: Emma Cooke Date: Tue, 22 Aug 2023 10:36:51 +0100 Subject: [PATCH 1/2] add blast step to rfblast.py --- scripts/validation/rfblast.py | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/scripts/validation/rfblast.py b/scripts/validation/rfblast.py index 2d148b3e..8143f6f7 100755 --- a/scripts/validation/rfblast.py +++ b/scripts/validation/rfblast.py @@ -106,8 +106,11 @@ def generate_new_seed(fasta): filename = 'replacement.fasta' with open(filename, 'w') as f: f.write(fasta) - if not os.path.exists('CM') or not os.path.exists('SEED'): - raise Exception('Error: CM or SEED files not found') + if not os.path.exists('CM'): + cmd = 'rfsearch.pl -t 30 -nodesc -relax' + os.system(cmd) + if not os.path.exists('SEED'): + raise Exception('Error: SEED file not found') cmd = ('cmalign --mapali SEED --noprob CM {} > tempseed && ' 'esl-reformat pfam tempseed > NEWSEEDtemp && ' 'rm tempseed').format(filename) @@ -219,6 +222,26 @@ def validate(): os.system(cmd) parse_fasta(fasta) +@cli.command() +def blast_invalid_sequences(file_path, blast_program='blastn'): + """ + Upload the file `invalid.fa` to NCBI BLAST and download results in the `Single-file JSON` format + """ + + blast_url = f'https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Put&PROGRAM={blast_program}' + + with open(file_path, 'rb') as file: + file_content = file.read() + + payload = { + 'QUERY': file_content, + 'FORMAT_TYPE': 'HTML', + } + + response = requests.post(blast_url, data=payload) + return response.text + + @cli.command() @click.argument('blast_json', type=click.Path(exists=True)) @@ -228,12 +251,16 @@ def replace(blast_json, identity, query_coverage): """ Replace unknown accessions in SEED alignment using NCBI BLAST results """ - filename = blast_json - blast_data = get_blast_data(filename) + blast_data = get_blast_data(blast_json) fasta = choose_replacement(blast_data, identity, query_coverage) generate_new_seed(fasta) click.echo('Done') +@cli.command() +def all(): + validate() + blast_result = blast_invalid_sequences('invalid.fa') + replace(blast_json=blast_result) if __name__ == '__main__': cli() From d7aa3948267456458ce861cc5af1615066fcbba8 Mon Sep 17 00:00:00 2001 From: Emma Cooke Date: Fri, 25 Aug 2023 13:49:01 +0100 Subject: [PATCH 2/2] Update rfblast.py Add Dockerfile, poetry Move to new folder Add step to perform BLAST search on invalid sequences --- scripts/rfblast/Dockerfile | 34 +++ scripts/rfblast/README.md | 0 scripts/rfblast/invalid.fa | 3 + scripts/rfblast/poetry.lock | 372 +++++++++++++++++++++++++++++++++ scripts/rfblast/pyproject.toml | 21 ++ scripts/rfblast/rfblast.py | 314 ++++++++++++++++++++++++++++ scripts/validation/rfblast.py | 266 ----------------------- 7 files changed, 744 insertions(+), 266 deletions(-) create mode 100644 scripts/rfblast/Dockerfile create mode 100644 scripts/rfblast/README.md create mode 100644 scripts/rfblast/invalid.fa create mode 100644 scripts/rfblast/poetry.lock create mode 100644 scripts/rfblast/pyproject.toml create mode 100755 scripts/rfblast/rfblast.py delete mode 100755 scripts/validation/rfblast.py diff --git a/scripts/rfblast/Dockerfile b/scripts/rfblast/Dockerfile new file mode 100644 index 00000000..2200b84b --- /dev/null +++ b/scripts/rfblast/Dockerfile @@ -0,0 +1,34 @@ +FROM python:3.9 + +ENV RNA /usr/src/rfam +ENV INFERNAL /usr/src/infernal + +RUN mkdir $INFERNAL + +WORKDIR $RNA +COPY . . +COPY pyproject.toml $RNA + +ENV PYTHONPATH=${PYTHONPATH}:${PWD} + +# Install Infernal +RUN \ + cd $INFERNAL && \ + curl -OL http://eddylab.org/infernal/infernal-1.1.4.tar.gz && \ + tar -xvzf infernal-1.1.4.tar.gz && \ + cd infernal-1.1.4 && \ + ./configure --prefix=$INFERNAL/infernal-1.1.4 && \ + make && \ + make install && \ + cd easel && \ + make install && \ + cd $INFERNAL && \ + rm infernal-1.1.4.tar.gz + +ENV PATH="$INFERNAL/infernal-1.1.4/bin:$RNA:$PATH" + +RUN pip3 install poetry +RUN poetry config virtualenvs.create false +RUN poetry install --no-dev + +ENTRYPOINT ["/bin/bash"] diff --git a/scripts/rfblast/README.md b/scripts/rfblast/README.md new file mode 100644 index 00000000..e69de29b diff --git a/scripts/rfblast/invalid.fa b/scripts/rfblast/invalid.fa new file mode 100644 index 00000000..7241a0cd --- /dev/null +++ b/scripts/rfblast/invalid.fa @@ -0,0 +1,3 @@ +>1_MN977327 +CCGACCCCCGGCGCAGGUCACGCAAUUUGGGUAAGGUCAUCGAUACCCUCACGUGUGGCU +UCGCCGACCUCAUGGGGUAC diff --git a/scripts/rfblast/poetry.lock b/scripts/rfblast/poetry.lock new file mode 100644 index 00000000..9e39226e --- /dev/null +++ b/scripts/rfblast/poetry.lock @@ -0,0 +1,372 @@ +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. + +[[package]] +name = "biopython" +version = "1.81" +description = "Freely available tools for computational molecular biology." +optional = false +python-versions = ">=3.7" +files = [ + {file = "biopython-1.81-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ef7c79b65b0b3f3c7dc59e20a7f8ae5758d8e852cb8b9cace590dc5617e348ba"}, + {file = "biopython-1.81-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ebfbce0d91796c7aef422ee9dffe8827e07e5abaa94545e006f1f20e965c80b"}, + {file = "biopython-1.81-cp310-cp310-win32.whl", hash = "sha256:919a2c583cabf9c96d2ae4e1245a6b0376932fb342aca302a0fc198b71ab3275"}, + {file = "biopython-1.81-cp310-cp310-win_amd64.whl", hash = "sha256:b37c0d24191e5c96ca02415a5188551980c83a0d518bbc4ffe3c9a5d1fe0ee81"}, + {file = "biopython-1.81-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7a168709694e10b338718c18d967edd5b56c237dc88642c22275796007a70000"}, + {file = "biopython-1.81-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a51d9c1d1b4b634447535da74a644fae59bc234fbbf9001e2dc6b6fbabb98019"}, + {file = "biopython-1.81-cp311-cp311-win32.whl", hash = "sha256:2f9cfaf16d55ab80d514e7aebe5710dabe4e4ff47ede851031202e33b3249da3"}, + {file = "biopython-1.81-cp311-cp311-win_amd64.whl", hash = "sha256:e41b55edcfd448630e77bf4de66a7235324a8a149621499891da6bd1d5085b9a"}, + {file = "biopython-1.81-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3b36ba1bf6395c09a365c53530c9d71f3617763fa2c1d452b3d8948368c0f1de"}, + {file = "biopython-1.81-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c5c07123ff5f44c9e6b5369df854a38afd3c0c50ef58498a0ae8f7eb799f3e8"}, + {file = "biopython-1.81-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:97cbdbed01b2512471f36c74b91658d1dfbdcbf39bc038f6ce5a41c3e60a8fc6"}, + {file = "biopython-1.81-cp37-cp37m-win32.whl", hash = "sha256:35506e39822c52d11cf09a3951e82375ca1bb9303960b4286acf02c9a6f6c4cc"}, + {file = "biopython-1.81-cp37-cp37m-win_amd64.whl", hash = "sha256:793c42a376cd63f62f8a088ce39b7dc6b5c55e4e9031d887c434de1595bfa4b8"}, + {file = "biopython-1.81-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:11d673698b3d0d6589292ea951fb62cb24ea27d273eca0d08dbbd956690f97f5"}, + {file = "biopython-1.81-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:655df416936662c0c8a06a549cb25e1560e1fea5067d850f34fb714b8a3fae6c"}, + {file = "biopython-1.81-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:762c6c43a8486b5fcd07f136a3217b87d24755618b9ea9da1f17124ff44c2ad6"}, + {file = "biopython-1.81-cp38-cp38-win32.whl", hash = "sha256:ee51bb1cd7decffd24da6b76d5e01b7e2fd818ab85cf0c180226cbb5793a3abd"}, + {file = "biopython-1.81-cp38-cp38-win_amd64.whl", hash = "sha256:ccd729249fd5f586dd4c2a3507c2ea2456825d7e615e97c07c409c850eaf4594"}, + {file = "biopython-1.81-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9ba33244f0eff830beaa7240065bdb5095d96fded6599b76bbb9ddab45cd2bbd"}, + {file = "biopython-1.81-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8bb0c690c7368f255ed45236bf0f5464b476b8c083c8f634533921af78278261"}, + {file = "biopython-1.81-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:65b93b513ce9dd7b2ce058720eadf42cd03f312db3409356efeb93123d1320aa"}, + {file = "biopython-1.81-cp39-cp39-win32.whl", hash = "sha256:811796f8d222aa3869a50e31e54ce62b69106b47cd8bb06934867c0d843297b5"}, + {file = "biopython-1.81-cp39-cp39-win_amd64.whl", hash = "sha256:b09efcb4733c8770f25eab5fe555a96a08f5ab9e1bc36939e08ebf2ffbf3e0f1"}, + {file = "biopython-1.81.tar.gz", hash = "sha256:2cf38112b6d8415ad39d6a611988cd11fb5f33eb09346666a87263beba9614e0"}, +] + +[package.dependencies] +numpy = "*" + +[[package]] +name = "black" +version = "23.7.0" +description = "The uncompromising code formatter." +optional = false +python-versions = ">=3.8" +files = [ + {file = "black-23.7.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:5c4bc552ab52f6c1c506ccae05681fab58c3f72d59ae6e6639e8885e94fe2587"}, + {file = "black-23.7.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:552513d5cd5694590d7ef6f46e1767a4df9af168d449ff767b13b084c020e63f"}, + {file = "black-23.7.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:86cee259349b4448adb4ef9b204bb4467aae74a386bce85d56ba4f5dc0da27be"}, + {file = "black-23.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:501387a9edcb75d7ae8a4412bb8749900386eaef258f1aefab18adddea1936bc"}, + {file = "black-23.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:fb074d8b213749fa1d077d630db0d5f8cc3b2ae63587ad4116e8a436e9bbe995"}, + {file = "black-23.7.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:b5b0ee6d96b345a8b420100b7d71ebfdd19fab5e8301aff48ec270042cd40ac2"}, + {file = "black-23.7.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:893695a76b140881531062d48476ebe4a48f5d1e9388177e175d76234ca247cd"}, + {file = "black-23.7.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:c333286dc3ddca6fdff74670b911cccedacb4ef0a60b34e491b8a67c833b343a"}, + {file = "black-23.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:831d8f54c3a8c8cf55f64d0422ee875eecac26f5f649fb6c1df65316b67c8926"}, + {file = "black-23.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:7f3bf2dec7d541b4619b8ce526bda74a6b0bffc480a163fed32eb8b3c9aed8ad"}, + {file = "black-23.7.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:f9062af71c59c004cd519e2fb8f5d25d39e46d3af011b41ab43b9c74e27e236f"}, + {file = "black-23.7.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:01ede61aac8c154b55f35301fac3e730baf0c9cf8120f65a9cd61a81cfb4a0c3"}, + {file = "black-23.7.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:327a8c2550ddc573b51e2c352adb88143464bb9d92c10416feb86b0f5aee5ff6"}, + {file = "black-23.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d1c6022b86f83b632d06f2b02774134def5d4d4f1dac8bef16d90cda18ba28a"}, + {file = "black-23.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:27eb7a0c71604d5de083757fbdb245b1a4fae60e9596514c6ec497eb63f95320"}, + {file = "black-23.7.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:8417dbd2f57b5701492cd46edcecc4f9208dc75529bcf76c514864e48da867d9"}, + {file = "black-23.7.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:47e56d83aad53ca140da0af87678fb38e44fd6bc0af71eebab2d1f59b1acf1d3"}, + {file = "black-23.7.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:25cc308838fe71f7065df53aedd20327969d05671bac95b38fdf37ebe70ac087"}, + {file = "black-23.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:642496b675095d423f9b8448243336f8ec71c9d4d57ec17bf795b67f08132a91"}, + {file = "black-23.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:ad0014efc7acf0bd745792bd0d8857413652979200ab924fbf239062adc12491"}, + {file = "black-23.7.0-py3-none-any.whl", hash = "sha256:9fd59d418c60c0348505f2ddf9609c1e1de8e7493eab96198fc89d9f865e7a96"}, + {file = "black-23.7.0.tar.gz", hash = "sha256:022a582720b0d9480ed82576c920a8c1dde97cc38ff11d8d8859b3bd6ca9eedb"}, +] + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = ">=0.4.3" +packaging = ">=22.0" +pathspec = ">=0.9.0" +platformdirs = ">=2" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.7.4)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + +[[package]] +name = "certifi" +version = "2023.7.22" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"}, + {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.2.0" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.2.0.tar.gz", hash = "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-win32.whl", hash = "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-win32.whl", hash = "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-win32.whl", hash = "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-win32.whl", hash = "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-win32.whl", hash = "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80"}, + {file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"}, +] + +[[package]] +name = "click" +version = "8.1.7" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.7" +files = [ + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "idna" +version = "3.4" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, + {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, +] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." +optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + +[[package]] +name = "numpy" +version = "1.25.2" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "numpy-1.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:db3ccc4e37a6873045580d413fe79b68e47a681af8db2e046f1dacfa11f86eb3"}, + {file = "numpy-1.25.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:90319e4f002795ccfc9050110bbbaa16c944b1c37c0baeea43c5fb881693ae1f"}, + {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfe4a913e29b418d096e696ddd422d8a5d13ffba4ea91f9f60440a3b759b0187"}, + {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f08f2e037bba04e707eebf4bc934f1972a315c883a9e0ebfa8a7756eabf9e357"}, + {file = "numpy-1.25.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bec1e7213c7cb00d67093247f8c4db156fd03075f49876957dca4711306d39c9"}, + {file = "numpy-1.25.2-cp310-cp310-win32.whl", hash = "sha256:7dc869c0c75988e1c693d0e2d5b26034644399dd929bc049db55395b1379e044"}, + {file = "numpy-1.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:834b386f2b8210dca38c71a6e0f4fd6922f7d3fcff935dbe3a570945acb1b545"}, + {file = "numpy-1.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5462d19336db4560041517dbb7759c21d181a67cb01b36ca109b2ae37d32418"}, + {file = "numpy-1.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c5652ea24d33585ea39eb6a6a15dac87a1206a692719ff45d53c5282e66d4a8f"}, + {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d60fbae8e0019865fc4784745814cff1c421df5afee233db6d88ab4f14655a2"}, + {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e7f0f7f6d0eee8364b9a6304c2845b9c491ac706048c7e8cf47b83123b8dbf"}, + {file = "numpy-1.25.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bb33d5a1cf360304754913a350edda36d5b8c5331a8237268c48f91253c3a364"}, + {file = "numpy-1.25.2-cp311-cp311-win32.whl", hash = "sha256:5883c06bb92f2e6c8181df7b39971a5fb436288db58b5a1c3967702d4278691d"}, + {file = "numpy-1.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:5c97325a0ba6f9d041feb9390924614b60b99209a71a69c876f71052521d42a4"}, + {file = "numpy-1.25.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b79e513d7aac42ae918db3ad1341a015488530d0bb2a6abcbdd10a3a829ccfd3"}, + {file = "numpy-1.25.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eb942bfb6f84df5ce05dbf4b46673ffed0d3da59f13635ea9b926af3deb76926"}, + {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e0746410e73384e70d286f93abf2520035250aad8c5714240b0492a7302fdca"}, + {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7806500e4f5bdd04095e849265e55de20d8cc4b661b038957354327f6d9b295"}, + {file = "numpy-1.25.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8b77775f4b7df768967a7c8b3567e309f617dd5e99aeb886fa14dc1a0791141f"}, + {file = "numpy-1.25.2-cp39-cp39-win32.whl", hash = "sha256:2792d23d62ec51e50ce4d4b7d73de8f67a2fd3ea710dcbc8563a51a03fb07b01"}, + {file = "numpy-1.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:76b4115d42a7dfc5d485d358728cdd8719be33cc5ec6ec08632a5d6fca2ed380"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a1329e26f46230bf77b02cc19e900db9b52f398d6722ca853349a782d4cff55"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c3abc71e8b6edba80a01a52e66d83c5d14433cbcd26a40c329ec7ed09f37901"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1b9735c27cea5d995496f46a8b1cd7b408b3f34b6d50459d9ac8fe3a20cc17bf"}, + {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"}, +] + +[[package]] +name = "packaging" +version = "23.1" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, + {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, +] + +[[package]] +name = "pathspec" +version = "0.11.2" +description = "Utility library for gitignore style pattern matching of file paths." +optional = false +python-versions = ">=3.7" +files = [ + {file = "pathspec-0.11.2-py3-none-any.whl", hash = "sha256:1d6ed233af05e679efb96b1851550ea95bbb64b7c490b0f5aa52996c11e92a20"}, + {file = "pathspec-0.11.2.tar.gz", hash = "sha256:e0d8d0ac2f12da61956eb2306b69f9469b42f4deb0f3cb6ed47b9cce9996ced3"}, +] + +[[package]] +name = "platformdirs" +version = "3.10.0" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +optional = false +python-versions = ">=3.7" +files = [ + {file = "platformdirs-3.10.0-py3-none-any.whl", hash = "sha256:d7c24979f292f916dc9cbf8648319032f551ea8c49a4c9bf2fb556a02070ec1d"}, + {file = "platformdirs-3.10.0.tar.gz", hash = "sha256:b45696dab2d7cc691a3226759c0d3b00c47c8b6e293d96f6436f733303f77f6d"}, +] + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"] + +[[package]] +name = "requests" +version = "2.31.0" +description = "Python HTTP for Humans." +optional = false +python-versions = ">=3.7" +files = [ + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + +[[package]] +name = "typing-extensions" +version = "4.7.1" +description = "Backported and Experimental Type Hints for Python 3.7+" +optional = false +python-versions = ">=3.7" +files = [ + {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"}, + {file = "typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"}, +] + +[[package]] +name = "urllib3" +version = "2.0.4" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=3.7" +files = [ + {file = "urllib3-2.0.4-py3-none-any.whl", hash = "sha256:de7df1803967d2c2a98e4b11bb7d6bd9210474c46e8a0401514e3a42a75ebde4"}, + {file = "urllib3-2.0.4.tar.gz", hash = "sha256:8d22f86aae8ef5e410d4f539fde9ce6b2113a001bb4d189e0aed70642d602b11"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "xmltodict" +version = "0.13.0" +description = "Makes working with XML feel like you are working with JSON" +optional = false +python-versions = ">=3.4" +files = [ + {file = "xmltodict-0.13.0-py2.py3-none-any.whl", hash = "sha256:aa89e8fd76320154a40d19a0df04a4695fb9dc5ba977cbb68ab3e4eb225e7852"}, + {file = "xmltodict-0.13.0.tar.gz", hash = "sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56"}, +] + +[metadata] +lock-version = "2.0" +python-versions = "^3.9" +content-hash = "eb5a89a192ff3d3bdc518f98c35d21764ce2eb9dbc313f11918346c78d05a882" diff --git a/scripts/rfblast/pyproject.toml b/scripts/rfblast/pyproject.toml new file mode 100644 index 00000000..2be63fd3 --- /dev/null +++ b/scripts/rfblast/pyproject.toml @@ -0,0 +1,21 @@ +[tool.poetry] +name = "rfblast" +version = "0.1.0" +description = "" +authors = ["Emma Cooke "] +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.9" +requests = "^2.31.0" +click = "^8.1.7" +biopython = "^1.81" +xmltodict = "^0.13.0" + + +[tool.poetry.group.dev.dependencies] +black = "^23.7.0" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/scripts/rfblast/rfblast.py b/scripts/rfblast/rfblast.py new file mode 100755 index 00000000..6b3d0c16 --- /dev/null +++ b/scripts/rfblast/rfblast.py @@ -0,0 +1,314 @@ +""" +Usage: + +# navigate to the folder with the SEED and CM files +cd /path/to/folder/with/SEED + +# identify invalid accessions that need to be replaced +rfblast.py validate + +# manually upload the file `invalid.fa` to NCBI BLAST +# download results in the `Single-file JSON` format + +# replace invalid accessions in the SEED file +rfblast.py replace XXXXXXXXXXX-Alignment.json + +# where XXXXXXXXXXX-Alignment.json is the NCBI BLAST result. +""" + +import json +import os +import re +import xmltodict + +import click +import requests + +from Bio.Blast import NCBIWWW +from Bio import SeqIO + + +IDENTITY = 90 +QUERY_COVERAGE = 70 +TEMPDIR = "temp" + + +def get_accession(gid): + """ + Get versioned accession, for example: + Input: + gi|2047803076|gb|CP061286.1| + Output: + CP061286.1 + """ + parts = gid.split("|") + return parts[-2] + + +def get_blast_data(filename): + """ + Load BLAST JSON output. + """ + with open(filename, "r") as f: + return json.load(f) + + +def choose_replacement(data, min_identity, min_query_coverage): + """ + Loop over BLAST results and pick best replacement for each hit. + """ + # do not pick replacement from the same accession if already seen + fasta = "" + seen_accessions = set() + for query_num, search in enumerate(data["BlastOutput2"]): + query_title = search["report"]["results"]["search"]["query_title"] + query_len = search["report"]["results"]["search"]["query_len"] + replacement_found = False + for entry in search["report"]["results"]["search"]["hits"]: + acc = get_accession(entry["description"][0]["id"]) + if acc not in seen_accessions: + seen_accessions.add(acc) + replacement_found = True + else: + continue + sequence = entry["hsps"][0]["hseq"] + start = entry["hsps"][0]["hit_from"] + end = entry["hsps"][0]["hit_to"] + align_len = entry["hsps"][0]["align_len"] + gaps = entry["hsps"][0]["gaps"] + exact_matches = entry["hsps"][0]["identity"] + identity = float(exact_matches) / align_len * 100 + query_coverage = float(align_len - gaps) / query_len * 100 + target_coverage = float(align_len - gaps) / len(sequence) * 100 + if identity >= min_identity and query_coverage >= min_query_coverage: + warning = False + else: + warning = True + summary = ( + "#{query_num} {message} {query_title} " + "with {acc}/{start}-{end} at {identity}% identity; " + "{gaps} gaps; query coverage {query_coverage}" + ).format( + acc=acc, + start=start, + end=end, + query_title=query_title, + identity=round(identity), + query_coverage=round(query_coverage), + target_coverage=round(target_coverage, 2), + gaps=gaps, + message="Replace" + if not warning + else " WARNING: No replacement found for", + query_num=query_num + 1, + ) + print(summary) + if not warning: + fasta += ">{acc}/{start}-{end}\n{sequence}\n".format( + acc=acc, + start=start, + end=end, + sequence=sequence.replace("-", "").replace("T", "U"), + ) + if replacement_found: + break + return fasta + + +def generate_new_seed(fasta): + filename = "replacement.fasta" + with open(filename, "w") as f: + f.write(fasta) + if not os.path.exists("CM"): + cmd = "rfsearch.pl -t 30 -nodesc -relax" + os.system(cmd) + if not os.path.exists("SEED"): + raise Exception("Error: SEED file not found") + cmd = ( + "cmalign --mapali SEED --noprob CM {} > tempseed && " + "esl-reformat pfam tempseed > NEWSEEDtemp && " + "rm tempseed" + ).format(filename) + os.system(cmd) + invalid = set() + with open("invalid.txt", "r") as f: + for line in f: + invalid.add(line.strip()) + newseed = open("NEWSEED", "w") + with open("NEWSEEDtemp", "r") as f: + for line in f: + skip = False + for invalid_accession in invalid: + if invalid_accession in line: + skip = True + break + if not skip: + newseed.write(line) + newseed.close() + os.remove("NEWSEEDtemp") + cmd = ( + 'echo "Old SEED info:" && esl-alistat SEED && ' + 'echo "New SEED info:" && esl-alistat NEWSEED' + ) + os.system(cmd) + + +def is_valid_accession(accession): + """ + TB03JUN2009E__Contig_2000/988-772 + NZ_CP007501.1/771730-771924 + + Found: + {"header":{"type":"esearch","version":"0.3"},"esearchresult":{"count":"1","retmax":"1","retstart":"0","idlist":["1119664412"],"translationset":[],"querytranslation":""}} + Found but a different ID: + {"header":{"type":"esearch","version":"0.3"},"esearchresult":{"count":"1","retmax":"1","retstart":"0","idlist":["EP994606.1"],"translationset":[],"translationstack":[{"term":"JCVI_SCAF_1096627298421[All Fields]","field":"All Fields","count":"1","explode":"N"},"GROUP"],"querytranslation":"JCVI_SCAF_1096627298421[All Fields]"}} + Not found: + {"header":{"type":"esearch","version":"0.3"},"esearchresult":{"count":"0","retmax":"0","retstart":"0","idlist":[],"translationset":[],"querytranslation":"(TB03JUN2009E__Contig_2000[All Fields])","errorlist":{"phrasesnotfound":["TB03JUN2009E__Contig_2000"],"fieldsnotfound":[]},"warninglist":{"phrasesignored":[],"quotedphrasesnotfound":[],"outputmessages":["No items found."]}}} + """ + parts = accession.split("/") + url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=nucleotide&term={}&retmode=json&idtype=acc" + r = requests.get(url.format(parts[0])) + data = r.json() + if ( + int(data["esearchresult"]["count"]) == 1 + and len(data["esearchresult"]["idlist"]) > 0 + and data["esearchresult"]["idlist"][0] == parts[0] + ): + return True + else: + return False + + +def fetch_seqs(filename, accessions, label): + """ """ + f_txt = "{}.txt".format(label) + f_fa = "{}.fa".format(label) + with open(f_txt, "w") as f: + for accession in accessions: + f.write(accession + "\n") + cmd = "esl-sfetch -f {} {} > {}".format(filename, f_txt, f_fa) + os.system(cmd) + click.echo("Saved {} {} accessions in {}".format(len(accessions), label, f_fa)) + + +def parse_fasta(filename): + """ """ + accessions = set() + with open(filename, "r") as f: + for line in f: + if line.startswith(">"): + parts = re.split(r"\s+", line) + accession = parts[0].replace(">", "") + accessions.add(accession) + valid = set() + invalid = set() + for accession in accessions: + if is_valid_accession(accession): + click.echo("{} is valid".format(accession)) + valid.add(accession) + else: + click.echo("{} is invalid".format(accession)) + invalid.add(accession) + os.system("esl-sfetch --index {}".format(filename)) + if valid: + fetch_seqs(filename, valid, "valid") + else: + click.echo("No valid accessions found") + if invalid: + fetch_seqs(filename, invalid, "invalid") + click.echo("=========================\nGenerated file invalid.fa") + os.system("esl-seqstat {}".format("invalid.fa")) + click.echo("Upload file invalid.fa to NCBI BLAST") + else: + click.echo("No invalid accessions found") + + +@click.group() +def cli(): + pass + + +def validate(seed): + """ + Convert SEED to a fasta file containing sequences with unknown IDs. + """ + if not os.path.exists(seed): + raise Exception("Error: SEED does not exist") + fasta = "seed.fasta" + cmd = "esl-reformat fasta {} > {}".format(seed, fasta) + os.system(cmd) + print(fasta) + parse_fasta(fasta) + + +@cli.command() +@click.argument("invalid", type=click.Path(exists=True)) +def blast_invalid_sequences(invalid, blast_program="blastn"): + """ + Upload the file `invalid.fa` to NCBI BLAST and download results in the `Single-file JSON` format + """ + + blast_results = [] + fasta_file = "invalid.fa" + sequences = SeqIO.parse(fasta_file, "fasta") + + for seq in sequences: + result_handle = NCBIWWW.qblast("blastn", "nt", seq.seq) + + blast_results.append(result_handle.read()) + + output_file = "blast_results.xml" + with open(output_file, "w") as output: + for res in blast_results: + output.write(res) + + with open(output_file, "r") as xml_file: + xml_data = xml_file.read() + + xml_dict = xmltodict.parse(xml_data) + json_data = json.dumps(xml_dict, indent=4) + + with open("output.json", "w") as json_file: + json_file.write(json_data) + + print(f"BLAST search completed. Results saved to {json_file}") + + +@cli.command() +@click.argument("blast_json", type=click.Path(exists=True)) +@click.option( + "--identity", + default=IDENTITY, + type=click.FLOAT, + help="Minimum % identity between query and target", +) +@click.option( + "--query_coverage", + default=QUERY_COVERAGE, + type=click.FLOAT, + help="Minimum coverage of the seed sequence", +) +def replace(blast_json, identity, query_coverage): + """ + Replace unknown accessions in SEED alignment using NCBI BLAST results + """ + blast_data = get_blast_data(blast_json) + fasta = choose_replacement(blast_data, identity, query_coverage) + generate_new_seed(fasta) + click.echo("Done") + + +@cli.command() +@click.argument("seed", type=click.Path(exists=True)) +def all(seed): + """ + Carry out all steps + """ + validate(seed) + blast_result = blast_invalid_sequences("invalid.fa") + print(blast_result) + replace(blast_json=blast_result) + + +if __name__ == "__main__": + cli() diff --git a/scripts/validation/rfblast.py b/scripts/validation/rfblast.py deleted file mode 100755 index 8143f6f7..00000000 --- a/scripts/validation/rfblast.py +++ /dev/null @@ -1,266 +0,0 @@ -#!/usr/bin/env python - -""" -Usage: - -# navigate to the folder with the SEED and CM files -cd /path/to/folder/with/SEED - -# identify invalid accessions that need to be replaced -rfblast.py validate - -# manually upload the file `invalid.fa` to NCBI BLAST -# download results in the `Single-file JSON` format - -# replace invalid accessions in the SEED file -rfblast.py replace XXXXXXXXXXX-Alignment.json - -# where XXXXXXXXXXX-Alignment.json is the NCBI BLAST result. -""" - -import json -import os -import re - -import click -import requests - -IDENTITY = 90 -QUERY_COVERAGE = 70 - - -def get_accession(gid): - """ - Get versioned accession, for example: - Input: - gi|2047803076|gb|CP061286.1| - Output: - CP061286.1 - """ - parts = gid.split('|') - return parts[-2] - - -def get_blast_data(filename): - """ - Load BLAST JSON output. - """ - with open(filename, 'r') as f: - return json.load(f) - - -def choose_replacement(data, min_identity, min_query_coverage): - """ - Loop over BLAST results and pick best replacement for each hit. - """ - # do not pick replacement from the same accession if already seen - fasta = '' - seen_accessions = set() - for query_num, search in enumerate(data['BlastOutput2']): - query_title = search['report']['results']['search']['query_title'] - query_len = search['report']['results']['search']['query_len'] - replacement_found = False - for entry in search['report']['results']['search']['hits']: - acc = get_accession(entry['description'][0]['id']) - if acc not in seen_accessions: - seen_accessions.add(acc) - replacement_found = True - else: - continue - sequence = entry['hsps'][0]['hseq'] - start = entry['hsps'][0]['hit_from'] - end = entry['hsps'][0]['hit_to'] - align_len = entry['hsps'][0]['align_len'] - gaps = entry['hsps'][0]['gaps'] - exact_matches = entry['hsps'][0]['identity'] - identity = float(exact_matches) / align_len * 100 - query_coverage = float(align_len - gaps) / query_len * 100 - target_coverage = float(align_len - gaps) / len(sequence) * 100 - if identity >= min_identity and query_coverage >= min_query_coverage: - warning = False - else: - warning = True - summary = ('#{query_num} {message} {query_title} ' - 'with {acc}/{start}-{end} at {identity}% identity; ' - '{gaps} gaps; query coverage {query_coverage}').format( - acc=acc, start=start, end=end, query_title=query_title, - identity=round(identity), query_coverage=round(query_coverage), - target_coverage=round(target_coverage, 2), gaps=gaps, - message='Replace' if not warning else ' WARNING: No replacement found for', - query_num=query_num+1 - ) - print(summary) - if not warning: - fasta += '>{acc}/{start}-{end}\n{sequence}\n'.format( - acc=acc, - start=start, - end=end, - sequence=sequence.replace('-', '').replace('T', 'U') - ) - if replacement_found: - break - return fasta - - -def generate_new_seed(fasta): - filename = 'replacement.fasta' - with open(filename, 'w') as f: - f.write(fasta) - if not os.path.exists('CM'): - cmd = 'rfsearch.pl -t 30 -nodesc -relax' - os.system(cmd) - if not os.path.exists('SEED'): - raise Exception('Error: SEED file not found') - cmd = ('cmalign --mapali SEED --noprob CM {} > tempseed && ' - 'esl-reformat pfam tempseed > NEWSEEDtemp && ' - 'rm tempseed').format(filename) - os.system(cmd) - invalid = set() - with open('invalid.txt', 'r') as f: - for line in f: - invalid.add(line.strip()) - newseed = open('NEWSEED', 'w') - with open('NEWSEEDtemp', 'r') as f: - for line in f: - skip = False - for invalid_accession in invalid: - if invalid_accession in line: - skip = True - break - if not skip: - newseed.write(line) - newseed.close() - os.remove('NEWSEEDtemp') - cmd = ('echo "Old SEED info:" && esl-alistat SEED && ' - 'echo "New SEED info:" && esl-alistat NEWSEED') - os.system(cmd) - - - -def is_valid_accession(accession): - """ - TB03JUN2009E__Contig_2000/988-772 - NZ_CP007501.1/771730-771924 - - Found: - {"header":{"type":"esearch","version":"0.3"},"esearchresult":{"count":"1","retmax":"1","retstart":"0","idlist":["1119664412"],"translationset":[],"querytranslation":""}} - Found but a different ID: - {"header":{"type":"esearch","version":"0.3"},"esearchresult":{"count":"1","retmax":"1","retstart":"0","idlist":["EP994606.1"],"translationset":[],"translationstack":[{"term":"JCVI_SCAF_1096627298421[All Fields]","field":"All Fields","count":"1","explode":"N"},"GROUP"],"querytranslation":"JCVI_SCAF_1096627298421[All Fields]"}} - Not found: - {"header":{"type":"esearch","version":"0.3"},"esearchresult":{"count":"0","retmax":"0","retstart":"0","idlist":[],"translationset":[],"querytranslation":"(TB03JUN2009E__Contig_2000[All Fields])","errorlist":{"phrasesnotfound":["TB03JUN2009E__Contig_2000"],"fieldsnotfound":[]},"warninglist":{"phrasesignored":[],"quotedphrasesnotfound":[],"outputmessages":["No items found."]}}} - """ - parts = accession.split('/') - url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=nucleotide&term={}&retmode=json&idtype=acc' - r = requests.get(url.format(parts[0])) - data = r.json() - if int(data['esearchresult']['count']) == 1 and len(data['esearchresult']['idlist']) > 0 and data['esearchresult']['idlist'][0] == parts[0]: - return True - else: - return False - - -def fetch_seqs(filename, accessions, label): - """ - """ - f_txt = '{}.txt'.format(label) - f_fa = '{}.fa'.format(label) - with open(f_txt, 'w') as f: - for accession in accessions: - f.write(accession + '\n') - cmd = 'esl-sfetch -f {} {} > {}'.format(filename, f_txt, f_fa) - os.system(cmd) - click.echo('Saved {} {} accessions in {}'.format(len(accessions), label, f_fa)) - - -def parse_fasta(filename): - """ - """ - accessions = set() - with open(filename, 'r') as f: - for line in f: - if line.startswith('>'): - parts = re.split(r'\s+', line) - accession = parts[0].replace('>', '') - accessions.add(accession) - valid = set() - invalid = set() - for accession in accessions: - if is_valid_accession(accession): - click.echo('{} is valid'.format(accession)) - valid.add(accession) - else: - click.echo('{} is invalid'.format(accession)) - invalid.add(accession) - os.system('esl-sfetch --index {}'.format(filename)) - if valid: - fetch_seqs(filename, valid, 'valid') - else: - click.echo('No valid accessions found') - if invalid: - fetch_seqs(filename, invalid, 'invalid') - click.echo('=========================\nGenerated file invalid.fa') - os.system('esl-seqstat {}'.format('invalid.fa')) - click.echo('Upload file invalid.fa to NCBI BLAST') - else: - click.echo('No invalid accessions found') - - -@click.group() -def cli(): - pass - - -@cli.command() -def validate(): - """ - Convert SEED to a fasta file containing sequences with unknown IDs. - """ - if not os.path.exists('SEED'): - raise Exception('Error: SEED does not exist') - fasta = 'seed.fasta' - cmd = 'esl-reformat fasta SEED > {}'.format(fasta) - os.system(cmd) - parse_fasta(fasta) - -@cli.command() -def blast_invalid_sequences(file_path, blast_program='blastn'): - """ - Upload the file `invalid.fa` to NCBI BLAST and download results in the `Single-file JSON` format - """ - - blast_url = f'https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Put&PROGRAM={blast_program}' - - with open(file_path, 'rb') as file: - file_content = file.read() - - payload = { - 'QUERY': file_content, - 'FORMAT_TYPE': 'HTML', - } - - response = requests.post(blast_url, data=payload) - return response.text - - - -@cli.command() -@click.argument('blast_json', type=click.Path(exists=True)) -@click.option('--identity', default=IDENTITY, type=click.FLOAT, help='Minimum % identity between query and target') -@click.option('--query_coverage', default=QUERY_COVERAGE, type=click.FLOAT, help='Minimum coverage of the seed sequence') -def replace(blast_json, identity, query_coverage): - """ - Replace unknown accessions in SEED alignment using NCBI BLAST results - """ - blast_data = get_blast_data(blast_json) - fasta = choose_replacement(blast_data, identity, query_coverage) - generate_new_seed(fasta) - click.echo('Done') - -@cli.command() -def all(): - validate() - blast_result = blast_invalid_sequences('invalid.fa') - replace(blast_json=blast_result) - -if __name__ == '__main__': - cli()