diff --git a/.ci/flake8_ignorelist.txt b/.ci/flake8_ignorelist.txt index f1cffc4f5952..d4ced27b5b8d 100644 --- a/.ci/flake8_ignorelist.txt +++ b/.ci/flake8_ignorelist.txt @@ -11,6 +11,7 @@ database doc/build eggs lib/galaxy/web/proxy/js/node_modules +lib/tool_shed/test/test_data/repos static/maps static/scripts test/functional/tools/cwl_tools/v1.?/ diff --git a/.flake8 b/.flake8 index fe463fb975e1..3e086e9d1287 100644 --- a/.flake8 +++ b/.flake8 @@ -7,3 +7,4 @@ # W503 is line breaks before binary operators, which has been reversed in PEP 8. # D** are docstring linting - which we mostly ignore except D302. (Hopefully we will solve more over time). ignore = B008,E203,E402,E501,W503,D100,D101,D102,D103,D104,D105,D106,D107,D200,D201,D202,D204,D205,D206,D207,D208,D209,D210,D211,D300,D301,D400,D401,D402,D403,D412,D413 +exclude = lib/tool_shed/test/test_data/repos diff --git a/.isort.cfg b/.isort.cfg index c99585f7f278..ca490cd2930a 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -10,5 +10,5 @@ profile=black reverse_relative=true skip_gitignore=true # Make isort run faster by skipping database -skip_glob=database/* +skip_glob=database/*,lib/tool_shed/test/test_data/repos/* src_paths=lib diff --git a/lib/tool_shed/test/base/api.py b/lib/tool_shed/test/base/api.py index 3cd998769013..b14c19dd7c15 100644 --- a/lib/tool_shed/test/base/api.py +++ b/lib/tool_shed/test/base/api.py @@ -35,7 +35,7 @@ class ShedBaseTestCase(DrivenFunctionalTestCase): @property def populator(self) -> ToolShedPopulator: if self._populator is None: - self._populator = ToolShedPopulator(self.admin_api_interactor, self.api_interactor) + self._populator = self._get_populator(self.api_interactor) return self._populator @property @@ -50,7 +50,18 @@ def api_interactor(self) -> ShedApiInteractor: password = "testpassword" ensure_user_with_email(self.admin_api_interactor, email, password) user_api_key = self._api_key(email, password) - return ShedApiInteractor(self.url, user_api_key) + return 
self._api_interactor(user_api_key) + + def _api_interactor_by_credentials(self, email: str, password: str) -> ShedApiInteractor: + ensure_user_with_email(self.admin_api_interactor, email, password) + user_api_key = self._api_key(email, password) + return self._api_interactor(user_api_key) + + def _api_interactor(self, api_key: str) -> ShedApiInteractor: + return ShedApiInteractor(self.url, api_key) + + def _get_populator(self, user_api_interactor) -> ToolShedPopulator: + return ToolShedPopulator(self.admin_api_interactor, user_api_interactor) def _api_key(self, email: str, password: str) -> str: headers = baseauth_headers(email, password) diff --git a/lib/tool_shed/test/base/populators.py b/lib/tool_shed/test/base/populators.py index 01f3f6c0e870..2dd53dc27537 100644 --- a/lib/tool_shed/test/base/populators.py +++ b/lib/tool_shed/test/base/populators.py @@ -81,21 +81,52 @@ def __init__(self, admin_api_interactor: ShedApiInteractor, api_interactor: Shed self._admin_api_interactor = admin_api_interactor self._api_interactor = api_interactor - def setup_test_data_repo(self, test_data_path: str) -> Repository: - prefix = test_data_path.replace("_", "") - category_id = self.new_category(prefix=prefix).id - repository = self.new_repository(category_id, prefix=prefix) - repository_id = repository.id + def setup_bismark_repo(self, repository_id: HasRepositoryId, end: Optional[int] = None): + self.setup_test_data_repo_by_id("bismark", repository_id, assert_ok=False, end=end) + + def setup_test_data_repo_by_id( + self, + test_data_path: str, + repository_id: Optional[HasRepositoryId] = None, + assert_ok=True, + start: int = 0, + end: Optional[int] = None, + ) -> HasRepositoryId: + if repository_id is None: + prefix = test_data_path.replace("_", "") + category_id = self.new_category(prefix=prefix).id + repository = self.new_repository(category_id, prefix=prefix) + repository_id = repository.id + assert repository_id for index, repo_tar in enumerate(repo_tars(test_data_path)): 
+ if index < start: + continue + + if end and index >= end: + break + commit_message = f"Updating {test_data_path} with index {index} with tar {repo_tar}" - response = self.upload_revision( - repository_id, - repo_tar, - commit_message=commit_message, - ) - assert response.is_ok + response = self.upload_revision_raw(repository_id, repo_tar, commit_message) + if assert_ok: + api_asserts.assert_status_code_is_ok(response) + assert RepositoryUpdate(__root__=response.json()).is_ok + return repository_id + + def setup_test_data_repo( + self, + test_data_path: str, + repository: Optional[Repository] = None, + assert_ok=True, + start: int = 0, + end: Optional[int] = None, + ) -> Repository: + if repository is None: + prefix = test_data_path.replace("_", "") + category_id = self.new_category(prefix=prefix).id + repository = self.new_repository(category_id, prefix=prefix) + self.setup_test_data_repo_by_id(test_data_path, repository, assert_ok=assert_ok, start=start, end=end) return repository def setup_column_maker_repo(self, prefix=DEFAULT_PREFIX) -> Repository: @@ -152,7 +183,7 @@ def upload_revision_raw( def upload_revision( self, repository: HasRepositoryId, path: Traversable, commit_message: str = DEFAULT_COMMIT_MESSAGE ): - response = self.upload_revision_raw(repository, path, commit_message) + response = self.upload_revision_raw(repository, path, commit_message=commit_message) if response.status_code != 200: response_json = None err_msg = None diff --git a/lib/tool_shed/test/base/twilltestcase.py b/lib/tool_shed/test/base/twilltestcase.py index 3961f1b00ebb..89743aff6877 100644 --- a/lib/tool_shed/test/base/twilltestcase.py +++ b/lib/tool_shed/test/base/twilltestcase.py @@ -1,4 +1,5 @@ import abc +import contextlib import logging import os import shutil @@ -11,6 +12,7 @@ from typing import ( Any, Dict, + Iterator, List, Optional, ) @@ -59,6 +61,8 @@ hgweb_config, xml_util, ) +from tool_shed.test.base.populators import TEST_DATA_REPO_FILES +from 
tool_shed.util.repository_content_util import tar_open from tool_shed.webapp.model import Repository as DbRepository from tool_shed_client.schema import ( Category, @@ -716,6 +720,12 @@ def last_page(self): def last_url(self): return tc.browser.url + def user_api_interactor(self, email="test@bx.psu.edu", password="testuser"): + return self._api_interactor_by_credentials(email, password) + + def user_populator(self, email="test@bx.psu.edu", password="testuser"): + return self._get_populator(self.user_api_interactor(email=email, password=password)) + def login( self, email="test@bx.psu.edu", password="testuser", username="admin-user", redirect="", logout_first=True ): @@ -1025,7 +1035,7 @@ def create_repository_dependency( repository_names = [] if complex: filename = "tool_dependencies.xml" - self.generate_complex_dependency_xml( + target = self.generate_complex_dependency_xml( filename=filename, filepath=filepath, repository_tuples=repository_tuples, @@ -1037,55 +1047,115 @@ def create_repository_dependency( repository_names.append(name) dependency_description = f"{repository.name} depends on {', '.join(repository_names)}." 
filename = "repository_dependencies.xml" - self.generate_simple_dependency_xml( + target = self.generate_simple_dependency_xml( repository_tuples=repository_tuples, filename=filename, filepath=filepath, dependency_description=dependency_description, prior_installation_required=prior_installation_required, ) - self.upload_file( - repository, - filename=filename, - filepath=filepath, - valid_tools_only=False, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message=f"Uploaded dependency on {', '.join(repository_names)}.", - strings_displayed=None, - strings_not_displayed=None, - ) + self.add_file_to_repository(repository, target, filename, strings_displayed=strings_displayed) def deactivate_repository(self, installed_repository: galaxy_model.ToolShedRepository) -> None: assert self._installation_client self._installation_client.deactivate_repository(installed_repository) - def delete_files_from_repository(self, repository: Repository, filenames: List[str]): + @contextlib.contextmanager + def cloned_repo(self, repository: Repository) -> Iterator[str]: temp_directory = tempfile.mkdtemp(prefix="toolshedrepowithoutfiles") try: self.clone_repository(repository, temp_directory) - for filename in filenames: - to_delete = os.path.join(temp_directory, filename) - os.remove(to_delete) shutil.rmtree(os.path.join(temp_directory, ".hg")) - tf = tempfile.NamedTemporaryFile() - with tarfile.open(tf.name, "w:gz") as tar: - tar.add(temp_directory, arcname="repo") - target = os.path.abspath(tf.name) - self.upload_file( - repository, - filename=os.path.basename(target), - filepath=os.path.dirname(target), - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=True, - commit_message="Uploaded revision with deleted files.", - strings_displayed=[], - strings_not_displayed=[], - ) + contents = os.listdir(temp_directory) + if len(contents) == 1 and contents[0] == "repo": + yield os.path.join(temp_directory, "repo") + else: + yield 
temp_directory finally: shutil.rmtree(temp_directory) + def setup_freebayes_0010_repo(self, repository: Repository): + strings_displayed = [ + "Metadata may have been defined", + "This file requires an entry", + "tool_data_table_conf", + ] + self.add_file_to_repository(repository, "freebayes/freebayes.xml", strings_displayed=strings_displayed) + strings_displayed = ["Upload a file named sam_fa_indices.loc.sample"] + self.add_file_to_repository(repository, "freebayes/tool_data_table_conf.xml.sample", strings_displayed=strings_displayed) + self.add_file_to_repository(repository, "freebayes/sam_fa_indices.loc.sample") + target = os.path.join("freebayes", "malformed_tool_dependencies", "tool_dependencies.xml") + self.add_file_to_repository( + repository, target, strings_displayed=["Exception attempting to parse", "invalid element name"] + ) + target = os.path.join("freebayes", "invalid_tool_dependencies", "tool_dependencies.xml") + strings_displayed = [ + "The settings for name, version and type from a contained tool configuration" + ] + # , strings_displayed=strings_displayed + self.add_file_to_repository(repository, target) + target = os.path.join("freebayes", "tool_dependencies.xml") + self.add_file_to_repository(repository, target) + + def add_file_to_repository( + self, repository: Repository, source: str, target: Optional[str] = None, strings_displayed=None, commit_message: Optional[str] = None + ): + with self.cloned_repo(repository) as temp_directory: + if target is None: + target = os.path.basename(source) + full_target = os.path.join(temp_directory, target) + full_source = TEST_DATA_REPO_FILES.joinpath(source) + shutil.copyfile(str(full_source), full_target) + commit_message = commit_message or "Uploaded revision with added file." 
+ self._upload_dir_to_repository(repository, temp_directory, commit_message=commit_message, strings_displayed=strings_displayed) + + def add_tar_to_repository( + self, repository: Repository, source: str, strings_displayed=None + ): + with self.cloned_repo(repository) as temp_directory: + full_source = TEST_DATA_REPO_FILES.joinpath(source) + tar = tar_open(full_source) + tar.extractall(path=temp_directory) + tar.close() + commit_message = "Uploaded revision with added files from tar." + self._upload_dir_to_repository(repository, temp_directory, commit_message=commit_message, strings_displayed=strings_displayed) + + def commit_tar_to_repository( + self, repository: Repository, source: str, commit_message=None, strings_displayed=None + ): + full_source = TEST_DATA_REPO_FILES.joinpath(source) + assert full_source.is_file(), f"Attempting to upload {full_source} as a tar which is not a file" + populator = self.user_populator() + if strings_displayed is None: + # Just assume this is a valid upload... + populator.upload_revision(repository, full_source, commit_message=commit_message) + else: + response = populator.upload_revision_raw(repository, full_source, commit_message=commit_message) + try: + text = response.json()["message"] + except Exception: + text = response.text + for string_displayed in strings_displayed: + if string_displayed not in text: + raise AssertionError(f"Failed to find {string_displayed} in JSON response {text}") + + def delete_files_from_repository(self, repository: Repository, filenames: List[str]): + with self.cloned_repo(repository) as temp_directory: + for filename in filenames: + to_delete = os.path.join(temp_directory, filename) + os.remove(to_delete) + commit_message = "Uploaded revision with deleted files." 
+ self._upload_dir_to_repository(repository, temp_directory, commit_message=commit_message) + + def _upload_dir_to_repository(self, repository: Repository, target, commit_message, strings_displayed=None): + tf = tempfile.NamedTemporaryFile() + with tarfile.open(tf.name, "w:gz") as tar: + tar.add(target, arcname=".") + target = os.path.abspath(tf.name) + self.commit_tar_to_repository( + repository, target, commit_message=commit_message, strings_displayed=strings_displayed + ) + def delete_repository(self, repository: Repository) -> None: repository_id = repository.id self.visit_url("/admin/browse_repositories") @@ -1241,6 +1311,7 @@ def generate_complex_dependency_xml(self, filename, filepath, repository_tuples, ) # Save the generated xml to the specified location. open(file_path, "w").write(repository_dependency_xml) + return file_path def generate_simple_dependency_xml( self, @@ -1282,6 +1353,7 @@ def generate_simple_dependency_xml( # Save the generated xml to the specified location. full_path = os.path.join(filepath, filename) open(full_path, "w").write(repository_dependency_xml) + return full_path def generate_temp_path(self, test_script_path, additional_paths=None): additional_paths = additional_paths or [] @@ -1417,6 +1489,10 @@ def get_repository_tip(self, repository: Repository) -> str: repo = self.get_hg_repo(self.get_repo_path(repository)) return str(repo[repo.changelog.tip()]) + def get_repository_first_revision(self, repository: Repository) -> str: + repo = self.get_hg_repo(self.get_repo_path(repository)) + return str(repo[0]) + def _get_metadata_revision_count(self, repository: Repository) -> int: repostiory_metadata: RepositoryMetadata = self.populator.get_metadata(repository, downloadable_only=False) return len(repostiory_metadata.__root__) @@ -1745,97 +1821,6 @@ def update_installed_repository( assert self._installation_client return self._installation_client.update_installed_repository(installed_repository, verify_no_updates=False) - def 
upload_file( - self, - repository: Repository, - filename, - filepath, - valid_tools_only, - uncompress_file, - remove_repo_files_not_in_tar, - commit_message, - strings_displayed=None, - strings_not_displayed=None, - ): - if strings_displayed is None: - strings_displayed = [] - if strings_not_displayed is None: - strings_not_displayed = [] - removed_message = "files were removed from the repository" - if remove_repo_files_not_in_tar: - if not self.repository_is_new(repository): - if removed_message not in strings_displayed: - strings_displayed.append(removed_message) - else: - if removed_message not in strings_not_displayed: - strings_not_displayed.append(removed_message) - params = {"repository_id": repository.id} - self.visit_url("/upload/upload", params=params) - if valid_tools_only: - strings_displayed.extend(["has been successfully", "uploaded to the repository."]) - tc.formfile("1", "file_data", self.get_filename(filename, filepath)) - if uncompress_file: - tc.fv(1, "uncompress_file", "Yes") - else: - tc.fv(1, "uncompress_file", "No") - if not self.repository_is_new(repository): - if remove_repo_files_not_in_tar: - tc.fv(1, "remove_repo_files_not_in_tar", "Yes") - else: - tc.fv(1, "remove_repo_files_not_in_tar", "No") - tc.fv(1, "commit_message", commit_message) - tc.submit("upload_button") - self.check_for_strings(strings_displayed, strings_not_displayed) - # Uncomment this if it becomes necessary to wait for an asynchronous process to complete after submitting an upload. 
- # for i in range( 5 ): - # try: - # self.check_for_strings( strings_displayed, strings_not_displayed ) - # break - # except Exception as e: - # if i == 4: - # raise e - # else: - # time.sleep( 1 ) - # continue - - def upload_url( - self, - repository, - url, - filepath, - valid_tools_only, - uncompress_file, - remove_repo_files_not_in_tar, - commit_message, - strings_displayed=None, - strings_not_displayed=None, - ): - removed_message = "files were removed from the repository" - if remove_repo_files_not_in_tar: - if not self.repository_is_new(repository): - if removed_message not in strings_displayed: - strings_displayed.append(removed_message) - else: - if removed_message not in strings_not_displayed: - strings_not_displayed.append(removed_message) - params = {"repository_id": repository.id} - self.visit_url("/upload/upload", params=params) - if valid_tools_only: - strings_displayed.extend(["has been successfully", "uploaded to the repository."]) - tc.fv("1", "url", url) - if uncompress_file: - tc.fv(1, "uncompress_file", "Yes") - else: - tc.fv(1, "uncompress_file", "No") - if not self.repository_is_new(repository): - if remove_repo_files_not_in_tar: - tc.fv(1, "remove_repo_files_not_in_tar", "Yes") - else: - tc.fv(1, "remove_repo_files_not_in_tar", "No") - tc.fv(1, "commit_message", commit_message) - tc.submit("upload_button") - self.check_for_strings(strings_displayed, strings_not_displayed) - def verify_installed_repositories(self, installed_repositories=None, uninstalled_repositories=None): installed_repositories = installed_repositories or [] uninstalled_repositories = uninstalled_repositories or [] diff --git a/lib/tool_shed/test/functional/test_0000_basic_repository_features.py b/lib/tool_shed/test/functional/test_0000_basic_repository_features.py index 1bd1e51948fd..261e9c5a6187 100644 --- a/lib/tool_shed/test/functional/test_0000_basic_repository_features.py +++ b/lib/tool_shed/test/functional/test_0000_basic_repository_features.py @@ -78,17 +78,7 @@ 
def test_0030_grant_write_access(self): def test_0035_upload_filtering_1_1_0(self): """Upload filtering_1.1.0.tar to the repository""" repository = self._get_repository_by_name_and_owner(repository_name, common.test_user_1_name) - self.upload_file( - repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=True, - commit_message="Uploaded filtering 1.1.0", - strings_displayed=[], - strings_not_displayed=[], - ) + self.commit_tar_to_repository(repository, "filtering/filtering_1.1.0.tar", commit_message="Uploaded filtering 1.1.0") def test_0040_verify_repository(self): """Display basic repository pages""" @@ -164,17 +154,7 @@ def test_0050_display_repository_tip_file(self): def test_0055_upload_filtering_txt_file(self): """Upload filtering.txt file associated with tool version 1.1.0.""" repository = self._get_repository_by_name_and_owner(repository_name, common.test_user_1_name) - self.upload_file( - repository, - filename="filtering/filtering_0000.txt", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded filtering.txt", - strings_displayed=[], - strings_not_displayed=[], - ) + self.add_file_to_repository(repository, "filtering/filtering_0000.txt") self.display_manage_repository_page( repository, strings_displayed=["Readme file for filtering 1.1.0"] ) @@ -182,17 +162,7 @@ def test_0055_upload_filtering_txt_file(self): def test_0060_upload_filtering_test_data(self): """Upload filtering test data.""" repository = self._get_repository_by_name_and_owner(repository_name, common.test_user_1_name) - self.upload_file( - repository, - filename="filtering/filtering_test_data.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded filtering test data", - strings_displayed=[], - strings_not_displayed=[], - ) + 
self.add_tar_to_repository(repository, "filtering/filtering_test_data.tar") self.display_repository_file_contents( repository=repository, filename="1.bed", @@ -205,17 +175,7 @@ def test_0060_upload_filtering_test_data(self): def test_0065_upload_filtering_2_2_0(self): """Upload filtering version 2.2.0""" repository = self._get_repository_by_name_and_owner(repository_name, common.test_user_1_name) - self.upload_file( - repository, - filename="filtering/filtering_2.2.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded filtering 2.2.0", - strings_displayed=[], - strings_not_displayed=[], - ) + self.add_tar_to_repository(repository, "filtering/filtering_2.2.0.tar") def test_0070_verify_filtering_repository(self): """Verify the new tool versions and repository metadata.""" @@ -245,17 +205,7 @@ def test_0070_verify_filtering_repository(self): def test_0075_upload_readme_txt_file(self): """Upload readme.txt file associated with tool version 2.2.0.""" repository = self._get_repository_by_name_and_owner(repository_name, common.test_user_1_name) - self.upload_file( - repository, - filename="readme.txt", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded readme.txt", - strings_displayed=[], - strings_not_displayed=[], - ) + self.add_file_to_repository(repository, "readme.txt") self.display_manage_repository_page( repository, strings_displayed=["This is a readme file."] ) @@ -378,17 +328,7 @@ def test_0125_upload_new_readme_file(self): self.login(email=common.test_user_1_email, username=common.test_user_1_name) repository = self._get_repository_by_name_and_owner(repository_name, common.test_user_1_name) # Upload readme.txt to the filtering_0000 repository and verify that it is now displayed. 
- self.upload_file( - repository, - filename="filtering/readme.txt", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded new readme.txt with invalid ascii characters.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.add_file_to_repository(repository, "filtering/readme.txt") self.display_manage_repository_page( repository, strings_displayed=["These characters should not"] ) diff --git a/lib/tool_shed/test/functional/test_0010_repository_with_tool_dependencies.py b/lib/tool_shed/test/functional/test_0010_repository_with_tool_dependencies.py index f4fb76e6acc7..3ec4755caed8 100644 --- a/lib/tool_shed/test/functional/test_0010_repository_with_tool_dependencies.py +++ b/lib/tool_shed/test/functional/test_0010_repository_with_tool_dependencies.py @@ -53,17 +53,8 @@ def test_0010_create_freebayes_repository_and_upload_tool_xml(self): strings_displayed=[], ) assert repository - self.upload_file( - repository, - filename="freebayes/freebayes.xml", - filepath=None, - valid_tools_only=False, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded the tool xml.", - strings_displayed=["Metadata may have been defined", "This file requires an entry", "tool_data_table_conf"], - strings_not_displayed=[], - ) + strings_displayed = ["Metadata may have been defined", "This file requires an entry", "tool_data_table_conf"] + self.add_file_to_repository(repository, "freebayes/freebayes.xml", strings_displayed=strings_displayed) self.display_manage_repository_page( repository, strings_displayed=["Invalid tools"], strings_not_displayed=["Valid tools"] ) @@ -78,17 +69,8 @@ def test_0015_upload_missing_tool_data_table_conf_file(self): Uploading the tool_data_table_conf.xml.sample alone should not make the tool valid, but the error message should change. 
""" repository = self._get_repository_by_name_and_owner(repository_name, common.test_user_1_name) - self.upload_file( - repository, - filename="freebayes/tool_data_table_conf.xml.sample", - filepath=None, - valid_tools_only=False, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded the tool data table sample file.", - strings_displayed=[], - strings_not_displayed=[], - ) + strings_displayed = ["Upload a file named sam_fa_indices.loc.sample"] + self.add_file_to_repository(repository, "freebayes/tool_data_table_conf.xml.sample", strings_displayed=strings_displayed) self.display_manage_repository_page( repository, strings_displayed=["Invalid tools"], strings_not_displayed=["Valid tools"] ) @@ -103,17 +85,7 @@ def test_0020_upload_missing_sample_loc_file(self): Uploading the tool_data_table_conf.xml.sample alone should not make the tool valid, but the error message should change. """ repository = self._get_repository_by_name_and_owner(repository_name, common.test_user_1_name) - self.upload_file( - repository, - filename="freebayes/sam_fa_indices.loc.sample", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded tool data table .loc file.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.add_file_to_repository(repository, "freebayes/sam_fa_indices.loc.sample") def test_0025_upload_malformed_tool_dependency_xml(self): """Upload tool_dependencies.xml with bad characters in the readme tag. @@ -122,16 +94,9 @@ def test_0025_upload_malformed_tool_dependency_xml(self): Upload a tool_dependencies.xml file that contains <> in the text of the readme tag. This should show an error message about malformed xml. 
""" repository = self._get_repository_by_name_and_owner(repository_name, common.test_user_1_name) - self.upload_file( - repository, - filename=os.path.join("freebayes", "malformed_tool_dependencies", "tool_dependencies.xml"), - filepath=None, - valid_tools_only=False, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded malformed tool dependency XML.", - strings_displayed=["Exception attempting to parse", "invalid element name"], - strings_not_displayed=[], + target = os.path.join("freebayes", "malformed_tool_dependencies", "tool_dependencies.xml") + self.add_file_to_repository( + repository, target, strings_displayed=["Exception attempting to parse", "invalid element name"] ) def test_0030_upload_invalid_tool_dependency_xml(self): @@ -141,19 +106,11 @@ def test_0030_upload_invalid_tool_dependency_xml(self): This should result in a message about the tool dependency configuration not matching the tool's requirements. """ repository = self._get_repository_by_name_and_owner(repository_name, common.test_user_1_name) - self.upload_file( - repository, - filename=os.path.join("freebayes", "invalid_tool_dependencies", "tool_dependencies.xml"), - filepath=None, - valid_tools_only=False, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded invalid tool dependency XML.", - strings_displayed=[ - "The settings for name, version and type from a contained tool configuration" - ], - strings_not_displayed=[], - ) + target = os.path.join("freebayes", "invalid_tool_dependencies", "tool_dependencies.xml") + strings_displayed = [ + "The settings for name, version and type from a contained tool configuration" + ] + self.add_file_to_repository(repository, target, strings_displayed=strings_displayed) def test_0035_upload_valid_tool_dependency_xml(self): """Upload tool_dependencies.xml defining version 0.9.4_9696d0ce8a962f7bb61c4791be5ce44312b81cf8 of the freebayes package. 
@@ -162,17 +119,8 @@ def test_0035_upload_valid_tool_dependency_xml(self): At this stage, there should be no errors on the upload page, as every missing or invalid file has been corrected. """ repository = self._get_repository_by_name_and_owner(repository_name, common.test_user_1_name) - self.upload_file( - repository, - filename=os.path.join("freebayes", "tool_dependencies.xml"), - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded valid tool dependency XML.", - strings_displayed=[], - strings_not_displayed=[], - ) + target = os.path.join("freebayes", "tool_dependencies.xml") + self.add_file_to_repository(repository, target) def test_0040_verify_tool_dependencies(self): """Verify that the uploaded tool_dependencies.xml specifies the correct package versions. diff --git a/lib/tool_shed/test/functional/test_0020_basic_repository_dependencies.py b/lib/tool_shed/test/functional/test_0020_basic_repository_dependencies.py index bd4fcd822d3a..47fe9b0a0b84 100644 --- a/lib/tool_shed/test/functional/test_0020_basic_repository_dependencies.py +++ b/lib/tool_shed/test/functional/test_0020_basic_repository_dependencies.py @@ -38,17 +38,7 @@ def test_0010_create_column_maker_repository(self): category=category, strings_displayed=[], ) - self.upload_file( - column_maker_repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded column_maker tarball.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.commit_tar_to_repository(column_maker_repository, "column_maker/column_maker.tar") def test_0020_create_emboss_5_repository_and_upload_files(self): """Create and populate the emboss_5_0020 repository.""" @@ -61,17 +51,7 @@ def test_0020_create_emboss_5_repository_and_upload_files(self): category=category, strings_displayed=[], ) - self.upload_file( - repository, - 
filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded emboss.tar", - strings_displayed=[], - strings_not_displayed=[], - ) + self.commit_tar_to_repository(repository, "emboss/emboss.tar") def test_0025_generate_and_upload_repository_dependencies_xml(self): """Generate and upload the repository_dependencies.xml file""" diff --git a/lib/tool_shed/test/functional/test_0030_repository_dependency_revisions.py b/lib/tool_shed/test/functional/test_0030_repository_dependency_revisions.py index 17a403bea00f..a1a54a06efd2 100644 --- a/lib/tool_shed/test/functional/test_0030_repository_dependency_revisions.py +++ b/lib/tool_shed/test/functional/test_0030_repository_dependency_revisions.py @@ -39,16 +39,8 @@ def test_0010_create_emboss_5_repository(self): owner=common.test_user_1_name, category=category, ) - self.upload_file( - repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded tool tarball.", - strings_displayed=[], - strings_not_displayed=[], + self.commit_tar_to_repository( + repository, "emboss/emboss.tar", commit_message="Uploaded tool tarball." 
) def test_0015_create_emboss_6_repository(self): @@ -62,16 +54,8 @@ def test_0015_create_emboss_6_repository(self): owner=common.test_user_1_name, category=category, ) - self.upload_file( - repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded tool tarball.", - strings_displayed=[], - strings_not_displayed=[], + self.commit_tar_to_repository( + repository, "emboss/emboss.tar", commit_message="Uploaded tool tarball.", ) def test_0020_create_dependent_repository(self): @@ -86,16 +70,8 @@ def test_0020_create_dependent_repository(self): category=category, ) if self.repository_is_new(repository): - self.upload_file( - repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=False, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded bismark tarball.", - strings_displayed=[], - strings_not_displayed=[], + self.commit_tar_to_repository( + repository, "column_maker/column_maker.tar", commit_message="Uploaded bismark tarball.", ) def test_0025_create_emboss_repository(self): @@ -109,16 +85,8 @@ def test_0025_create_emboss_repository(self): owner=common.test_user_1_name, category=category, ) - self.upload_file( - repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded the tool tarball.", - strings_displayed=[], - strings_not_displayed=[], + self.commit_tar_to_repository( + repository, "emboss/emboss.tar", commit_message="Uploaded the tool tarball.", ) def test_0030_generate_repository_dependencies_for_emboss_5(self): diff --git a/lib/tool_shed/test/functional/test_0040_repository_circular_dependencies.py b/lib/tool_shed/test/functional/test_0040_repository_circular_dependencies.py index c469597b738b..d0382b3de0cc 100644 --- 
a/lib/tool_shed/test/functional/test_0040_repository_circular_dependencies.py +++ b/lib/tool_shed/test/functional/test_0040_repository_circular_dependencies.py @@ -42,16 +42,10 @@ def test_0010_create_freebayes_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="freebayes/freebayes.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "freebayes/freebayes.tar", commit_message="Uploaded the tool tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0015_create_filtering_repository(self): @@ -66,16 +60,10 @@ def test_0015_create_filtering_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "filtering/filtering_1.1.0.tar", commit_message="Uploaded the tool tarball for filtering 1.1.0.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0020_create_dependency_on_freebayes(self): diff --git a/lib/tool_shed/test/functional/test_0050_circular_dependencies_4_levels.py b/lib/tool_shed/test/functional/test_0050_circular_dependencies_4_levels.py index 1178402c6b2c..6a0609262b8b 100644 --- a/lib/tool_shed/test/functional/test_0050_circular_dependencies_4_levels.py +++ b/lib/tool_shed/test/functional/test_0050_circular_dependencies_4_levels.py @@ -51,16 +51,10 @@ def test_0005_create_convert_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="convert_chars/convert_chars.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "convert_chars/convert_chars.tar", commit_message="Uploaded convert_chars tarball.", - strings_displayed=[], - 
strings_not_displayed=[], ) def test_0010_create_column_repository(self): @@ -74,16 +68,10 @@ def test_0010_create_column_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Uploaded column_maker tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0015_create_emboss_datatypes_repository(self): @@ -101,16 +89,10 @@ def test_0020_create_emboss_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded emboss tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0025_create_filtering_repository(self): @@ -124,16 +106,10 @@ def test_0025_create_filtering_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( filtering_repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "filtering/filtering_1.1.0.tar", commit_message="Uploaded filtering 1.1.0 tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0030_create_freebayes_repository(self): @@ -147,16 +123,10 @@ def test_0030_create_freebayes_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="freebayes/freebayes.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "freebayes/freebayes.tar", commit_message="Uploaded freebayes tarball.", - 
strings_displayed=[], - strings_not_displayed=[], ) def test_0035_create_bismark_repository(self): @@ -170,17 +140,7 @@ def test_0035_create_bismark_repository(self): category=category, strings_displayed=[], ) - self.upload_file( - repository, - filename="bismark/bismark.tar", - filepath=None, - valid_tools_only=False, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded bismark tarball.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.user_populator().setup_bismark_repo(repository, end=1) def test_0040_create_and_upload_dependency_definitions(self): column_repository = self._get_repository_by_name_and_owner(column_repository_name, common.test_user_1_name) diff --git a/lib/tool_shed/test/functional/test_0070_invalid_tool.py b/lib/tool_shed/test/functional/test_0070_invalid_tool.py index b3fcd3842d5f..9462bdb3b251 100644 --- a/lib/tool_shed/test/functional/test_0070_invalid_tool.py +++ b/lib/tool_shed/test/functional/test_0070_invalid_tool.py @@ -30,30 +30,9 @@ def test_0005_create_category_and_repository(self): category=category, strings_displayed=[], ) - self.upload_file( - repository, - filename="bismark/bismark.tar", - filepath=None, - valid_tools_only=False, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded bismark tarball.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.user_populator().setup_bismark_repo(repository) + invalid_revision = self.get_repository_first_revision(repository) self.display_manage_repository_page(repository, strings_displayed=["Invalid tools"]) - invalid_revision = self.get_repository_tip(repository) - self.upload_file( - repository, - filename="bismark/bismark_methylation_extractor.xml", - filepath=None, - valid_tools_only=False, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded an updated tool xml.", - strings_displayed=[], - strings_not_displayed=[], - ) valid_revision = 
self.get_repository_tip(repository) tool_guid = f"{self.url.replace('http://', '').rstrip('/')}/repos/user1/bismark_0070/bismark_methylation_extractor/0.7.7.3" tool_metadata_strings_displayed = [ diff --git a/lib/tool_shed/test/functional/test_0080_advanced_circular_dependencies.py b/lib/tool_shed/test/functional/test_0080_advanced_circular_dependencies.py index c00941209860..1d063d2e0f9d 100644 --- a/lib/tool_shed/test/functional/test_0080_advanced_circular_dependencies.py +++ b/lib/tool_shed/test/functional/test_0080_advanced_circular_dependencies.py @@ -35,16 +35,10 @@ def test_0005_create_column_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Uploaded column_maker tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0005_create_convert_repository(self): @@ -60,16 +54,10 @@ def test_0005_create_convert_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="convert_chars/convert_chars.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "convert_chars/convert_chars.tar", commit_message="Uploaded convert_chars tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0020_create_repository_dependencies(self): diff --git a/lib/tool_shed/test/functional/test_0090_tool_search.py b/lib/tool_shed/test/functional/test_0090_tool_search.py index 566629d42fb5..2ceb5c982509 100644 --- a/lib/tool_shed/test/functional/test_0090_tool_search.py +++ b/lib/tool_shed/test/functional/test_0090_tool_search.py @@ -47,16 +47,10 @@ def test_0005_create_bwa_base_repository(self): category=category, strings_displayed=[], ) - 
self.upload_file( + self.commit_tar_to_repository( repository, - filename="bwa/bwa_base.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "bwa/bwa_base.tar", commit_message="Uploaded BWA tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0010_create_bwa_color_repository(self): @@ -71,16 +65,10 @@ def test_0010_create_bwa_color_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="bwa/bwa_color.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "bwa/bwa_color.tar", commit_message="Uploaded BWA color tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0020_create_emboss_repository(self): @@ -94,16 +82,10 @@ def test_0020_create_emboss_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded emboss tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0025_create_filtering_repository(self): @@ -117,16 +99,10 @@ def test_0025_create_filtering_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( filtering_repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "filtering/filtering_1.1.0.tar", commit_message="Uploaded filtering 1.1.0 tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0030_create_freebayes_repository(self): @@ -140,16 +116,10 @@ def test_0030_create_freebayes_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + 
self.commit_tar_to_repository( repository, - filename="freebayes/freebayes.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "freebayes/freebayes.tar", commit_message="Uploaded freebayes tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0035_create_and_upload_dependency_definitions(self): diff --git a/lib/tool_shed/test/functional/test_0100_complex_repository_dependencies.py b/lib/tool_shed/test/functional/test_0100_complex_repository_dependencies.py index f7841a849445..6ff5a4a431ed 100644 --- a/lib/tool_shed/test/functional/test_0100_complex_repository_dependencies.py +++ b/lib/tool_shed/test/functional/test_0100_complex_repository_dependencies.py @@ -43,17 +43,7 @@ def test_0005_create_bwa_package_repository(self): category=category, strings_displayed=[], ) - self.upload_file( - repository, - filename="bwa/complex/tool_dependencies.xml", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded tool_dependencies.xml.", - strings_displayed=["This repository currently contains a single file named tool_dependencies.xml"], - strings_not_displayed=[], - ) + self.add_file_to_repository(repository, "bwa/complex/tool_dependencies.xml") # Visit the manage repository page for package_bwa_0_5_9_0100. self.display_manage_repository_page( repository, strings_displayed=["Tool dependencies", "will not be", "to this repository"] @@ -73,16 +63,10 @@ def test_0010_create_bwa_base_repository(self): strings_displayed=[], ) # Populate the repository named bwa_base_repository_0100 with a bwa_base tool archive. 
- self.upload_file( + self.commit_tar_to_repository( repository, - filename="bwa/complex/bwa_base.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "bwa/complex/bwa_base.tar", commit_message="Uploaded bwa_base.tar with tool wrapper XML, but without tool dependency XML.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0015_generate_complex_repository_dependency_invalid_shed_url(self): @@ -217,17 +201,7 @@ def test_0040_generate_tool_dependency(self): open(xml_filename, "w").write( open(old_tool_dependency).read().replace("__PATH__", self.get_filename("bwa/complex")) ) - self.upload_file( - tool_repository, - filename=xml_filename, - filepath=new_tool_dependency_path, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded new tool_dependencies.xml.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.add_file_to_repository(tool_repository, xml_filename, "tool_dependencies.xml") # Verify that the dependency display has been updated as a result of the new tool_dependencies.xml file. 
repository_tip = self.get_repository_tip(tool_repository) strings_displayed = ["bwa", "0.5.9", "package"] diff --git a/lib/tool_shed/test/functional/test_0110_invalid_simple_repository_dependencies.py b/lib/tool_shed/test/functional/test_0110_invalid_simple_repository_dependencies.py index a2762fe0c007..60b4ad2a0912 100644 --- a/lib/tool_shed/test/functional/test_0110_invalid_simple_repository_dependencies.py +++ b/lib/tool_shed/test/functional/test_0110_invalid_simple_repository_dependencies.py @@ -39,16 +39,10 @@ def test_0010_create_emboss_datatypes_repository_and_upload_tarball(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( column_maker_repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Uploaded column_maker tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0020_create_emboss_5_repository_and_upload_files(self): @@ -62,16 +56,10 @@ def test_0020_create_emboss_5_repository_and_upload_files(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded emboss tool tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0025_generate_repository_dependency_with_invalid_url(self): diff --git a/lib/tool_shed/test/functional/test_0120_simple_repository_dependency_multiple_owners.py b/lib/tool_shed/test/functional/test_0120_simple_repository_dependency_multiple_owners.py index 670f61f1a25a..71faea2f7758 100644 --- a/lib/tool_shed/test/functional/test_0120_simple_repository_dependency_multiple_owners.py +++ 
b/lib/tool_shed/test/functional/test_0120_simple_repository_dependency_multiple_owners.py @@ -51,16 +51,10 @@ def test_0005_create_datatypes_repository(self): category=category, strings_displayed=strings_displayed, ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="blast/blast_datatypes.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "blast/blast_datatypes.tar", commit_message="Uploaded blast_datatypes tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0010_verify_datatypes_repository(self): @@ -100,16 +94,10 @@ def test_0015_create_tool_repository(self): category=category, strings_displayed=strings_displayed, ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="blast/blastxml_to_top_descr.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "blast/blastxml_to_top_descr.tar", commit_message="Uploaded blastxml_to_top_descr tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0020_verify_tool_repository(self): diff --git a/lib/tool_shed/test/functional/test_0140_tool_help_images.py b/lib/tool_shed/test/functional/test_0140_tool_help_images.py index 1bc8e1050b8c..bce376d06a6c 100644 --- a/lib/tool_shed/test/functional/test_0140_tool_help_images.py +++ b/lib/tool_shed/test/functional/test_0140_tool_help_images.py @@ -50,16 +50,10 @@ def test_0005_create_htseq_count_repository(self): strings_displayed=[], ) # Upload htseq_count.tar to the repository. 
- self.upload_file( + self.commit_tar_to_repository( repository, - filename="htseq_count/htseq_count.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, + "htseq_count/htseq_count.tar", commit_message="Uploaded htseq_count.tar.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0010_load_tool_page(self): diff --git a/lib/tool_shed/test/functional/test_0150_prior_installation_required.py b/lib/tool_shed/test/functional/test_0150_prior_installation_required.py index e8d832eede17..22800d42c359 100644 --- a/lib/tool_shed/test/functional/test_0150_prior_installation_required.py +++ b/lib/tool_shed/test/functional/test_0150_prior_installation_required.py @@ -49,16 +49,10 @@ def test_0005_create_convert_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="convert_chars/convert_chars.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "convert_chars/convert_chars.tar", commit_message="Uploaded convert_chars tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0010_create_column_repository(self): @@ -72,16 +66,10 @@ def test_0010_create_column_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Uploaded column_maker tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0015_create_repository_dependency(self): diff --git a/lib/tool_shed/test/functional/test_0160_circular_prior_installation_required.py b/lib/tool_shed/test/functional/test_0160_circular_prior_installation_required.py index 822758a6a712..5ce2a7d0f7d6 100644 --- 
a/lib/tool_shed/test/functional/test_0160_circular_prior_installation_required.py +++ b/lib/tool_shed/test/functional/test_0160_circular_prior_installation_required.py @@ -48,16 +48,10 @@ def test_0005_create_convert_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="convert_chars/convert_chars.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "convert_chars/convert_chars.tar", commit_message="Uploaded convert_chars tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0010_create_column_repository(self): @@ -71,16 +65,11 @@ def test_0010_create_column_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Uploaded column_maker tarball.", strings_displayed=[], - strings_not_displayed=[], ) def test_0015_create_filtering_repository(self): @@ -94,16 +83,10 @@ def test_0015_create_filtering_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "filtering/filtering_1.1.0.tar", commit_message="Uploaded filtering 1.1.0 tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0020_create_repository_dependency(self): diff --git a/lib/tool_shed/test/functional/test_0170_complex_prior_installation_required.py b/lib/tool_shed/test/functional/test_0170_complex_prior_installation_required.py index e37888b4e02f..6b2b78107609 100644 --- 
a/lib/tool_shed/test/functional/test_0170_complex_prior_installation_required.py +++ b/lib/tool_shed/test/functional/test_0170_complex_prior_installation_required.py @@ -52,16 +52,11 @@ def test_0005_create_matplotlib_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="package_matplotlib/package_matplotlib_1_2.tar", - filepath=None, - valid_tools_only=False, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "package_matplotlib/package_matplotlib_1_2.tar", commit_message="Uploaded matplotlib tool dependency tarball.", - strings_displayed=["This repository currently contains a single file named tool_dependencies.xml"], - strings_not_displayed=[], + strings_displayed=["tool_dependencies.xml"], ) def test_0010_create_numpy_repository(self): @@ -79,16 +74,10 @@ def test_0010_create_numpy_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="package_numpy/package_numpy_1_7.tar", - filepath=None, - valid_tools_only=False, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "package_numpy/package_numpy_1_7.tar", commit_message="Uploaded numpy tool dependency tarball.", - strings_displayed=["This repository currently contains a single file named tool_dependencies.xml"], - strings_not_displayed=[], ) def test_0015_create_complex_repository_dependency(self): @@ -120,17 +109,7 @@ def test_0015_create_complex_repository_dependency(self): new_xml_file = os.path.join(dependency_xml_path, "tool_dependencies.xml") open(new_xml_file, "w").write(original_xml.replace("", processed_xml)) # Upload the generated complex repository dependency XML to the matplotlib repository. 
- self.upload_file( - matplotlib_repository, - filename="tool_dependencies.xml", - filepath=dependency_xml_path, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded complex repository dependency on numpy 1.7.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.add_file_to_repository(matplotlib_repository, new_xml_file, "tool_dependencies.xml") def test_0020_verify_generated_dependency(self): """Verify that matplotlib now has a package tool dependency and a complex repository dependency. diff --git a/lib/tool_shed/test/functional/test_0300_reset_all_metadata.py b/lib/tool_shed/test/functional/test_0300_reset_all_metadata.py index c920dd5c58ff..f20c29b08cbd 100644 --- a/lib/tool_shed/test/functional/test_0300_reset_all_metadata.py +++ b/lib/tool_shed/test/functional/test_0300_reset_all_metadata.py @@ -65,28 +65,12 @@ def test_0005_create_filtering_repository(self): ) if self.repository_is_new(repository): running_standalone = True - self.upload_file( + self.commit_tar_to_repository( repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "filtering/filtering_1.1.0.tar", commit_message="Uploaded filtering 1.1.0 tarball.", - strings_displayed=[], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename="filtering/filtering_2.2.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded filtering 2.2.0 tarball.", - strings_displayed=[], - strings_not_displayed=[], ) + self.add_tar_to_repository(repository, "filtering/filtering_2.2.0.tar") def test_0010_create_freebayes_repository(self): """Create and populate the freebayes_0010 repository.""" @@ -106,50 +90,7 @@ def test_0010_create_freebayes_repository(self): strings_displayed=[], ) if running_standalone: - self.upload_file( - repository, - 
filename="freebayes/freebayes.xml", - filepath=None, - valid_tools_only=False, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded freebayes.xml.", - strings_displayed=[], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename="freebayes/tool_data_table_conf.xml.sample", - filepath=None, - valid_tools_only=False, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded tool_data_table_conf.xml.sample", - strings_displayed=[], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename="freebayes/sam_fa_indices.loc.sample", - filepath=None, - valid_tools_only=False, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded sam_fa_indices.loc.sample", - strings_displayed=[], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename="freebayes/tool_dependencies.xml", - filepath=None, - valid_tools_only=False, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded tool_dependencies.xml", - strings_displayed=[], - strings_not_displayed=[], - ) + self.setup_freebayes_0010_repo(repository) def test_0015_create_datatypes_0020_repository(self): """Create and populate the column_maker_0020 repository.""" @@ -169,16 +110,10 @@ def test_0015_create_datatypes_0020_repository(self): category=category_0020, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Uploaded column_maker tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0020_create_emboss_0020_repository(self): @@ -199,16 +134,10 @@ def test_0020_create_emboss_0020_repository(self): category=category_0020, strings_displayed=[], ) - self.upload_file( + 
self.commit_tar_to_repository( repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded emboss.tar", - strings_displayed=[], - strings_not_displayed=[], ) def test_0025_create_emboss_datatypes_0030_repository(self): @@ -229,16 +158,10 @@ def test_0025_create_emboss_datatypes_0030_repository(self): category=category_0030, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( column_maker_repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Uploaded column_maker tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0030_create_emboss_5_repository(self): @@ -259,16 +182,10 @@ def test_0030_create_emboss_5_repository(self): category=category_0030, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( emboss_5_repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded emboss.tar", - strings_displayed=[], - strings_not_displayed=[], ) def test_0035_create_emboss_6_repository(self): @@ -289,16 +206,10 @@ def test_0035_create_emboss_6_repository(self): category=category_0030, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( emboss_6_repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded emboss.tar", - strings_displayed=[], - strings_not_displayed=[], ) def test_0040_create_emboss_0030_repository(self): @@ -319,16 +230,10 @@ def test_0040_create_emboss_0030_repository(self): category=category_0030, 
strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( emboss_repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded emboss.tar", - strings_displayed=[], - strings_not_displayed=[], ) def test_0045_create_repository_dependencies_for_0030(self): @@ -395,16 +300,10 @@ def test_0050_create_freebayes_repository(self): strings_displayed=[], ) if running_standalone: - self.upload_file( + self.commit_tar_to_repository( repository, - filename="freebayes/freebayes.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "freebayes/freebayes.tar", commit_message="Uploaded freebayes tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0055_create_filtering_repository(self): @@ -425,16 +324,10 @@ def test_0055_create_filtering_repository(self): category=category_0040, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "filtering/filtering_1.1.0.tar", commit_message="Uploaded filtering 1.1.0 tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0060_create_dependency_structure(self): @@ -482,16 +375,10 @@ def test_0065_create_convert_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="convert_chars/convert_chars.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "convert_chars/convert_chars.tar", commit_message="Uploaded convert_chars tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0070_create_column_repository(self): @@ -509,16 +396,10 @@ def 
test_0070_create_column_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Uploaded column_maker tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0075_create_emboss_datatypes_repository(self): @@ -539,16 +420,10 @@ def test_0080_create_emboss_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded emboss tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0085_create_filtering_repository(self): @@ -566,16 +441,10 @@ def test_0085_create_filtering_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( filtering_repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "filtering/filtering_1.1.0.tar", commit_message="Uploaded filtering 1.1.0 tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0090_create_freebayes_repository(self): @@ -593,16 +462,10 @@ def test_0090_create_freebayes_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="freebayes/freebayes.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "freebayes/freebayes.tar", commit_message="Uploaded freebayes tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0095_create_bismark_repository(self): @@ 
-620,17 +483,7 @@ def test_0095_create_bismark_repository(self): category=category, strings_displayed=[], ) - self.upload_file( - repository, - filename="bismark/bismark.tar", - filepath=None, - valid_tools_only=False, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded bismark tarball.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.user_populator().setup_bismark_repo(repository, end=1) def test_0100_create_and_upload_dependency_definitions(self): """Create the dependency structure for test 0050.""" diff --git a/lib/tool_shed/test/functional/test_0310_hg_api_features.py b/lib/tool_shed/test/functional/test_0310_hg_api_features.py index c108a2532db5..589d077ef2b5 100644 --- a/lib/tool_shed/test/functional/test_0310_hg_api_features.py +++ b/lib/tool_shed/test/functional/test_0310_hg_api_features.py @@ -46,27 +46,10 @@ def test_0005_create_filtering_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=True, + "filtering/filtering_1.1.0.tar", commit_message="Uploaded filtering 1.1.0.", - strings_displayed=[], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename="filtering/filtering_test_data.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded filtering test data.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0010_clone(self): diff --git a/lib/tool_shed/test/functional/test_0420_citable_urls_for_repositories.py b/lib/tool_shed/test/functional/test_0420_citable_urls_for_repositories.py index 323ef273d692..31d06c129fe2 100644 --- a/lib/tool_shed/test/functional/test_0420_citable_urls_for_repositories.py +++ b/lib/tool_shed/test/functional/test_0420_citable_urls_for_repositories.py 
@@ -59,16 +59,10 @@ def test_0005_create_repository(self): category=category, strings_displayed=strings_displayed, ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="filtering/filtering_2.2.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "filtering/filtering_2.2.0.tar", commit_message="Uploaded filtering 2.2.0 tarball.", - strings_displayed=[], - strings_not_displayed=[], ) # We'll be checking for this hash later, after uploading another file to the repository, making get_repository_tip() not usable. first_changeset_hash = self.get_repository_tip(repository) @@ -81,17 +75,7 @@ def test_0010_upload_new_file_to_repository(self): The repository should now contain two changeset revisions, 0: and 1:. """ repository = self._get_repository_by_name_and_owner(repository_name, common.test_user_1_name) - self.upload_file( - repository, - filename="readme.txt", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded readme.txt.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.add_file_to_repository(repository, "readme.txt") def test_0015_load_user_view_page(self): """Load the /view/ page amd check for strings. 
diff --git a/lib/tool_shed/test/functional/test_0430_browse_utilities.py b/lib/tool_shed/test/functional/test_0430_browse_utilities.py index 4a817c031199..104d2e0e28b3 100644 --- a/lib/tool_shed/test/functional/test_0430_browse_utilities.py +++ b/lib/tool_shed/test/functional/test_0430_browse_utilities.py @@ -54,16 +54,10 @@ def test_0010_create_emboss_repository(self): category=category, strings_displayed=strings_displayed, ) - self.upload_file( + self.commit_tar_to_repository( emboss_repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded emboss.tar.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0020_create_tool_dependency_repository(self): @@ -85,16 +79,10 @@ def test_0020_create_tool_dependency_repository(self): category=category, strings_displayed=strings_displayed, ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="freebayes/freebayes.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "freebayes/freebayes.tar", commit_message="Uploaded freebayes.tar.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0030_browse_tools(self): diff --git a/lib/tool_shed/test/functional/test_0440_deleting_dependency_definitions.py b/lib/tool_shed/test/functional/test_0440_deleting_dependency_definitions.py index 21313d07287f..1da5abd03751 100644 --- a/lib/tool_shed/test/functional/test_0440_deleting_dependency_definitions.py +++ b/lib/tool_shed/test/functional/test_0440_deleting_dependency_definitions.py @@ -83,16 +83,10 @@ def test_0005_create_column_maker_repository(self): category=category, strings_displayed=strings_displayed, ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - 
remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Uploaded column maker tool tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0010_create_convert_chars_repository(self): @@ -111,16 +105,10 @@ def test_0010_create_convert_chars_repository(self): category=category, strings_displayed=strings_displayed, ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="convert_chars/convert_chars.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "convert_chars/convert_chars.tar", commit_message="Uploaded convert chars tool tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0015_create_dependency_on_convert_chars(self): @@ -213,17 +201,7 @@ def test_0030_create_bwa_package_repository(self): category=category, strings_displayed=strings_displayed, ) - self.upload_file( - repository, - filename="bwa/complex/tool_dependencies.xml", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded package tool dependency definition.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.add_file_to_repository(repository, "bwa/complex/tool_dependencies.xml") def test_0035_create_bwa_base_repository(self): """Create and populate the bwa_base_0440 repository. 
@@ -242,16 +220,10 @@ def test_0035_create_bwa_base_repository(self): category=category, strings_displayed=strings_displayed, ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="bwa/complex/bwa_base.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "bwa/complex/bwa_base.tar", commit_message="Uploaded BWA nucleotide space mapping tool tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0040_create_dependency_on_bwa_package_repository(self): @@ -342,17 +314,7 @@ def test_0055_create_bwa_tool_dependency_repository(self): category=category, strings_displayed=strings_displayed, ) - self.upload_file( - repository, - filename="bwa/complex/tool_dependencies.xml", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded package tool dependency definition.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.add_file_to_repository(repository, "bwa/complex/tool_dependencies.xml") def test_0060_delete_bwa_tool_dependency_definition(self): """Delete the tool_dependencies.xml file from bwa_tool_dependency_0440. @@ -400,17 +362,7 @@ def test_0065_reupload_bwa_tool_dependency_definition(self): # Record the current tip, so we can verify that it's still not a downloadable revision after tool_dependencies.xml # is re-uploaded and a new downloadable revision is created. 
old_changeset_revision = self.get_repository_tip(repository) - self.upload_file( - repository, - filename="bwa/complex/tool_dependencies.xml", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded package tool dependency definition.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.add_file_to_repository(repository, "bwa/complex/tool_dependencies.xml") new_changeset_revision = self.get_repository_tip(repository) # Check that the old changeset revision is still downloadable. metadata_record = self._get_repository_metadata_by_changeset_revision(repository, old_changeset_revision) diff --git a/lib/tool_shed/test/functional/test_0460_upload_to_repository.py b/lib/tool_shed/test/functional/test_0460_upload_to_repository.py index 07b43d6f9d28..5c8cb5ea9907 100644 --- a/lib/tool_shed/test/functional/test_0460_upload_to_repository.py +++ b/lib/tool_shed/test/functional/test_0460_upload_to_repository.py @@ -87,16 +87,10 @@ def test_0010_create_bwa_package_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.add_file_to_repository( repository, - filename="bwa/complex/tool_dependencies.xml", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, + "bwa/complex/tool_dependencies.xml", commit_message="Populate package_bwa_0_5_9_0460 with a tool dependency definition.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0015_create_tool_dependency_repositories(self): @@ -134,17 +128,7 @@ def test_0020_populate_complex_dependency_test_1_0460(self): """ repository = self._get_repository_by_name_and_owner("complex_dependency_test_1_0460", common.test_user_1_name) package_repository = self._get_repository_by_name_and_owner("package_bwa_0_5_9_0460", common.test_user_1_name) - self.upload_file( - repository, - filename="0460_files/tool_dependencies.xml", - filepath=None, - valid_tools_only=True, - 
uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded complex repository dependency definition.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.add_file_to_repository(repository, "0460_files/tool_dependencies.xml") changeset_revision = self.get_repository_tip(package_repository) strings_displayed = ["package_bwa_0_5_9_0460", "bwa", "0.5.9", "package", changeset_revision] self.display_manage_repository_page(repository, strings_displayed=strings_displayed) @@ -160,16 +144,10 @@ def test_0025_populate_complex_dependency_test_2_0460(self): """ repository = self._get_repository_by_name_and_owner("complex_dependency_test_2_0460", common.test_user_1_name) package_repository = self._get_repository_by_name_and_owner("package_bwa_0_5_9_0460", common.test_user_1_name) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="0460_files/tool_dependencies_in_root.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=True, + "0460_files/tool_dependencies_in_root.tar", commit_message="Uploaded complex repository dependency definition.", - strings_displayed=[], - strings_not_displayed=[], ) changeset_revision = self.get_repository_tip(package_repository) strings_displayed = ["package_bwa_0_5_9_0460", "bwa", "0.5.9", "package", changeset_revision] @@ -186,16 +164,10 @@ def test_0030_populate_complex_dependency_test_3_0460(self): """ repository = self._get_repository_by_name_and_owner("complex_dependency_test_3_0460", common.test_user_1_name) package_repository = self._get_repository_by_name_and_owner("package_bwa_0_5_9_0460", common.test_user_1_name) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="0460_files/tool_dependencies_in_subfolder.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=True, + "0460_files/tool_dependencies_in_subfolder.tar", commit_message="Uploaded complex 
repository dependency definition.", - strings_displayed=[], - strings_not_displayed=[], ) changeset_revision = self.get_repository_tip(package_repository) strings_displayed = ["package_bwa_0_5_9_0460", "bwa", "0.5.9", "package", changeset_revision] @@ -218,17 +190,7 @@ def test_0035_create_repositories_for_url_upload(self): category=category, strings_displayed=[], ) - self.upload_file( - repository, - filename="0460_files/tool_dependencies.xml", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Populate hg_tool_dependency_0460 with a tool dependency definition.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.add_file_to_repository(repository, "0460_files/tool_dependencies.xml") repository = self.get_or_create_repository( name="hg_subfolder_tool_dependency_0460", description=bwa_repository_description, @@ -237,68 +199,10 @@ def test_0035_create_repositories_for_url_upload(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="0460_files/tool_dependencies_in_subfolder.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, + "0460_files/tool_dependencies_in_subfolder.tar", commit_message="Populate hg_subfolder_tool_dependency_0460 with a tool dependency definition.", - strings_displayed=[], - strings_not_displayed=[], - ) - - def test_0040_url_upload_to_complex_test(self): - """Populate complex_dependency_test_4_0460. - - This is step 8 - Upload to complex_dependency_test_4_0460 using the url hg:///repos/user1/hg_tool_dependency_0460. 
- """ - url = f"hg://{self.host}:{self.port}/repos/user1/hg_tool_dependency_0460" - repository = self._get_repository_by_name_and_owner("complex_dependency_test_4_0460", common.test_user_1_name) - package_repository = self._get_repository_by_name_and_owner("package_bwa_0_5_9_0460", common.test_user_1_name) - self.upload_url( - repository, - url=url, - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=True, - commit_message="Uploaded complex repository dependency definition.", - strings_displayed=[], - strings_not_displayed=[], - ) - changeset_revision = self.get_repository_tip(package_repository) - strings_displayed = ["package_bwa_0_5_9_0460", "bwa", "0.5.9", "package", changeset_revision] - self.display_manage_repository_page(repository, strings_displayed=strings_displayed) - self.display_repository_file_contents( - repository, filename="tool_dependencies.xml", strings_displayed=[changeset_revision] - ) - - def test_0045_url_upload_to_complex_test(self): - """Populate complex_dependency_test_4_0460. - - This is step 9 - Upload to complex_dependency_test_5_0460 using the url hg:///repos/user1/hg_subfolder_tool_dependency_0460. 
- """ - url = f"hg://{self.host}:{self.port}/repos/user1/hg_subfolder_tool_dependency_0460" - repository = self._get_repository_by_name_and_owner("complex_dependency_test_5_0460", common.test_user_1_name) - package_repository = self._get_repository_by_name_and_owner("package_bwa_0_5_9_0460", common.test_user_1_name) - self.upload_url( - repository, - url=url, - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=True, - commit_message="Uploaded complex repository dependency definition.", - strings_displayed=[], - strings_not_displayed=[], - ) - changeset_revision = self.get_repository_tip(package_repository) - strings_displayed = ["package_bwa_0_5_9_0460", "bwa", "0.5.9", "package", changeset_revision] - self.display_manage_repository_page(repository, strings_displayed=strings_displayed) - self.display_repository_file_contents( - repository, filename="tool_dependencies.xml", filepath="subfolder", strings_displayed=[changeset_revision] ) def test_0050_create_repositories_for_simple_dependencies(self): @@ -338,17 +242,7 @@ def test_0055_populate_repository_dependency_test_1_0460(self): "repository_dependency_test_1_0460", common.test_user_1_name ) package_repository = self._get_repository_by_name_and_owner(bwa_repository_name, common.test_user_1_name) - self.upload_file( - repository, - filename="0460_files/repository_dependencies.xml", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded complex repository dependency definition.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.add_file_to_repository(repository, "0460_files/repository_dependencies.xml") changeset_revision = self.get_repository_tip(package_repository) strings_displayed = [bwa_repository_name, "user1", changeset_revision] self.display_manage_repository_page(repository, strings_displayed=strings_displayed) @@ -365,16 +259,10 @@ def 
test_0060_populate_repository_dependency_test_2_0460(self): "repository_dependency_test_2_0460", common.test_user_1_name ) package_repository = self._get_repository_by_name_and_owner(bwa_repository_name, common.test_user_1_name) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="0460_files/in_root/repository_dependencies_in_root.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=True, + "0460_files/in_root/repository_dependencies_in_root.tar", commit_message="Uploaded complex repository dependency definition.", - strings_displayed=[], - strings_not_displayed=[], ) changeset_revision = self.get_repository_tip(package_repository) strings_displayed = [bwa_repository_name, "user1", changeset_revision] @@ -393,16 +281,10 @@ def test_0065_populate_repository_dependency_test_3_0460(self): "repository_dependency_test_3_0460", common.test_user_1_name ) package_repository = self._get_repository_by_name_and_owner(bwa_repository_name, common.test_user_1_name) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="0460_files/in_subfolder/repository_dependencies_in_subfolder.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=True, + "0460_files/in_subfolder/repository_dependencies_in_subfolder.tar", commit_message="Uploaded complex repository dependency definition.", - strings_displayed=[], - strings_not_displayed=[], ) changeset_revision = self.get_repository_tip(package_repository) strings_displayed = [bwa_repository_name, "user1", changeset_revision] @@ -429,17 +311,7 @@ def test_0070_create_repositories_for_url_upload(self): category=category, strings_displayed=[], ) - self.upload_file( - repository, - filename="0460_files/repository_dependencies.xml", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Populate hg_repository_dependency_0460 with a tool 
dependency definition.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.add_file_to_repository(repository, "0460_files/repository_dependencies.xml") repository = self.get_or_create_repository( name="hg_subfolder_repository_dependency_0460", description=bwa_repository_description, @@ -448,75 +320,8 @@ def test_0070_create_repositories_for_url_upload(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="0460_files/in_subfolder/repository_dependencies_in_subfolder.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, + "0460_files/in_subfolder/repository_dependencies_in_subfolder.tar", commit_message="Populate hg_subfolder_repository_dependency_0460 with a tool dependency definition.", - strings_displayed=[], - strings_not_displayed=[], - ) - - def test_0075_url_upload_to_complex_test(self): - """Populate repository_dependency_test_4_0460. - - This is step 15 - Upload to repository_dependency_test_4_0460 using the url - hg:///repos/user1/hg_repository_dependency_0460. 
- """ - url = f"hg://{self.host}:{self.port}/repos/user1/hg_repository_dependency_0460" - repository = self._get_repository_by_name_and_owner( - "repository_dependency_test_4_0460", common.test_user_1_name - ) - package_repository = self._get_repository_by_name_and_owner(bwa_repository_name, common.test_user_1_name) - self.upload_url( - repository, - url=url, - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=True, - commit_message="Uploaded repository dependency definition.", - strings_displayed=[], - strings_not_displayed=[], - ) - changeset_revision = self.get_repository_tip(package_repository) - strings_displayed = [bwa_repository_name, "user1", changeset_revision] - self.display_manage_repository_page(repository, strings_displayed=strings_displayed) - self.display_repository_file_contents( - repository, filename="repository_dependencies.xml", strings_displayed=[changeset_revision] - ) - - def test_0080_url_upload_to_complex_test(self): - """Populate repository_dependency_test_4_0460. - - This is step 16 - Upload to repository_dependency_test_5_0460 using the url - hg:///repos/user1/hg_subfolder_repository_dependency_0460. 
- """ - url = f"hg://{self.host}:{self.port}/repos/user1/hg_subfolder_repository_dependency_0460" - repository = self._get_repository_by_name_and_owner( - "repository_dependency_test_5_0460", common.test_user_1_name - ) - package_repository = self._get_repository_by_name_and_owner(bwa_repository_name, common.test_user_1_name) - self.upload_url( - repository, - url=url, - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=True, - commit_message="Uploaded repository dependency definition.", - strings_displayed=[], - strings_not_displayed=[], - ) - changeset_revision = self.get_repository_tip(package_repository) - strings_displayed = [bwa_repository_name, "user1", changeset_revision] - self.display_manage_repository_page(repository, strings_displayed=strings_displayed) - self.display_repository_file_contents( - repository, - filename="repository_dependencies.xml", - filepath="subfolder", - strings_displayed=[changeset_revision], ) diff --git a/lib/tool_shed/test/functional/test_0470_tool_dependency_repository_type.py b/lib/tool_shed/test/functional/test_0470_tool_dependency_repository_type.py index 8086ac37b12b..79df2baff835 100644 --- a/lib/tool_shed/test/functional/test_0470_tool_dependency_repository_type.py +++ b/lib/tool_shed/test/functional/test_0470_tool_dependency_repository_type.py @@ -90,17 +90,7 @@ def test_0005_create_libx11_repository(self): strings_displayed=[], ) # Upload the tool dependency definition to the package_x11_client_1_5_proto_7_0_0470 repository. 
- self.upload_file( - repository, - filename="emboss/libx11_proto/first_tool_dependency/tool_dependencies.xml", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Populate package_x11_client_1_5_proto_7_0_0470 with tool dependency definitions.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.user_populator().setup_test_data_repo("libx11_proto", repository, end=1) def test_0010_create_emboss_5_0_0_repository(self): """Create and populate package_emboss_5_0_0_0470. @@ -121,17 +111,7 @@ def test_0010_create_emboss_5_0_0_repository(self): strings_displayed=[], ) # Upload the edited tool dependency definition to the package_emboss_5_0_0 repository. - self.upload_file( - repository, - filename="emboss/emboss_5_0_0/first_tool_dependency/tool_dependencies.xml", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Populate package_emboss_5_0_0_0470 with tool dependency definitions.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.user_populator().setup_test_data_repo("package_emboss_5_0_0_0470", repository, end=1) def test_0015_create_emboss_5_repository(self): """Create and populate emboss_5_0470. @@ -152,17 +132,7 @@ def test_0015_create_emboss_5_repository(self): strings_displayed=[], ) # Populate emboss_5 with tool and dependency definitions. - self.upload_file( - repository, - filename="emboss/0470_files/emboss_complex_dependency.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Populate emboss_5 with tool and dependency definitions.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.user_populator().setup_test_data_repo("emboss_5_0470", repository, end=1) def test_0020_upload_updated_tool_dependency_to_package_x11(self): """Upload a new tool_dependencies.xml to package_x11_client_1_5_proto_7_0_0470. 
@@ -174,17 +144,7 @@ def test_0020_upload_updated_tool_dependency_to_package_x11(self): package_libx11_repository_name, common.test_user_1_name ) # Upload the tool dependency definition to the package_x11_client_1_5_proto_7_0_0470 repository. - self.upload_file( - package_x11_repository, - filename="emboss/libx11_proto/second_tool_dependency/tool_dependencies.xml", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Populate package_x11_client_1_5_proto_7_0_0470 with tool dependency definitions.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.user_populator().setup_test_data_repo("libx11_proto", package_x11_repository, start=1, end=2) count = self._get_metadata_revision_count(package_x11_repository) assert count == 1, ( "package_x11_client_1_5_proto_7_0_0470 has incorrect number of metadata revisions, expected 1 but found %d" @@ -202,16 +162,8 @@ def test_0025_upload_updated_tool_dependency_to_package_emboss(self): package_emboss_repository_name, common.test_user_1_name ) # Populate package_emboss_5_0_0_0470 with updated tool dependency definition. 
- self.upload_file( - package_emboss_repository, - filename="emboss/emboss_5_0_0/second_tool_dependency/tool_dependencies.xml", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Populate package_emboss_5_0_0_0470 with tool dependency definitions.", - strings_displayed=[], - strings_not_displayed=[], + self.user_populator().setup_test_data_repo( + "package_emboss_5_0_0_0470", package_emboss_repository, start=1, end=2 ) count = self._get_metadata_revision_count(package_emboss_repository) assert count == 2, ( @@ -227,17 +179,7 @@ def test_0030_upload_updated_tool_dependency_to_emboss_5_repository(self): """ emboss_repository = self._get_repository_by_name_and_owner(emboss_repository_name, common.test_user_1_name) # Populate package_emboss_5_0_0_0470 with updated tool dependency definition. - self.upload_file( - emboss_repository, - filename="emboss/0470_files/tool_dependencies.xml", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Upload updated complex repository dependency definition to emboss_5_0470.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.user_populator().setup_test_data_repo("emboss_5_0470", emboss_repository, start=1, end=2) count = self._get_metadata_revision_count(emboss_repository) assert count == 2, "package_emboss_5_0_0_0470 has incorrect number of metadata revisions" diff --git a/lib/tool_shed/test/functional/test_0480_tool_dependency_xml_verification.py b/lib/tool_shed/test/functional/test_0480_tool_dependency_xml_verification.py index 45510dd92d5d..48f121ead36b 100644 --- a/lib/tool_shed/test/functional/test_0480_tool_dependency_xml_verification.py +++ b/lib/tool_shed/test/functional/test_0480_tool_dependency_xml_verification.py @@ -48,17 +48,7 @@ def test_0005_create_tool_dependency_repository(self): category=category, strings_displayed=[], ) - self.upload_file( - repository, - 
filename="0480_files/tool_dependencies.xml", - filepath=None, - valid_tools_only=False, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Populate package_invalid_tool_dependency_xml_1_0_0 with an improperly defined tool dependency.", - strings_displayed=["package cannot be installed because", "missing either an <actions> tag set"], - strings_not_displayed=[], - ) + self.user_populator().setup_test_data_repo("0480", repository, assert_ok=False) def test_0010_populate_tool_dependency_repository(self): """Verify package_invalid_tool_dependency_xml_1_0_0. diff --git a/lib/tool_shed/test/functional/test_0530_repository_admin_feature.py b/lib/tool_shed/test/functional/test_0530_repository_admin_feature.py index 3465af3c582c..19294287a283 100644 --- a/lib/tool_shed/test/functional/test_0530_repository_admin_feature.py +++ b/lib/tool_shed/test/functional/test_0530_repository_admin_feature.py @@ -57,16 +57,10 @@ def test_0005_create_filtering_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "filtering/filtering_1.1.0.tar", commit_message="Uploaded filtering 1.1.0 tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0010_verify_repository_admin_role_exists(self): diff --git a/lib/tool_shed/test/functional/test_0550_metadata_updated_dependencies.py b/lib/tool_shed/test/functional/test_0550_metadata_updated_dependencies.py index 2787292505f1..f622b05ac2ab 100644 --- a/lib/tool_shed/test/functional/test_0550_metadata_updated_dependencies.py +++ b/lib/tool_shed/test/functional/test_0550_metadata_updated_dependencies.py @@ -67,16 +67,9 @@ def test_0005_freebayes_repository(self): strings_displayed=[], ) assert freebayes is not None, f"Error creating freebayes {repositories['freebayes']['name']}" - 
self.upload_file( + self.commit_tar_to_repository( freebayes, - filename="0550_files/package_freebayes_1_0550.tgz", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded.", - strings_displayed=["has been successfully uploaded to the repository", "contains a single file"], - strings_not_displayed=None, + "0550_files/package_freebayes_1_0550.tgz", ) # Visit the manage repository page for package_freebayes_0_5_9_0100. self.display_manage_repository_page( @@ -95,16 +88,10 @@ def test_0010_create_samtools_repository(self): strings_displayed=[], ) assert samtools is not None, f"Error creating samtools {repositories['samtools']['name']}" - self.upload_file( + self.commit_tar_to_repository( samtools, - filename="0550_files/package_samtools_1_0550.tgz", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "0550_files/package_samtools_1_0550.tgz", commit_message="Uploaded samtools 1.0.", - strings_displayed=["has been successfully uncompressed and uploaded to the repository"], - strings_not_displayed=[], ) def test_0015_create_filtering_repository(self): @@ -119,16 +106,10 @@ def test_0015_create_filtering_repository(self): strings_displayed=[], ) assert repository is not None, f"Error creating repository {repositories['filtering']['name']}" - self.upload_file( + self.commit_tar_to_repository( repository, - filename="0550_files/filtering_1.0.tgz", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "0550_files/filtering_1.0.tgz", commit_message="Uploaded filtering 1.0.", - strings_displayed=["has been successfully uncompressed and uploaded to the repository"], - strings_not_displayed=[], ) def test_0020_check_repository_dependency(self): @@ -146,27 +127,15 @@ def test_0025_update_dependent_repositories(self): freebayes = self._get_repository_by_name_and_owner(repositories["freebayes"]["name"], 
common.test_user_1_name) samtools = self._get_repository_by_name_and_owner(repositories["samtools"]["name"], common.test_user_1_name) filtering = self._get_repository_by_name_and_owner(repositories["filtering"]["name"], common.test_user_1_name) - self.upload_file( + self.commit_tar_to_repository( freebayes, - filename="0550_files/package_freebayes_2_0550.tgz", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "0550_files/package_freebayes_2_0550.tgz", commit_message="Uploaded freebayes 2.0.", - strings_displayed=["has been successfully uncompressed and uploaded to the repository"], - strings_not_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( samtools, - filename="0550_files/package_samtools_2_0550.tgz", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "0550_files/package_samtools_2_0550.tgz", commit_message="Uploaded samtools 2.0.", - strings_displayed=["has been successfully uncompressed and uploaded to the repository"], - strings_not_displayed=[], ) strings_displayed = [ repositories["freebayes"]["name"], diff --git a/lib/tool_shed/test/functional/test_1000_install_basic_repository.py b/lib/tool_shed/test/functional/test_1000_install_basic_repository.py index 0b6ec2c89dcc..81760f008c34 100644 --- a/lib/tool_shed/test/functional/test_1000_install_basic_repository.py +++ b/lib/tool_shed/test/functional/test_1000_install_basic_repository.py @@ -37,50 +37,14 @@ def test_0005_ensure_repositories_and_categories_exist(self): category=category, ) if self.repository_is_new(repository): - self.upload_file( + self.commit_tar_to_repository( repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "filtering/filtering_1.1.0.tar", commit_message="Uploaded filtering 1.1.0 tarball.", - strings_displayed=[], - strings_not_displayed=[], - ) - 
self.upload_file( - repository, - filename="filtering/filtering_0000.txt", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded readme for 1.1.0", - strings_displayed=[], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename="filtering/filtering_2.2.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded filtering 2.2.0 tarball.", - strings_displayed=[], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename="readme.txt", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded readme for 2.2.0", - strings_displayed=[], - strings_not_displayed=[], ) + self.add_file_to_repository(repository, "filtering/filtering_0000.txt") + self.add_tar_to_repository(repository, "filtering/filtering_2.2.0.tar") + self.add_file_to_repository(repository, "readme.txt") def test_0010_browse_tool_sheds(self): """Browse the available tool sheds in this Galaxy instance.""" diff --git a/lib/tool_shed/test/functional/test_1010_install_repository_with_tool_dependencies.py b/lib/tool_shed/test/functional/test_1010_install_repository_with_tool_dependencies.py index 7348eda4f8a4..2df6669aee2f 100644 --- a/lib/tool_shed/test/functional/test_1010_install_repository_with_tool_dependencies.py +++ b/lib/tool_shed/test/functional/test_1010_install_repository_with_tool_dependencies.py @@ -1,5 +1,4 @@ import logging -import os from ..base.twilltestcase import ( common, @@ -37,78 +36,7 @@ def test_0005_ensure_repositories_and_categories_exist(self): category=category, ) if self.repository_is_new(repository): - self.upload_file( - repository, - filename="freebayes/freebayes.xml", - filepath=None, - valid_tools_only=False, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded the tool xml.", - 
strings_displayed=[ - "Metadata may have been defined", - "This file requires an entry", - "tool_data_table_conf", - ], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename="freebayes/tool_data_table_conf.xml.sample", - filepath=None, - valid_tools_only=False, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded the tool data table sample file.", - strings_displayed=[], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename="freebayes/sam_fa_indices.loc.sample", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded tool data table .loc file.", - strings_displayed=[], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename=os.path.join("freebayes", "malformed_tool_dependencies", "tool_dependencies.xml"), - filepath=None, - valid_tools_only=False, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded malformed tool dependency XML.", - strings_displayed=["Exception attempting to parse", "invalid element name"], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename=os.path.join("freebayes", "invalid_tool_dependencies", "tool_dependencies.xml"), - filepath=None, - valid_tools_only=False, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded invalid tool dependency XML.", - strings_displayed=[ - "The settings for name, version and type from a contained tool configuration" - ], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename=os.path.join("freebayes", "tool_dependencies.xml"), - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded valid tool dependency XML.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.setup_freebayes_0010_repo(repository) def test_0010_browse_tool_shed(self): 
"""Browse the available tool sheds in this Galaxy instance and preview the freebayes tool.""" diff --git a/lib/tool_shed/test/functional/test_1020_install_repository_with_repository_dependencies.py b/lib/tool_shed/test/functional/test_1020_install_repository_with_repository_dependencies.py index 29b938967af8..fbcfbb28d3be 100644 --- a/lib/tool_shed/test/functional/test_1020_install_repository_with_repository_dependencies.py +++ b/lib/tool_shed/test/functional/test_1020_install_repository_with_repository_dependencies.py @@ -37,16 +37,10 @@ def test_0005_ensure_repositories_and_categories_exist(self): strings_displayed=[], ) if self.repository_is_new(column_maker_repository): - self.upload_file( + self.commit_tar_to_repository( column_maker_repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Uploaded column_maker tarball.", - strings_displayed=[], - strings_not_displayed=[], ) emboss_repository = self.get_or_create_repository( name=emboss_repository_name, @@ -56,16 +50,10 @@ def test_0005_ensure_repositories_and_categories_exist(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( emboss_repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded emboss.tar", - strings_displayed=[], - strings_not_displayed=[], ) repository_dependencies_path = self.generate_temp_path("test_1020", additional_paths=["emboss", "5"]) repository_tuple = ( diff --git a/lib/tool_shed/test/functional/test_1030_install_repository_with_dependency_revisions.py b/lib/tool_shed/test/functional/test_1030_install_repository_with_dependency_revisions.py index 9bd1eb5f7826..17cebc758712 100644 --- 
a/lib/tool_shed/test/functional/test_1030_install_repository_with_dependency_revisions.py +++ b/lib/tool_shed/test/functional/test_1030_install_repository_with_dependency_revisions.py @@ -43,16 +43,10 @@ def test_0005_ensure_repositories_and_categories_exist(self): ) if self.repository_is_new(column_maker_repository): running_standalone = True - self.upload_file( + self.commit_tar_to_repository( column_maker_repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Uploaded column_maker tarball.", - strings_displayed=[], - strings_not_displayed=[], ) emboss_5_repository = self.get_or_create_repository( name=emboss_5_repository_name, @@ -62,16 +56,10 @@ def test_0005_ensure_repositories_and_categories_exist(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( emboss_5_repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded tool tarball.", - strings_displayed=[], - strings_not_displayed=[], ) repository_dependencies_path = self.generate_temp_path("test_1030", additional_paths=["emboss", "5"]) column_maker_tuple = ( @@ -93,16 +81,10 @@ def test_0005_ensure_repositories_and_categories_exist(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( emboss_6_repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded tool tarball.", - strings_displayed=[], - strings_not_displayed=[], ) repository_dependencies_path = self.generate_temp_path("test_1030", additional_paths=["emboss", "6"]) column_maker_tuple = ( @@ -124,16 +106,10 @@ def 
test_0005_ensure_repositories_and_categories_exist(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( emboss_repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded tool tarball.", - strings_displayed=[], - strings_not_displayed=[], ) repository_dependencies_path = self.generate_temp_path("test_1030", additional_paths=["emboss", "5"]) dependency_tuple = ( diff --git a/lib/tool_shed/test/functional/test_1040_install_repository_basic_circular_dependencies.py b/lib/tool_shed/test/functional/test_1040_install_repository_basic_circular_dependencies.py index c71ad0c31ca0..60748c5b2763 100644 --- a/lib/tool_shed/test/functional/test_1040_install_repository_basic_circular_dependencies.py +++ b/lib/tool_shed/test/functional/test_1040_install_repository_basic_circular_dependencies.py @@ -43,16 +43,10 @@ def test_0005_create_freebayes_repository(self): ) if self.repository_is_new(repository): running_standalone = True - self.upload_file( + self.commit_tar_to_repository( repository, - filename="freebayes/freebayes.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "freebayes/freebayes.tar", commit_message="Uploaded the tool tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0015_create_filtering_repository(self): @@ -72,16 +66,10 @@ def test_0015_create_filtering_repository(self): ) if self.repository_is_new(repository): running_standalone = True - self.upload_file( + self.commit_tar_to_repository( repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "filtering/filtering_1.1.0.tar", commit_message="Uploaded the tool tarball for filtering 1.1.0.", - strings_displayed=[], - strings_not_displayed=[], 
) def test_0020_create_repository_dependencies(self): diff --git a/lib/tool_shed/test/functional/test_1050_circular_dependencies_4_levels.py b/lib/tool_shed/test/functional/test_1050_circular_dependencies_4_levels.py index 713e7f7e84be..48e7039032a3 100644 --- a/lib/tool_shed/test/functional/test_1050_circular_dependencies_4_levels.py +++ b/lib/tool_shed/test/functional/test_1050_circular_dependencies_4_levels.py @@ -58,16 +58,10 @@ def test_0005_create_convert_repository(self): ) if self.repository_is_new(repository): running_standalone = True - self.upload_file( + self.commit_tar_to_repository( repository, - filename="convert_chars/convert_chars.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "convert_chars/convert_chars.tar", commit_message="Uploaded convert_chars tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0010_create_column_repository(self): @@ -82,16 +76,10 @@ def test_0010_create_column_repository(self): strings_displayed=[], ) if self.repository_is_new(repository): - self.upload_file( + self.commit_tar_to_repository( repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Uploaded column_maker tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0015_create_emboss_datatypes_repository(self): @@ -110,16 +98,10 @@ def test_0020_create_emboss_repository(self): strings_displayed=[], ) if self.repository_is_new(repository): - self.upload_file( + self.commit_tar_to_repository( repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded emboss tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0025_create_filtering_repository(self): @@ -134,16 
+116,10 @@ def test_0025_create_filtering_repository(self): strings_displayed=[], ) if self.repository_is_new(repository): - self.upload_file( + self.commit_tar_to_repository( repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "filtering/filtering_1.1.0.tar", commit_message="Uploaded filtering 1.1.0 tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0030_create_freebayes_repository(self): @@ -158,16 +134,10 @@ def test_0030_create_freebayes_repository(self): strings_displayed=[], ) if self.repository_is_new(repository): - self.upload_file( + self.commit_tar_to_repository( repository, - filename="freebayes/freebayes.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "freebayes/freebayes.tar", commit_message="Uploaded freebayes tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0035_create_bismark_repository(self): @@ -182,17 +152,7 @@ def test_0035_create_bismark_repository(self): strings_displayed=[], ) if self.repository_is_new(repository): - self.upload_file( - repository, - filename="bismark/bismark.tar", - filepath=None, - valid_tools_only=False, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded bismark tarball.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.user_populator().setup_bismark_repo(repository, end=1) def test_0040_create_and_upload_dependency_definitions(self): """Set up the dependency structure.""" diff --git a/lib/tool_shed/test/functional/test_1070_invalid_tool.py b/lib/tool_shed/test/functional/test_1070_invalid_tool.py index 8b720697215b..0f8e08e822e1 100644 --- a/lib/tool_shed/test/functional/test_1070_invalid_tool.py +++ b/lib/tool_shed/test/functional/test_1070_invalid_tool.py @@ -34,28 +34,7 @@ def test_0005_ensure_existence_of_repository_and_category(self): 
strings_displayed=[], ) if self.repository_is_new(repository): - self.upload_file( - repository, - filename="bismark/bismark.tar", - filepath=None, - valid_tools_only=False, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded bismark tarball.", - strings_displayed=[], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename="bismark/bismark_methylation_extractor.xml", - filepath=None, - valid_tools_only=False, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded an updated tool xml.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.user_populator().setup_bismark_repo(repository) def test_0010_browse_tool_shed(self): """Browse the available tool sheds in this Galaxy instance and preview the bismark repository.""" diff --git a/lib/tool_shed/test/functional/test_1080_advanced_circular_dependency_installation.py b/lib/tool_shed/test/functional/test_1080_advanced_circular_dependency_installation.py index f9352bcb05f3..b9f82f4c098c 100644 --- a/lib/tool_shed/test/functional/test_1080_advanced_circular_dependency_installation.py +++ b/lib/tool_shed/test/functional/test_1080_advanced_circular_dependency_installation.py @@ -45,16 +45,10 @@ def test_0005_create_and_populate_column_repository(self): strings_displayed=[], ) if self.repository_is_new(repository): - self.upload_file( + self.commit_tar_to_repository( repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Uploaded column_maker tarball.", - strings_displayed=[], - strings_not_displayed=[], ) running_standalone = True @@ -73,16 +67,10 @@ def test_0010_create_and_populate_convert_repository(self): strings_displayed=[], ) if self.repository_is_new(repository): - self.upload_file( + self.commit_tar_to_repository( repository, - 
filename="convert_chars/convert_chars.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "convert_chars/convert_chars.tar", commit_message="Uploaded convert_chars tarball.", - strings_displayed=[], - strings_not_displayed=[], ) running_standalone = True diff --git a/lib/tool_shed/test/functional/test_1090_repository_dependency_handling.py b/lib/tool_shed/test/functional/test_1090_repository_dependency_handling.py index 0a249455275c..37192cf55b45 100644 --- a/lib/tool_shed/test/functional/test_1090_repository_dependency_handling.py +++ b/lib/tool_shed/test/functional/test_1090_repository_dependency_handling.py @@ -42,16 +42,10 @@ def test_0005_create_and_populate_column_repository(self): strings_displayed=[], ) if self.repository_is_new(repository): - self.upload_file( + self.commit_tar_to_repository( repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Uploaded column_maker tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0010_create_and_populate_convert_repository(self): @@ -67,16 +61,10 @@ def test_0010_create_and_populate_convert_repository(self): strings_displayed=[], ) if self.repository_is_new(repository): - self.upload_file( + self.commit_tar_to_repository( repository, - filename="convert_chars/convert_chars.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "convert_chars/convert_chars.tar", commit_message="Uploaded convert_chars tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0015_create_and_upload_dependency_files(self): diff --git a/lib/tool_shed/test/functional/test_1100_install_updated_repository_dependencies.py b/lib/tool_shed/test/functional/test_1100_install_updated_repository_dependencies.py index 3cd7e9b6b89d..b824593a34d4 
100644 --- a/lib/tool_shed/test/functional/test_1100_install_updated_repository_dependencies.py +++ b/lib/tool_shed/test/functional/test_1100_install_updated_repository_dependencies.py @@ -38,16 +38,10 @@ def test_0005_create_and_populate_column_repository(self): strings_displayed=[], ) if self.repository_is_new(repository): - self.upload_file( + self.commit_tar_to_repository( repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Uploaded column_maker tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0010_create_and_populate_convert_repository(self): @@ -64,16 +58,10 @@ def test_0010_create_and_populate_convert_repository(self): strings_displayed=[], ) if self.repository_is_new(repository): - self.upload_file( + self.commit_tar_to_repository( repository, - filename="convert_chars/convert_chars.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "convert_chars/convert_chars.tar", commit_message="Uploaded convert_chars tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0015_install_and_uninstall_column_repository(self): diff --git a/lib/tool_shed/test/functional/test_1120_install_repository_with_complex_dependencies.py b/lib/tool_shed/test/functional/test_1120_install_repository_with_complex_dependencies.py index b3fb64a7d6d7..a4c0fcf55550 100644 --- a/lib/tool_shed/test/functional/test_1120_install_repository_with_complex_dependencies.py +++ b/lib/tool_shed/test/functional/test_1120_install_repository_with_complex_dependencies.py @@ -53,19 +53,7 @@ def test_0005_create_bwa_package_repository(self): open(xml_filename, "w").write( open(old_tool_dependency).read().replace("__PATH__", self.get_filename("bwa/complex")) ) - self.upload_file( - repository, - filename=xml_filename, - 
filepath=new_tool_dependency_path, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded tool_dependencies.xml.", - strings_displayed=[ - "This repository currently contains a single file named tool_dependencies.xml" - ], - strings_not_displayed=[], - ) + self.add_file_to_repository(repository, xml_filename, "tool_dependencies.xml") self.display_manage_repository_page( repository, strings_displayed=["Tool dependencies", "consider setting its type"] ) @@ -85,16 +73,10 @@ def test_0010_create_bwa_base_repository(self): strings_displayed=[], ) self._get_repository_by_name_and_owner(bwa_package_repository_name, common.test_user_1_name) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="bwa/complex/bwa_base.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "bwa/complex/bwa_base.tar", commit_message="Uploaded bwa_base.tar with tool wrapper XML, but without tool dependency XML.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0015_generate_complex_repository_dependency_invalid_shed_url(self): @@ -237,17 +219,7 @@ def test_0040_update_tool_repository(self): open(xml_filename, "w").write( open(old_tool_dependency).read().replace("__PATH__", self.get_filename("bwa/complex")) ) - self.upload_file( - tool_repository, - filename=xml_filename, - filepath=new_tool_dependency_path, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded new tool_dependencies.xml.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.add_file_to_repository(tool_repository, xml_filename, "tool_dependencies.xml") # Verify that the dependency display has been updated as a result of the new tool_dependencies.xml file. 
self.display_manage_repository_page( base_repository, diff --git a/lib/tool_shed/test/functional/test_1130_install_repository_with_invalid_repository_dependency.py b/lib/tool_shed/test/functional/test_1130_install_repository_with_invalid_repository_dependency.py index d59f45563ce8..db5139d4247d 100644 --- a/lib/tool_shed/test/functional/test_1130_install_repository_with_invalid_repository_dependency.py +++ b/lib/tool_shed/test/functional/test_1130_install_repository_with_invalid_repository_dependency.py @@ -45,16 +45,10 @@ def test_0010_create_emboss_dependendent_column_maker_repository_and_upload_tarb ) if self.repository_is_new(column_maker_repository): running_standalone = True - self.upload_file( + self.commit_tar_to_repository( column_maker_repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Uploaded column_maker tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0020_create_emboss_5_repository_and_upload_files(self): @@ -70,16 +64,10 @@ def test_0020_create_emboss_5_repository_and_upload_files(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded emboss tool tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0025_generate_repository_dependency_with_invalid_url(self): diff --git a/lib/tool_shed/test/functional/test_1140_simple_repository_dependency_multiple_owners.py b/lib/tool_shed/test/functional/test_1140_simple_repository_dependency_multiple_owners.py index 83b5a6d0c4de..557f20fa8ed6 100644 --- a/lib/tool_shed/test/functional/test_1140_simple_repository_dependency_multiple_owners.py +++ 
b/lib/tool_shed/test/functional/test_1140_simple_repository_dependency_multiple_owners.py @@ -60,16 +60,10 @@ def test_0005_create_datatypes_repository(self): strings_displayed=strings_displayed, ) if self.repository_is_new(repository): - self.upload_file( + self.commit_tar_to_repository( repository, - filename="blast/blast_datatypes.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "blast/blast_datatypes.tar", commit_message="Uploaded blast_datatypes tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0010_verify_datatypes_repository(self): @@ -112,13 +106,13 @@ def test_0015_create_tool_repository(self): ) if self.repository_is_new(repository): running_standalone = True - self.upload_file( + self.commit_tar_to_repository( repository, - filename="blast/blastxml_to_top_descr.tar", + "blast/blastxml_to_top_descr.tar", filepath=None, valid_tools_only=True, uncompress_file=True, - remove_repo_files_not_in_tar=False, + remove_repo_files_not_in_tar=True, commit_message="Uploaded blastxml_to_top_descr tarball.", strings_displayed=[], strings_not_displayed=[], diff --git a/lib/tool_shed/test/functional/test_1160_tool_help_images.py b/lib/tool_shed/test/functional/test_1160_tool_help_images.py index 632788755e43..dd46f0dbae92 100644 --- a/lib/tool_shed/test/functional/test_1160_tool_help_images.py +++ b/lib/tool_shed/test/functional/test_1160_tool_help_images.py @@ -49,16 +49,10 @@ def test_0005_create_htseq_count_repository(self): ) if self.repository_is_new(repository): # Upload htseq_count.tar to the repository if it hasn't already been populated. 
- self.upload_file( + self.commit_tar_to_repository( repository, - filename="htseq_count/htseq_count.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, + "htseq_count/htseq_count.tar", commit_message="Uploaded htseq_count.tar.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0010_load_tool_page(self): diff --git a/lib/tool_shed/test/functional/test_1170_prior_installation_required.py b/lib/tool_shed/test/functional/test_1170_prior_installation_required.py index 2894b96fcecc..c126a29ca4b8 100644 --- a/lib/tool_shed/test/functional/test_1170_prior_installation_required.py +++ b/lib/tool_shed/test/functional/test_1170_prior_installation_required.py @@ -60,16 +60,10 @@ def test_0005_create_convert_repository(self): ) if self.repository_is_new(repository): running_standalone = True - self.upload_file( + self.commit_tar_to_repository( repository, - filename="convert_chars/convert_chars.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "convert_chars/convert_chars.tar", commit_message="Uploaded convert_chars tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0010_create_column_repository(self): @@ -85,16 +79,10 @@ def test_0010_create_column_repository(self): strings_displayed=[], ) if running_standalone: - self.upload_file( + self.commit_tar_to_repository( repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Uploaded column_maker tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0015_create_repository_dependency(self): diff --git a/lib/tool_shed/test/functional/test_1180_circular_prior_installation_required.py b/lib/tool_shed/test/functional/test_1180_circular_prior_installation_required.py index 1b1b29ddf974..755c823211cc 100644 --- 
a/lib/tool_shed/test/functional/test_1180_circular_prior_installation_required.py +++ b/lib/tool_shed/test/functional/test_1180_circular_prior_installation_required.py @@ -73,16 +73,10 @@ def test_0005_create_convert_repository(self): ) if self.repository_is_new(repository): running_standalone = True - self.upload_file( + self.commit_tar_to_repository( repository, - filename="convert_chars/convert_chars.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "convert_chars/convert_chars.tar", commit_message="Uploaded convert_chars tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0010_create_column_repository(self): @@ -98,16 +92,10 @@ def test_0010_create_column_repository(self): strings_displayed=[], ) if running_standalone: - self.upload_file( + self.commit_tar_to_repository( repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Uploaded column_maker tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0015_create_filtering_repository(self): @@ -123,16 +111,10 @@ def test_0015_create_filtering_repository(self): strings_displayed=[], ) if running_standalone: - self.upload_file( + self.commit_tar_to_repository( repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "filtering/filtering_1.1.0.tar", commit_message="Uploaded filtering 1.1.0 tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0020_create_repository_dependency(self): diff --git a/lib/tool_shed/test/functional/test_1190_complex_prior_installation_required.py b/lib/tool_shed/test/functional/test_1190_complex_prior_installation_required.py index c3d049890108..57714179cd0e 100644 --- 
a/lib/tool_shed/test/functional/test_1190_complex_prior_installation_required.py +++ b/lib/tool_shed/test/functional/test_1190_complex_prior_installation_required.py @@ -61,18 +61,10 @@ def test_0005_create_matplotlib_repository(self): ) if self.repository_is_new(repository): running_standalone = True - self.upload_file( + self.commit_tar_to_repository( repository, - filename="package_matplotlib/package_matplotlib_1_2.tar", - filepath=None, - valid_tools_only=False, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "package_matplotlib/package_matplotlib_1_2.tar", commit_message="Uploaded matplotlib tool dependency tarball.", - strings_displayed=[ - "This repository currently contains a single file named tool_dependencies.xml" - ], - strings_not_displayed=[], ) def test_0010_create_numpy_repository(self): @@ -92,18 +84,10 @@ def test_0010_create_numpy_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="package_numpy/package_numpy_1_7.tar", - filepath=None, - valid_tools_only=False, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "package_numpy/package_numpy_1_7.tar", commit_message="Uploaded numpy tool dependency tarball.", - strings_displayed=[ - "This repository currently contains a single file named tool_dependencies.xml" - ], - strings_not_displayed=[], ) def test_0015_create_complex_repository_dependency(self): @@ -136,18 +120,7 @@ def test_0015_create_complex_repository_dependency(self): dependency_xml_path = self.generate_temp_path("test_0170", additional_paths=["matplotlib"]) new_xml_file = os.path.join(dependency_xml_path, "tool_dependencies.xml") open(new_xml_file, "w").write(original_xml.replace("", processed_xml)) - # Upload the generated complex repository dependency XML to the matplotlib repository. 
- self.upload_file( - matplotlib_repository, - filename="tool_dependencies.xml", - filepath=dependency_xml_path, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded complex repository dependency on numpy 1.7.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.add_file_to_repository(matplotlib_repository, new_xml_file, "tool_dependencies.xml") def test_0020_verify_generated_dependency(self): """Verify that matplotlib now has a package tool dependency and a complex repository dependency. diff --git a/lib/tool_shed/test/functional/test_1200_uninstall_and_reinstall_basic_repository.py b/lib/tool_shed/test/functional/test_1200_uninstall_and_reinstall_basic_repository.py index ee8ae3610bcb..ca7d95a38507 100644 --- a/lib/tool_shed/test/functional/test_1200_uninstall_and_reinstall_basic_repository.py +++ b/lib/tool_shed/test/functional/test_1200_uninstall_and_reinstall_basic_repository.py @@ -31,50 +31,14 @@ def test_0005_ensure_repositories_and_categories_exist(self): category=category, ) if self.repository_is_new(repository): - self.upload_file( + self.commit_tar_to_repository( repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "filtering/filtering_1.1.0.tar", commit_message="Uploaded filtering 1.1.0 tarball.", - strings_displayed=[], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename="filtering/filtering_0000.txt", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded readme for 1.1.0", - strings_displayed=[], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename="filtering/filtering_2.2.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded filtering 2.2.0 tarball.", - 
strings_displayed=[], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename="readme.txt", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded readme for 2.2.0", - strings_displayed=[], - strings_not_displayed=[], ) + self.add_file_to_repository(repository, "filtering/filtering_0000.txt") + self.add_tar_to_repository(repository, "filtering/filtering_2.2.0.tar") + self.add_file_to_repository(repository, "readme.txt") def test_0010_install_filtering_repository(self): """Install the filtering repository into the Galaxy instance.""" diff --git a/lib/tool_shed/test/functional/test_1210_uninstall_reinstall_repository_with_tool_dependencies.py b/lib/tool_shed/test/functional/test_1210_uninstall_reinstall_repository_with_tool_dependencies.py index c18a1ab6fd9e..66be32573652 100644 --- a/lib/tool_shed/test/functional/test_1210_uninstall_reinstall_repository_with_tool_dependencies.py +++ b/lib/tool_shed/test/functional/test_1210_uninstall_reinstall_repository_with_tool_dependencies.py @@ -31,78 +31,28 @@ def test_0005_ensure_repositories_and_categories_exist(self): category=category, ) if self.repository_is_new(repository): - self.upload_file( - repository, - filename="freebayes/freebayes.xml", - filepath=None, - valid_tools_only=False, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded the tool xml.", - strings_displayed=[ - "Metadata may have been defined", - "This file requires an entry", - "tool_data_table_conf", - ], - strings_not_displayed=[], + strings_displayed = [ + "Metadata may have been defined", + "This file requires an entry", + "tool_data_table_conf", + ] + self.add_file_to_repository(repository, "freebayes/freebayes.xml", strings_displayed=strings_displayed) + strings_displayed = ["Upload a file named sam_fa_indices.loc.sample"] + self.add_file_to_repository( + repository, "freebayes/tool_data_table_conf.xml.sample", 
strings_displayed=strings_displayed ) - self.upload_file( - repository, - filename="freebayes/tool_data_table_conf.xml.sample", - filepath=None, - valid_tools_only=False, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded the tool data table sample file.", - strings_displayed=[], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename="freebayes/sam_fa_indices.loc.sample", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded tool data table .loc file.", - strings_displayed=[], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename=os.path.join("freebayes", "malformed_tool_dependencies", "tool_dependencies.xml"), - filepath=None, - valid_tools_only=False, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded malformed tool dependency XML.", - strings_displayed=["Exception attempting to parse", "invalid element name"], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename=os.path.join("freebayes", "invalid_tool_dependencies", "tool_dependencies.xml"), - filepath=None, - valid_tools_only=False, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded invalid tool dependency XML.", - strings_displayed=[ - "The settings for name, version and type from a contained tool configuration" - ], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename=os.path.join("freebayes", "tool_dependencies.xml"), - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded valid tool dependency XML.", - strings_displayed=[], - strings_not_displayed=[], + self.add_file_to_repository(repository, "freebayes/sam_fa_indices.loc.sample") + target = os.path.join("freebayes", "malformed_tool_dependencies", "tool_dependencies.xml") + 
self.add_file_to_repository( + repository, target, strings_displayed=["Exception attempting to parse", "invalid element name"] ) + target = os.path.join("freebayes", "invalid_tool_dependencies", "tool_dependencies.xml") + strings_displayed = [ + "The settings for name, version and type from a contained tool configuration" + ] + self.add_file_to_repository(repository, target, strings_displayed=strings_displayed) + target = os.path.join("freebayes", "tool_dependencies.xml") + self.add_file_to_repository(repository, target) def test_0010_install_freebayes_repository(self): """Install the freebayes repository into the Galaxy instance.""" diff --git a/lib/tool_shed/test/functional/test_1230_uninstall_reinstall_repository_with_dependency_revisions.py b/lib/tool_shed/test/functional/test_1230_uninstall_reinstall_repository_with_dependency_revisions.py index 787e3247f3e6..91720f67bfe1 100644 --- a/lib/tool_shed/test/functional/test_1230_uninstall_reinstall_repository_with_dependency_revisions.py +++ b/lib/tool_shed/test/functional/test_1230_uninstall_reinstall_repository_with_dependency_revisions.py @@ -52,16 +52,10 @@ def test_0005_ensure_repositories_and_categories_exist(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( column_maker_repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=False, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded bismark tarball.", - strings_displayed=[], - strings_not_displayed=[], + "column_maker/column_maker.tar", + commit_message="Uploaded column maker tarball.", ) repository_dependencies_path = self.generate_temp_path("test_1030", additional_paths=["emboss", "5"]) column_maker_tuple = ( @@ -83,16 +77,10 @@ def test_0005_ensure_repositories_and_categories_exist(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( emboss_6_repository, - 
filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded tool tarball.", - strings_displayed=[], - strings_not_displayed=[], ) repository_dependencies_path = self.generate_temp_path("test_1030", additional_paths=["emboss", "6"]) column_maker_tuple = ( @@ -114,16 +102,10 @@ def test_0005_ensure_repositories_and_categories_exist(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( emboss_repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded tool tarball.", - strings_displayed=[], - strings_not_displayed=[], ) repository_dependencies_path = self.generate_temp_path("test_1030", additional_paths=["emboss", "5"]) dependency_tuple = ( diff --git a/lib/tool_shed/test/functional/test_1300_reset_all_metadata.py b/lib/tool_shed/test/functional/test_1300_reset_all_metadata.py index eda3d6dc693b..4b5571c74146 100644 --- a/lib/tool_shed/test/functional/test_1300_reset_all_metadata.py +++ b/lib/tool_shed/test/functional/test_1300_reset_all_metadata.py @@ -81,28 +81,12 @@ def test_0010_create_repositories_from_0000_series(self): ) if self.repository_is_new(repository): running_standalone = True - self.upload_file( + self.commit_tar_to_repository( repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "filtering/filtering_1.1.0.tar", commit_message="Uploaded filtering 1.1.0 tarball.", - strings_displayed=[], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename="filtering/filtering_2.2.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded filtering 2.2.0 
tarball.", - strings_displayed=[], - strings_not_displayed=[], ) + self.add_tar_to_repository(repository, "filtering/filtering_2.2.0.tar") def test_0015_create_repositories_from_0010_series(self): """Create repository freebayes_0010.""" @@ -116,50 +100,7 @@ def test_0015_create_repositories_from_0010_series(self): strings_displayed=[], ) if self.repository_is_new(repository): - self.upload_file( - repository, - filename="freebayes/freebayes.xml", - filepath=None, - valid_tools_only=False, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded freebayes.xml.", - strings_displayed=[], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename="freebayes/tool_data_table_conf.xml.sample", - filepath=None, - valid_tools_only=False, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded tool_data_table_conf.xml.sample", - strings_displayed=[], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename="freebayes/sam_fa_indices.loc.sample", - filepath=None, - valid_tools_only=False, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded sam_fa_indices.loc.sample", - strings_displayed=[], - strings_not_displayed=[], - ) - self.upload_file( - repository, - filename="freebayes/tool_dependencies.xml", - filepath=None, - valid_tools_only=False, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded tool_dependencies.xml", - strings_displayed=[], - strings_not_displayed=[], - ) + self.setup_freebayes_0010_repo(repository) def test_0020_create_repositories_from_0020_series(self): """Create repositories emboss_0020 and column_maker_0020 if necessary.""" @@ -173,16 +114,10 @@ def test_0020_create_repositories_from_0020_series(self): strings_displayed=[], ) if self.repository_is_new(column_maker_repository): - self.upload_file( + self.commit_tar_to_repository( column_maker_repository, - 
filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Uploaded column_maker tarball.", - strings_displayed=[], - strings_not_displayed=[], ) repository = self.get_or_create_repository( name="emboss_0020", @@ -192,16 +127,10 @@ def test_0020_create_repositories_from_0020_series(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded emboss.tar", - strings_displayed=[], - strings_not_displayed=[], ) def test_0025_create_repositories_from_0030_series(self): @@ -217,16 +146,10 @@ def test_0025_create_repositories_from_0030_series(self): strings_displayed=[], ) if self.repository_is_new(column_maker_repository): - self.upload_file( + self.commit_tar_to_repository( column_maker_repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=False, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Uploaded bismark tarball.", - strings_displayed=[], - strings_not_displayed=[], ) emboss_5_repository = self.get_or_create_repository( name="emboss_5_0030", @@ -236,16 +159,10 @@ def test_0025_create_repositories_from_0030_series(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( emboss_5_repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded emboss.tar", - strings_displayed=[], - strings_not_displayed=[], ) repository_dependencies_path = self.generate_temp_path("test_0330", additional_paths=["emboss", "5"]) 
dependency_tuple = ( @@ -267,16 +184,10 @@ def test_0025_create_repositories_from_0030_series(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( emboss_6_repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded emboss.tar", - strings_displayed=[], - strings_not_displayed=[], ) repository_dependencies_path = self.generate_temp_path("test_0330", additional_paths=["emboss", "6"]) dependency_tuple = ( @@ -298,16 +209,10 @@ def test_0025_create_repositories_from_0030_series(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( emboss_repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded emboss.tar", - strings_displayed=[], - strings_not_displayed=[], ) repository_dependencies_path = self.generate_temp_path("test_0330", additional_paths=["emboss", "5"]) dependency_tuple = ( @@ -345,16 +250,10 @@ def test_0030_create_repositories_from_0040_series(self): strings_displayed=[], ) if self.repository_is_new(repository): - self.upload_file( + self.commit_tar_to_repository( repository, - filename="freebayes/freebayes.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "freebayes/freebayes.tar", commit_message="Uploaded the tool tarball.", - strings_displayed=[], - strings_not_displayed=[], ) repository = self.get_or_create_repository( name="filtering_0040", @@ -363,17 +262,11 @@ def test_0030_create_repositories_from_0040_series(self): owner=common.test_user_1_name, category=category, strings_displayed=[], - ) - self.upload_file( + ) + self.commit_tar_to_repository( repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - 
valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "filtering/filtering_1.1.0.tar", commit_message="Uploaded the tool tarball for filtering 1.1.0.", - strings_displayed=[], - strings_not_displayed=[], ) repository = self._get_repository_by_name_and_owner("freebayes_0040", common.test_user_1_name) filtering_repository = self._get_repository_by_name_and_owner("filtering_0040", common.test_user_1_name) @@ -432,38 +325,20 @@ def test_0035_create_repositories_from_0050_series(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( emboss_repository, - filename="emboss/emboss.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "emboss/emboss.tar", commit_message="Uploaded emboss.tar", - strings_displayed=[], - strings_not_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( freebayes_repository, - filename="freebayes/freebayes.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "freebayes/freebayes.tar", commit_message="Uploaded freebayes tarball.", - strings_displayed=[], - strings_not_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( filtering_repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "filtering/filtering_1.1.0.tar", commit_message="Uploaded filtering 1.1.0 tarball.", - strings_displayed=[], - strings_not_displayed=[], ) repository_dependencies_path = self.generate_temp_path("test_0350", additional_paths=["emboss"]) repository_dependencies_path = self.generate_temp_path("test_0350", additional_paths=["filtering"]) diff --git a/lib/tool_shed/test/functional/test_1410_update_manager.py b/lib/tool_shed/test/functional/test_1410_update_manager.py index ba4112d7cd24..7debeaf57c90 100644 --- 
a/lib/tool_shed/test/functional/test_1410_update_manager.py +++ b/lib/tool_shed/test/functional/test_1410_update_manager.py @@ -53,16 +53,10 @@ def test_0005_create_filtering_repository(self): owner=common.test_user_1_name, category=category, ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=True, + "filtering/filtering_1.1.0.tar", commit_message="Uploaded filtering 1.1.0", - strings_displayed=[], - strings_not_displayed=[], ) def test_0010_install_filtering_repository(self): @@ -89,17 +83,7 @@ def test_0015_upload_readme_file(self): """ self.login(email=common.test_user_1_email, username=common.test_user_1_name) repository = self._get_repository_by_name_and_owner(repository_name, common.test_user_1_name) - self.upload_file( - repository, - filename="readme.txt", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded readme.txt", - strings_displayed=[], - strings_not_displayed=[], - ) + self.add_file_to_repository(repository, "readme.txt") def test_0020_check_for_displayed_update(self): """Browse installed repositories and verify update. 
diff --git a/lib/tool_shed/test/functional/test_1430_repair_installed_repository.py b/lib/tool_shed/test/functional/test_1430_repair_installed_repository.py index 5c0829ede090..01cbdbea57e5 100644 --- a/lib/tool_shed/test/functional/test_1430_repair_installed_repository.py +++ b/lib/tool_shed/test/functional/test_1430_repair_installed_repository.py @@ -62,16 +62,10 @@ def test_0005_create_filter_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "filtering/filtering_1.1.0.tar", commit_message="Populate filter_1430 with version 1.1.0.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0010_create_column_repository(self): @@ -90,16 +84,10 @@ def test_0010_create_column_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="column_maker/column_maker.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "column_maker/column_maker.tar", commit_message="Populate column_1430 with tool definitions.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0015_create_repository_dependency(self): diff --git a/lib/tool_shed/test/functional/test_1460_data_managers.py b/lib/tool_shed/test/functional/test_1460_data_managers.py index 7269e72fd12d..42ad60b4ea6d 100644 --- a/lib/tool_shed/test/functional/test_1460_data_managers.py +++ b/lib/tool_shed/test/functional/test_1460_data_managers.py @@ -55,17 +55,12 @@ def test_0010_create_data_manager_repository(self): category=category, strings_displayed=[], ) + assert repository, f"No repository created with name {data_manager_repository_name}" # Upload the data manager files to the repository. 
- self.upload_file( + self.commit_tar_to_repository( repository, - filename=data_manager_tar_file, - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + data_manager_tar_file, commit_message=f"Populate {data_manager_repository_name} with a data manager configuration.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0020_install_data_manager_repository(self): diff --git a/lib/tool_shed/test/functional/test_1470_updating_installed_repositories.py b/lib/tool_shed/test/functional/test_1470_updating_installed_repositories.py index 2019b0c8f477..fa3ac3cf55be 100644 --- a/lib/tool_shed/test/functional/test_1470_updating_installed_repositories.py +++ b/lib/tool_shed/test/functional/test_1470_updating_installed_repositories.py @@ -48,16 +48,10 @@ def test_0005_create_filtering_repository(self): category=category, strings_displayed=[], ) - self.upload_file( + self.commit_tar_to_repository( repository, - filename="filtering/filtering_1.1.0.tar", - filepath=None, - valid_tools_only=True, - uncompress_file=True, - remove_repo_files_not_in_tar=False, + "filtering/filtering_1.1.0.tar", commit_message="Uploaded filtering 1.1.0 tarball.", - strings_displayed=[], - strings_not_displayed=[], ) def test_0010_install_filtering_to_galaxy(self): @@ -85,17 +79,7 @@ def test_0015_update_repository(self): """ self.login(email=common.test_user_1_email, username=common.test_user_1_name) repository = self._get_repository_by_name_and_owner(repository_name, common.test_user_1_name) - self.upload_file( - repository, - filename="filtering/readme.txt", - filepath=None, - valid_tools_only=True, - uncompress_file=False, - remove_repo_files_not_in_tar=False, - commit_message="Uploaded readme.", - strings_displayed=[], - strings_not_displayed=[], - ) + self.add_file_to_repository(repository, "filtering/readme.txt") def test_0020_get_repository_updates(self): """Get updates to the installed repository. 
diff --git a/lib/tool_shed/test/test_data/bismark/bismark.tar b/lib/tool_shed/test/test_data/bismark/bismark.tar deleted file mode 100644 index e24183c72963..000000000000 Binary files a/lib/tool_shed/test/test_data/bismark/bismark.tar and /dev/null differ diff --git a/lib/tool_shed/test/test_data/emboss/0470_files/emboss_complex_dependency.tar b/lib/tool_shed/test/test_data/emboss/0470_files/emboss_complex_dependency.tar deleted file mode 100644 index 27189a12120e..000000000000 Binary files a/lib/tool_shed/test/test_data/emboss/0470_files/emboss_complex_dependency.tar and /dev/null differ diff --git a/lib/tool_shed/test/test_data/0480_files/tool_dependencies.xml b/lib/tool_shed/test/test_data/repos/0480/0/tool_dependencies.xml similarity index 100% rename from lib/tool_shed/test/test_data/0480_files/tool_dependencies.xml rename to lib/tool_shed/test/test_data/repos/0480/0/tool_dependencies.xml diff --git a/lib/tool_shed/test/test_data/repos/bismark/0/bismark_bowtie2_wrapper.xml b/lib/tool_shed/test/test_data/repos/bismark/0/bismark_bowtie2_wrapper.xml new file mode 100644 index 000000000000..68238f4ddd85 --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/bismark/0/bismark_bowtie2_wrapper.xml @@ -0,0 +1,616 @@ + + + bisulfite mapper (bowtie2) + + + SCRIPT_PATH + bowtie + bowtie2 + + + + bismark_wrapper.py + + ## Change this to accommodate the number of threads you have available. + --num-threads 4 + + --bismark_path \$SCRIPT_PATH + + --bowtie2 + + ## + ## Bismark Genome Preparation, if desired. + ## + + ## Handle reference file. 
+ #if $refGenomeSource.genomeSource == "history": + --own-file=$refGenomeSource.ownFile + #else: + --indexes-path ${refGenomeSource.index.fields.path} + #end if + + + ## + ## Input parameters + ## + + + #if $singlePaired.sPaired == "single": + --single-paired $singlePaired.input_singles + + #if $singlePaired.input_singles.ext == "fastqillumina": + --phred64-quals + --fastq + #elif $singlePaired.input_singles.ext == "fastqsanger": + --fastq + #elif $singlePaired.input_singles.ext == "fasta": + --fasta + #end if + #else: + --mate-paired + --mate1 $singlePaired.input_mate1 + --mate2 $singlePaired.input_mate2 + + #if $singlePaired.input_mate1.ext == "fastqillumina": + --phred64-quals + --fastq + #elif $singlePaired.input_mate1.ext == "fastqsanger": + --fastq + #elif $singlePaired.input_mate1.ext == "fasta": + --fasta + #end if + + -I $singlePaired.minInsert + -X $singlePaired.maxInsert + #end if + + + ## for now hardcode the value for the required memory per thread in --best mode + --chunkmbs 512 + + + #if $params.settingsType == "custom": + + ## default 20 + --seed-len $params.seed_len + ## default 0 + --seed-mismatches $params.seed_mismatches + ## default 15 + --seed-extention-attempts $params.seed_extention_attempts + ## default 2 + --max-reseed $params.max_reseed + + ## default 70 + ##--maqerr $params.maqerr + + ## default unlimited + #if $params.qupto != 0: + --qupto $params.qupto + #end if + #if $params.skip_reads != 0: + --skip-reads $params.skip_reads + #end if + + ## if set, disable the original behaviour + $params.no_mixed + ## if set, disable the original behaviour + $params.no_discordant + + + ###if str($params.isReportOutput) == "yes": + ## --output-report-file $report_file + ###end if + + #end if + + ## + ## Output parameters. 
+ ## + --output $output + $suppress_header + + #if str( $singlePaired.sPaired ) == "single" + #if $output_unmapped_reads_l + --output-unmapped-reads $output_unmapped_reads_l + #end if + #if $output_suppressed_reads_l + --output-suppressed-reads $output_suppressed_reads_l + #end if + #else + #if $output_unmapped_reads_l and $output_unmapped_reads_r + --output-unmapped-reads-l $output_unmapped_reads_l + --output-unmapped-reads-r $output_unmapped_reads_r + #end if + #if $output_suppressed_reads_l and $output_suppressed_reads_l + --output-suppressed-reads-l $output_suppressed_reads_l + --output-suppressed-reads-r $output_suppressed_reads_r + #end if + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (( + params['settingsType'] == "custom" and + params['suppressed_read_file'] is True + )) + + + + + + + + + + + + + + + + + singlePaired['sPaired'] == "paired" + params['settingsType'] == "custom" + params['supressed_read_file'] is True + + + + + + + + + + + + + + + + + + (( + params['settingsType'] == "custom" and + params['unmapped_read_file'] is True + )) + + + + + + + + + + + + + + + + singlePaired['sPaired'] == "paired" + params['settingsType'] == "custom" + params['unmapped_read_file'] is True + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Bismark_ is a bisulfite mapper and methylation caller. Bismark takes in FastA or FastQ files and aligns the +reads to a specified bisulfite genome. Sequence reads are transformed into a bisulfite converted forward strand +version (C->T conversion) or into a bisulfite treated reverse strand (G->A conversion of the forward strand). 
+Each of these reads are then aligned to bisulfite treated forward strand index of a reference genome +(C->T converted) and a bisulfite treated reverse strand index of the genome (G->A conversion of the +forward strand, by doing this alignments will produce the same positions). These 4 instances of Bowtie (1 or 2) +are run in parallel. The sequence file(s) are then read in again sequence by sequence to pull out the original +sequence from the genome and determine if there were any protected C's present or not. + +.. _Bismark: http://www.bioinformatics.babraham.ac.uk/projects/bismark/ + +As of version 0.7.0 Bismark will only run 2 alignment threads for OT and OB in parallel, the 4 strand mode can be +re-enabled by using non_directional mode. + +It is developed by Krueger F and Andrews SR. at the Babraham Institute. Krueger F, Andrews SR. (2011) Bismark: a flexible aligner and methylation caller for Bisulfite-Seq applications. Bioinformatics, 27, 1571-2. + +------ + +**Know what you are doing** + +.. class:: warningmark + +There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words = running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. + + .. __: http://www.bioinformatics.babraham.ac.uk/projects/bismark/ + +------ + +**Input formats** + +Bismark accepts files in either Sanger FASTQ format (galaxy type *fastqsanger*), Illumina FASTQ format (galaxy type *fastqillumina*) or FASTA format (galaxy type *fasta*). Use the FASTQ Groomer to prepare your files. 
+ +------ + +**A Note on Built-in Reference Genomes** + +The default variant for all genomes is "Full", defined as all primary chromosomes (or scaffolds/contigs) including mitochondrial plus associated unmapped, plasmid, and other segments. When only one version of a genome is available in this tool, it represents the default "Full" variant. Some genomes will have more than one variant available. The "Canonical Male" or sometimes simply "Canonical" variant contains the primary chromosomes for a genome. For example a human "Canonical" variant contains chr1-chr22, chrX, chrY, and chrM. The "Canonical Female" variant contains the primary chromosomes excluding chrY. + +------ + +The final output of Bismark is in SAM format by default. + +**Outputs** + +The output is in SAM format, and has the following columns:: + + Column Description + -------- -------------------------------------------------------- + 1 QNAME seq-ID + 2 FLAG this flag tries to take the strand a bisulfite read + originated from into account + (this is different from ordinary DNA alignment flags!) + 3 RNAME chromosome + 4 POS start position + 5 MAPQ always 255 + 6 CIGAR extended CIGAR string + 7 MRNM Mate Reference sequence NaMe ('=' if same as RNAME) + 8 MPOS 1-based Mate POSition + 9 ISIZE Inferred insert SIZE + 10 SEQ query SEQuence on the same strand as the reference + 11 QUAL Phred33 scale + 12 NM-tag edit distance to the reference) + 13 XX-tag base-by-base mismatches to the reference. + This does not include indels. + 14 XM-tag methylation call string + 15 XR-tag read conversion state for the alignment + 16 XG-tag genome conversion state for the alignment + + +Each read of paired-end alignments is written out in a separate line in the above format. 
+ + +It looks like this (scroll sideways to see the entire example):: + + QNAME FLAG RNAME POS MAPQ CIAGR MRNM MPOS ISIZE SEQ QUAL OPT + HWI-EAS91_1_30788AAXX:1:1:1761:343 4 * 0 0 * * 0 0 AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh + HWI-EAS91_1_30788AAXX:1:1:1578:331 4 * 0 0 * * 0 0 GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh + +------- + +**Bismark settings** + +All of the options have a default value. You can change any of them. If any Bismark function is missing please contact the tool author or your Galaxy admin. + +------ + +**Bismark parameter list** + +This is an exhaustive list of Bismark options: + +------ + +**OPTIONS** + + +Input:: + + --singles A comma- or space-separated list of files containing the reads to be aligned (e.g. + lane1.fq,lane2.fq lane3.fq). Reads may be a mix of different lengths. Bismark will + produce one mapping result and one report file per input file. + + -1 mates1 Comma-separated list of files containing the #1 mates (filename usually includes + "_1"), e.g. flyA_1.fq,flyB_1.fq). Sequences specified with this option must + correspond file-for-file and read-for-read with those specified in mates2. + Reads may be a mix of different lengths. Bismark will produce one mapping result + and one report file per paired-end input file pair. + + -2 mates2 Comma-separated list of files containing the #2 mates (filename usually includes + "_2"), e.g. flyA_1.fq,flyB_1.fq). Sequences specified with this option must + correspond file-for-file and read-for-read with those specified in mates1. + Reads may be a mix of different lengths. + + -q/--fastq The query input files (specified as mate1,mate2 or singles are FASTQ + files (usually having extension .fg or .fastq). This is the default. See also + --solexa-quals. 
+ + -f/--fasta The query input files (specified as mate1,mate2 or singles are FASTA + files (usually havin extension .fa, .mfa, .fna or similar). All quality values + are assumed to be 40 on the Phred scale. + + -s/--skip INT Skip (i.e. do not align) the first INT reads or read pairs from the input. + + -u/--upto INT Only aligns the first INT reads or read pairs from the input. Default: no limit. + + --phred33-quals FASTQ qualities are ASCII chars equal to the Phred quality plus 33. Default: on. + + --phred64-quals FASTQ qualities are ASCII chars equal to the Phred quality plus 64. Default: off. + + --solexa-quals Convert FASTQ qualities from solexa-scaled (which can be negative) to phred-scaled + (which can't). The formula for conversion is: + phred-qual = 10 * log(1 + 10 ** (solexa-qual/10.0)) / log(10). Used with -q. This + is usually the right option for use with (unconverted) reads emitted by the GA + Pipeline versions prior to 1.3. Works only for Bowtie 1. Default: off. + + --solexa1.3-quals Same as --phred64-quals. This is usually the right option for use with (unconverted) + reads emitted by GA Pipeline version 1.3 or later. Default: off. + + +Alignment:: + + -n/--seedmms INT The maximum number of mismatches permitted in the "seed", i.e. the first L base pairs + of the read (where L is set with -l/--seedlen). This may be 0, 1, 2 or 3 and the + default is 1. This option is only available for Bowtie 1 (for Bowtie 2 see -N). + + -l/--seedlen The "seed length"; i.e., the number of bases of the high quality end of the read to + which the -n ceiling applies. The default is 28. Bowtie (and thus Bismark) is faster for + larger values of -l. This option is only available for Bowtie 1 (for Bowtie 2 see -L). + + -e/--maqerr INT Maximum permitted total of quality values at all mismatched read positions throughout + the entire alignment, not just in the "seed". The default is 70. Like Maq, bowtie rounds + quality values to the nearest 10 and saturates at 30. 
This value is not relevant for + Bowtie 2. + + --chunkmbs INT The number of megabytes of memory a given thread is given to store path descriptors in + --best mode. Best-first search must keep track of many paths at once to ensure it is + always extending the path with the lowest cumulative cost. Bowtie tries to minimize the + memory impact of the descriptors, but they can still grow very large in some cases. If + you receive an error message saying that chunk memory has been exhausted in --best mode, + try adjusting this parameter up to dedicate more memory to the descriptors. This value + is not relevant for Bowtie 2. Default: 512. + + -I/--minins INT The minimum insert size for valid paired-end alignments. E.g. if -I 60 is specified and + a paired-end alignment consists of two 20-bp alignments in the appropriate orientation + with a 20-bp gap between them, that alignment is considered valid (as long as -X is also + satisfied). A 19-bp gap would not be valid in that case. Default: 0. + + -X/--maxins INT The maximum insert size for valid paired-end alignments. E.g. if -X 100 is specified and + a paired-end alignment consists of two 20-bp alignments in the proper orientation with a + 60-bp gap between them, that alignment is considered valid (as long as -I is also satisfied). + A 61-bp gap would not be valid in that case. Default: 500. + + + +Output:: + + --non_directional The sequencing library was constructed in a non strand-specific manner, alignments to all four + bisulfite strands will be reported. Default: OFF. + + (The current Illumina protocol for BS-Seq is directional, in which case the strands complementary + to the original strands are merely theoretical and should not exist in reality. Specifying directional + alignments (which is the default) will only run 2 alignment threads to the original top (OT) + or bottom (OB) strands in parallel and report these alignments. This is the recommended option + for sprand-specific libraries). 
+ + --sam-no-hd Suppress SAM header lines (starting with @). This might be useful when very large input files are + split up into several smaller files to run concurrently and the output files are to be merged. + + --quiet Print nothing besides alignments. + + --vanilla Performs bisulfite mapping with Bowtie 1 and prints the 'old' output (as in Bismark 0.5.X) instead + of SAM format output. + + -un/--unmapped Write all reads that could not be aligned to a file in the output directory. Written reads will + appear as they did in the input, without any translation of quality values that may have + taken place within Bowtie or Bismark. Paired-end reads will be written to two parallel files with _1 + and _2 inserted in their filenames, i.e. _unmapped_reads_1.txt and unmapped_reads_2.txt. Reads + with more than one valid alignment with the same number of lowest mismatches (ambiguous mapping) + are also written to _unmapped_reads.txt unless the option --ambiguous is specified as well. + + --ambiguous Write all reads which produce more than one valid alignment with the same number of lowest + mismatches or other reads that fail to align uniquely to a file in the output directory. + Written reads will appear as they did in the input, without any of the translation of quality + values that may have taken place within Bowtie or Bismark. Paired-end reads will be written to two + parallel files with _1 and _2 inserted in theit filenames, i.e. _ambiguous_reads_1.txt and + _ambiguous_reads_2.txt. These reads are not written to the file specified with --un. + + -o/--output_dir DIR Write all output files into this directory. By default the output files will be written into + the same folder as the input file(s). If the specified folder does not exist, Bismark will attempt + to create it first. The path to the output folder can be either relative or absolute. + + --temp_dir DIR Write temporary files to this directory instead of into the same directory as the input files. 
If + the specified folder does not exist, Bismark will attempt to create it first. The path to the + temporary folder can be either relative or absolute. + +------ + +Bowtie 2 alignment options:: + + -N INT Sets the number of mismatches to allowed in a seed alignment during multiseed alignment. + Can be set to 0 or 1. Setting this higher makes alignment slower (often much slower) + but increases sensitivity. Default: 0. This option is only available for Bowtie 2 (for + Bowtie 1 see -n). + + -L INT Sets the length of the seed substrings to align during multiseed alignment. Smaller values + make alignment slower but more senstive. Default: the --sensitive preset of Bowtie 2 is + used by default, which sets -L to 20. This option is only available for Bowtie 2 (for + Bowtie 1 see -l). + + --ignore-quals When calculating a mismatch penalty, always consider the quality value at the mismatched + position to be the highest possible, regardless of the actual value. I.e. input is treated + as though all quality values are high. This is also the default behavior when the input + doesn't specify quality values (e.g. in -f mode). This option is invariable and on by default. + + +Bowtie 2 paired-end options:: + + --no-mixed This option disables Bowtie 2's behavior to try to find alignments for the individual mates if + it cannot find a concordant or discordant alignment for a pair. This option is invariable and + and on by default. + + --no-discordant Normally, Bowtie 2 looks for discordant alignments if it cannot find any concordant alignments. + A discordant alignment is an alignment where both mates align uniquely, but that does not + satisfy the paired-end constraints (--fr/--rf/--ff, -I, -X). This option disables that behavior + and it is on by default. + + +Bowtie 2 effort options:: + + -D INT Up to INT consecutive seed extension attempts can "fail" before Bowtie 2 moves on, using + the alignments found so far. 
A seed extension "fails" if it does not yield a new best or a + new second-best alignment. Default: 15. + + -R INT INT is the maximum number of times Bowtie 2 will "re-seed" reads with repetitive seeds. + When "re-seeding," Bowtie 2 simply chooses a new set of reads (same length, same number of + mismatches allowed) at different offsets and searches for more alignments. A read is considered + to have repetitive seeds if the total number of seed hits divided by the number of seeds + that aligned at least once is greater than 300. Default: 2. + + +Bowtie 2 Scoring options:: + + --score_min "func" Sets a function governing the minimum alignment score needed for an alignment to be considered + "valid" (i.e. good enough to report). This is a function of read length. For instance, specifying + L,0,-0.2 sets the minimum-score function f to f(x) = 0 + -0.2 * x, where x is the read length. + See also: setting function options at http://bowtie-bio.sourceforge.net/bowtie2. The default is + L,0,-0.2. + + +Bowtie 2 Reporting options:: + + --most_valid_alignments INT This used to be the Bowtie 2 parameter -M. As of Bowtie 2 version 2.0.0 beta7 the option -M is + deprecated. It will be removed in subsequent versions. What used to be called -M mode is still the + default mode, but adjusting the -M setting is deprecated. Use the -D and -R options to adjust the + effort expended to find valid alignments. + + For reference, this used to be the old (now deprecated) description of -M: + Bowtie 2 searches for at most INT+1 distinct, valid alignments for each read. The search terminates when it + can't find more distinct valid alignments, or when it finds INT+1 distinct alignments, whichever + happens first. Only the best alignment is reported. Information from the other alignments is used to + estimate mapping quality and to set SAM optional fields, such as AS:i and XS:i. 
Increasing -M makes + Bowtie 2 slower, but increases the likelihood that it will pick the correct alignment for a read that + aligns many places. For reads that have more than INT+1 distinct, valid alignments, Bowtie 2 does not + guarantee that the alignment reported is the best possible in terms of alignment score. -M is + always used and its default value is set to 10. + + + diff --git a/lib/tool_shed/test/test_data/repos/bismark/0/bismark_bowtie_wrapper.xml b/lib/tool_shed/test/test_data/repos/bismark/0/bismark_bowtie_wrapper.xml new file mode 100644 index 000000000000..6e4e4def6200 --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/bismark/0/bismark_bowtie_wrapper.xml @@ -0,0 +1,614 @@ + + + bisulfite mapper (bowtie) + + + SCRIPT_PATH + bowtie + bowtie2 + + + + bismark_wrapper.py + + ## Change this to accommodate the number of threads you have available. + --num-threads 4 + + --bismark_path \$SCRIPT_PATH + + ## + ## Bismark Genome Preparation, if desired. + ## + + ## Handle reference file. 
+ #if $refGenomeSource.genomeSource == "history": + --own-file=$refGenomeSource.ownFile + #else: + --indexes-path ${refGenomeSource.index.fields.path} + #end if + + + ## + ## Input parameters + ## + + + #if $singlePaired.sPaired == "single": + --single-paired $singlePaired.input_singles + + #if $singlePaired.input_singles.ext == "fastqillumina": + --phred64-quals + --fastq + #elif $singlePaired.input_singles.ext == "fastqsanger": + --fastq + #elif $singlePaired.input_singles.ext == "fasta": + --fasta + #end if + #else: + --mate-paired + --mate1 $singlePaired.input_mate1 + --mate2 $singlePaired.input_mate2 + + #if $singlePaired.input_mate1.ext == "fastqillumina": + --phred64-quals + --fastq + #elif $singlePaired.input_mate1.ext == "fastqsanger": + --fastq + #elif $singlePaired.input_mate1.ext == "fasta": + --fasta + #end if + + -I $singlePaired.minInsert + -X $singlePaired.maxInsert + #end if + + + ## for now hardcode the value for the required memory per thread in --best mode + --chunkmbs 512 + + + #if $params.settingsType == "custom": + + ## default 20 + --seed-len $params.seed_len + ## default 0 + --seed-mismatches $params.seed_mismatches + ## default 15 + --seed-extention-attempts $params.seed_extention_attempts + ## default 2 + --max-reseed $params.max_reseed + + ## default 70 + ##--maqerr $params.maqerr + + ## default unlimited + #if $params.qupto != 0: + --qupto $params.qupto + #end if + #if $params.skip_reads != 0: + --skip-reads $params.skip_reads + #end if + + ## if set, disable the original behaviour + $params.no_mixed + ## if set, disable the original behaviour + $params.no_discordant + + + ###if str($params.isReportOutput) == "yes": + ## --output-report-file $report_file + ###end if + + #end if + + ## + ## Output parameters. 
+ ## + --output $output + $suppress_header + + #if str( $singlePaired.sPaired ) == "single" + #if $output_unmapped_reads_l + --output-unmapped-reads $output_unmapped_reads_l + #end if + #if $output_suppressed_reads_l + --output-suppressed-reads $output_suppressed_reads_l + #end if + #else + #if $output_unmapped_reads_l and $output_unmapped_reads_r + --output-unmapped-reads-l $output_unmapped_reads_l + --output-unmapped-reads-r $output_unmapped_reads_r + #end if + #if $output_suppressed_reads_l and $output_suppressed_reads_l + --output-suppressed-reads-l $output_suppressed_reads_l + --output-suppressed-reads-r $output_suppressed_reads_r + #end if + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (( + params['settingsType'] == "custom" and + params['suppressed_read_file'] is True + )) + + + + + + + + + + + + + + + + + singlePaired['sPaired'] == "paired" + params['settingsType'] == "custom" + params['supressed_read_file'] is True + + + + + + + + + + + + + + + + + + (( + params['settingsType'] == "custom" and + params['unmapped_read_file'] is True + )) + + + + + + + + + + + + + + + + singlePaired['sPaired'] == "paired" + params['settingsType'] == "custom" + params['unmapped_read_file'] is True + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Bismark_ is a bisulfite mapper and methylation caller. Bismark takes in FastA or FastQ files and aligns the +reads to a specified bisulfite genome. Sequence reads are transformed into a bisulfite converted forward strand +version (C->T conversion) or into a bisulfite treated reverse strand (G->A conversion of the forward strand). 
+Each of these reads are then aligned to bisulfite treated forward strand index of a reference genome +(C->T converted) and a bisulfite treated reverse strand index of the genome (G->A conversion of the +forward strand, by doing this alignments will produce the same positions). These 4 instances of Bowtie (1 or 2) +are run in parallel. The sequence file(s) are then read in again sequence by sequence to pull out the original +sequence from the genome and determine if there were any protected C's present or not. + +.. _Bismark: http://www.bioinformatics.babraham.ac.uk/projects/bismark/ + +As of version 0.7.0 Bismark will only run 2 alignment threads for OT and OB in parallel, the 4 strand mode can be +re-enabled by using non_directional mode. + +It is developed by Krueger F and Andrews SR. at the Babraham Institute. Krueger F, Andrews SR. (2011) Bismark: a flexible aligner and methylation caller for Bisulfite-Seq applications. Bioinformatics, 27, 1571-2. + +------ + +**Know what you are doing** + +.. class:: warningmark + +There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words = running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. + + .. __: http://www.bioinformatics.babraham.ac.uk/projects/bismark/ + +------ + +**Input formats** + +Bismark accepts files in either Sanger FASTQ format (galaxy type *fastqsanger*), Illumina FASTQ format (galaxy type *fastqillumina*) or FASTA format (galaxy type *fasta*). Use the FASTQ Groomer to prepare your files. 
+ +------ + +**A Note on Built-in Reference Genomes** + +The default variant for all genomes is "Full", defined as all primary chromosomes (or scaffolds/contigs) including mitochondrial plus associated unmapped, plasmid, and other segments. When only one version of a genome is available in this tool, it represents the default "Full" variant. Some genomes will have more than one variant available. The "Canonical Male" or sometimes simply "Canonical" variant contains the primary chromosomes for a genome. For example a human "Canonical" variant contains chr1-chr22, chrX, chrY, and chrM. The "Canonical Female" variant contains the primary chromosomes excluding chrY. + +------ + +The final output of Bismark is in SAM format by default. + +**Outputs** + +The output is in SAM format, and has the following columns:: + + Column Description + -------- -------------------------------------------------------- + 1 QNAME seq-ID + 2 FLAG this flag tries to take the strand a bisulfite read + originated from into account + (this is different from ordinary DNA alignment flags!) + 3 RNAME chromosome + 4 POS start position + 5 MAPQ always 255 + 6 CIGAR extended CIGAR string + 7 MRNM Mate Reference sequence NaMe ('=' if same as RNAME) + 8 MPOS 1-based Mate POSition + 9 ISIZE Inferred insert SIZE + 10 SEQ query SEQuence on the same strand as the reference + 11 QUAL Phred33 scale + 12 NM-tag edit distance to the reference) + 13 XX-tag base-by-base mismatches to the reference. + This does not include indels. + 14 XM-tag methylation call string + 15 XR-tag read conversion state for the alignment + 16 XG-tag genome conversion state for the alignment + + +Each read of paired-end alignments is written out in a separate line in the above format. 
+ + +It looks like this (scroll sideways to see the entire example):: + + QNAME FLAG RNAME POS MAPQ CIAGR MRNM MPOS ISIZE SEQ QUAL OPT + HWI-EAS91_1_30788AAXX:1:1:1761:343 4 * 0 0 * * 0 0 AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh + HWI-EAS91_1_30788AAXX:1:1:1578:331 4 * 0 0 * * 0 0 GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh + +------- + +**Bismark settings** + +All of the options have a default value. You can change any of them. If any Bismark function is missing please contact the tool author or your Galaxy admin. + +------ + +**Bismark parameter list** + +This is an exhaustive list of Bismark options: + +------ + +**OPTIONS** + + +Input:: + + --singles A comma- or space-separated list of files containing the reads to be aligned (e.g. + lane1.fq,lane2.fq lane3.fq). Reads may be a mix of different lengths. Bismark will + produce one mapping result and one report file per input file. + + -1 mates1 Comma-separated list of files containing the #1 mates (filename usually includes + "_1"), e.g. flyA_1.fq,flyB_1.fq). Sequences specified with this option must + correspond file-for-file and read-for-read with those specified in mates2. + Reads may be a mix of different lengths. Bismark will produce one mapping result + and one report file per paired-end input file pair. + + -2 mates2 Comma-separated list of files containing the #2 mates (filename usually includes + "_2"), e.g. flyA_1.fq,flyB_1.fq). Sequences specified with this option must + correspond file-for-file and read-for-read with those specified in mates1. + Reads may be a mix of different lengths. + + -q/--fastq The query input files (specified as mate1,mate2 or singles are FASTQ + files (usually having extension .fg or .fastq). This is the default. See also + --solexa-quals. 
+ + -f/--fasta The query input files (specified as mate1,mate2 or singles are FASTA + files (usually havin extension .fa, .mfa, .fna or similar). All quality values + are assumed to be 40 on the Phred scale. + + -s/--skip INT Skip (i.e. do not align) the first INT reads or read pairs from the input. + + -u/--upto INT Only aligns the first INT reads or read pairs from the input. Default: no limit. + + --phred33-quals FASTQ qualities are ASCII chars equal to the Phred quality plus 33. Default: on. + + --phred64-quals FASTQ qualities are ASCII chars equal to the Phred quality plus 64. Default: off. + + --solexa-quals Convert FASTQ qualities from solexa-scaled (which can be negative) to phred-scaled + (which can't). The formula for conversion is: + phred-qual = 10 * log(1 + 10 ** (solexa-qual/10.0)) / log(10). Used with -q. This + is usually the right option for use with (unconverted) reads emitted by the GA + Pipeline versions prior to 1.3. Works only for Bowtie 1. Default: off. + + --solexa1.3-quals Same as --phred64-quals. This is usually the right option for use with (unconverted) + reads emitted by GA Pipeline version 1.3 or later. Default: off. + + +Alignment:: + + -n/--seedmms INT The maximum number of mismatches permitted in the "seed", i.e. the first L base pairs + of the read (where L is set with -l/--seedlen). This may be 0, 1, 2 or 3 and the + default is 1. This option is only available for Bowtie 1 (for Bowtie 2 see -N). + + -l/--seedlen The "seed length"; i.e., the number of bases of the high quality end of the read to + which the -n ceiling applies. The default is 28. Bowtie (and thus Bismark) is faster for + larger values of -l. This option is only available for Bowtie 1 (for Bowtie 2 see -L). + + -e/--maqerr INT Maximum permitted total of quality values at all mismatched read positions throughout + the entire alignment, not just in the "seed". The default is 70. Like Maq, bowtie rounds + quality values to the nearest 10 and saturates at 30. 
This value is not relevant for + Bowtie 2. + + --chunkmbs INT The number of megabytes of memory a given thread is given to store path descriptors in + --best mode. Best-first search must keep track of many paths at once to ensure it is + always extending the path with the lowest cumulative cost. Bowtie tries to minimize the + memory impact of the descriptors, but they can still grow very large in some cases. If + you receive an error message saying that chunk memory has been exhausted in --best mode, + try adjusting this parameter up to dedicate more memory to the descriptors. This value + is not relevant for Bowtie 2. Default: 512. + + -I/--minins INT The minimum insert size for valid paired-end alignments. E.g. if -I 60 is specified and + a paired-end alignment consists of two 20-bp alignments in the appropriate orientation + with a 20-bp gap between them, that alignment is considered valid (as long as -X is also + satisfied). A 19-bp gap would not be valid in that case. Default: 0. + + -X/--maxins INT The maximum insert size for valid paired-end alignments. E.g. if -X 100 is specified and + a paired-end alignment consists of two 20-bp alignments in the proper orientation with a + 60-bp gap between them, that alignment is considered valid (as long as -I is also satisfied). + A 61-bp gap would not be valid in that case. Default: 500. + + + +Output:: + + --non_directional The sequencing library was constructed in a non strand-specific manner, alignments to all four + bisulfite strands will be reported. Default: OFF. + + (The current Illumina protocol for BS-Seq is directional, in which case the strands complementary + to the original strands are merely theoretical and should not exist in reality. Specifying directional + alignments (which is the default) will only run 2 alignment threads to the original top (OT) + or bottom (OB) strands in parallel and report these alignments. This is the recommended option + for sprand-specific libraries). 
+ + --sam-no-hd Suppress SAM header lines (starting with @). This might be useful when very large input files are + split up into several smaller files to run concurrently and the output files are to be merged. + + --quiet Print nothing besides alignments. + + --vanilla Performs bisulfite mapping with Bowtie 1 and prints the 'old' output (as in Bismark 0.5.X) instead + of SAM format output. + + -un/--unmapped Write all reads that could not be aligned to a file in the output directory. Written reads will + appear as they did in the input, without any translation of quality values that may have + taken place within Bowtie or Bismark. Paired-end reads will be written to two parallel files with _1 + and _2 inserted in their filenames, i.e. _unmapped_reads_1.txt and unmapped_reads_2.txt. Reads + with more than one valid alignment with the same number of lowest mismatches (ambiguous mapping) + are also written to _unmapped_reads.txt unless the option --ambiguous is specified as well. + + --ambiguous Write all reads which produce more than one valid alignment with the same number of lowest + mismatches or other reads that fail to align uniquely to a file in the output directory. + Written reads will appear as they did in the input, without any of the translation of quality + values that may have taken place within Bowtie or Bismark. Paired-end reads will be written to two + parallel files with _1 and _2 inserted in theit filenames, i.e. _ambiguous_reads_1.txt and + _ambiguous_reads_2.txt. These reads are not written to the file specified with --un. + + -o/--output_dir DIR Write all output files into this directory. By default the output files will be written into + the same folder as the input file(s). If the specified folder does not exist, Bismark will attempt + to create it first. The path to the output folder can be either relative or absolute. + + --temp_dir DIR Write temporary files to this directory instead of into the same directory as the input files. 
If + the specified folder does not exist, Bismark will attempt to create it first. The path to the + temporary folder can be either relative or absolute. + +------ + +Bowtie 2 alignment options:: + + -N INT Sets the number of mismatches to allowed in a seed alignment during multiseed alignment. + Can be set to 0 or 1. Setting this higher makes alignment slower (often much slower) + but increases sensitivity. Default: 0. This option is only available for Bowtie 2 (for + Bowtie 1 see -n). + + -L INT Sets the length of the seed substrings to align during multiseed alignment. Smaller values + make alignment slower but more senstive. Default: the --sensitive preset of Bowtie 2 is + used by default, which sets -L to 20. This option is only available for Bowtie 2 (for + Bowtie 1 see -l). + + --ignore-quals When calculating a mismatch penalty, always consider the quality value at the mismatched + position to be the highest possible, regardless of the actual value. I.e. input is treated + as though all quality values are high. This is also the default behavior when the input + doesn't specify quality values (e.g. in -f mode). This option is invariable and on by default. + + +Bowtie 2 paired-end options:: + + --no-mixed This option disables Bowtie 2's behavior to try to find alignments for the individual mates if + it cannot find a concordant or discordant alignment for a pair. This option is invariable and + and on by default. + + --no-discordant Normally, Bowtie 2 looks for discordant alignments if it cannot find any concordant alignments. + A discordant alignment is an alignment where both mates align uniquely, but that does not + satisfy the paired-end constraints (--fr/--rf/--ff, -I, -X). This option disables that behavior + and it is on by default. + + +Bowtie 2 effort options:: + + -D INT Up to INT consecutive seed extension attempts can "fail" before Bowtie 2 moves on, using + the alignments found so far. 
A seed extension "fails" if it does not yield a new best or a + new second-best alignment. Default: 15. + + -R INT INT is the maximum number of times Bowtie 2 will "re-seed" reads with repetitive seeds. + When "re-seeding," Bowtie 2 simply chooses a new set of reads (same length, same number of + mismatches allowed) at different offsets and searches for more alignments. A read is considered + to have repetitive seeds if the total number of seed hits divided by the number of seeds + that aligned at least once is greater than 300. Default: 2. + + +Bowtie 2 Scoring options:: + + --score_min "func" Sets a function governing the minimum alignment score needed for an alignment to be considered + "valid" (i.e. good enough to report). This is a function of read length. For instance, specifying + L,0,-0.2 sets the minimum-score function f to f(x) = 0 + -0.2 * x, where x is the read length. + See also: setting function options at http://bowtie-bio.sourceforge.net/bowtie2. The default is + L,0,-0.2. + + +Bowtie 2 Reporting options:: + + --most_valid_alignments INT This used to be the Bowtie 2 parameter -M. As of Bowtie 2 version 2.0.0 beta7 the option -M is + deprecated. It will be removed in subsequent versions. What used to be called -M mode is still the + default mode, but adjusting the -M setting is deprecated. Use the -D and -R options to adjust the + effort expended to find valid alignments. + + For reference, this used to be the old (now deprecated) description of -M: + Bowtie 2 searches for at most INT+1 distinct, valid alignments for each read. The search terminates when it + can't find more distinct valid alignments, or when it finds INT+1 distinct alignments, whichever + happens first. Only the best alignment is reported. Information from the other alignments is used to + estimate mapping quality and to set SAM optional fields, such as AS:i and XS:i. 
Increasing -M makes + Bowtie 2 slower, but increases the likelihood that it will pick the correct alignment for a read that + aligns many places. For reads that have more than INT+1 distinct, valid alignments, Bowtie 2 does not + guarantee that the alignment reported is the best possible in terms of alignment score. -M is + always used and its default value is set to 10. + + + diff --git a/lib/tool_shed/test/test_data/repos/bismark/0/bismark_genome_preparation b/lib/tool_shed/test/test_data/repos/bismark/0/bismark_genome_preparation new file mode 100755 index 000000000000..1895a296632c --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/bismark/0/bismark_genome_preparation @@ -0,0 +1,492 @@ +#!/usr/bin/perl -- +use strict; +use warnings; +use Cwd; +use File::Path qw(rmtree); +$|++; + + +## This program is Copyright (C) 2010-12, Felix Krueger (felix.krueger@bbsrc.ac.uk) + +## This program is free software: you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation, either version 3 of the License, or +## (at your option) any later version. + +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. + +## You should have received a copy of the GNU General Public License +## along with this program. If not, see . 
+ +use Getopt::Long; +use Cwd; + +my $verbose; +my $help; +my $version; +my $man; +my $path_to_bowtie; +my $multi_fasta; +my $single_fasta; +my $bowtie2; + +my $bismark_version = 'v0.7.7'; + +GetOptions ('verbose' => \$verbose, + 'help' => \$help, + 'man' => \$man, + 'version' => \$version, + 'path_to_bowtie:s' => \$path_to_bowtie, + 'single_fasta' => \$single_fasta, + 'bowtie2' => \$bowtie2, + ); + +my $genome_folder = shift @ARGV; # mandatory +my $CT_dir; +my $GA_dir; + +if ($help or $man){ + print_helpfile(); + exit; +} + +if ($version){ + print << "VERSION"; + + Bismark - Bisulfite Mapper and Methylation Caller. + + Bismark Genome Preparation Version: $bismark_version + Copyright 2010-12 Felix Krueger, Babraham Bioinformatics + www.bioinformatics.babraham.ac.uk/projects/ + +VERSION + exit; +} + +if ($single_fasta){ + print "Writing individual genomes out into single-entry fasta files (one per chromosome)\n\n"; + $multi_fasta = 0; +} +else{ + print "Writing bisulfite genomes out into a single MFA (multi FastA) file\n\n"; + $single_fasta = 0; + $multi_fasta = 1; +} + +my @filenames = create_bisulfite_genome_folders(); + +process_sequence_files (); + +launch_bowtie_indexer(); + +sub launch_bowtie_indexer{ + if ($bowtie2){ + print "Bismark Genome Preparation - Step III: Launching the Bowtie 2 indexer\n"; + } + else{ + print "Bismark Genome Preparation - Step III: Launching the Bowtie (1) indexer\n"; + } + print "Please be aware that this process can - depending on genome size - take up to several hours!\n"; + sleep(5); + + ### if the path to bowtie was specfified explicitely + if ($path_to_bowtie){ + if ($bowtie2){ + $path_to_bowtie =~ s/$/bowtie2-build/; + } + else{ + $path_to_bowtie =~ s/$/bowtie-build/; + } + } + ### otherwise we assume that bowtie-build is in the path + else{ + if ($bowtie2){ + $path_to_bowtie = 'bowtie2-build'; + } + else{ + $path_to_bowtie = 'bowtie-build'; + } + } + + $verbose and print "\n"; + + ### Forking the program to run 2 instances of 
Bowtie-build or Bowtie2-build (= the Bowtie (1/2) indexer) + my $pid = fork(); + + # parent process + if ($pid){ + sleep(1); + chdir $CT_dir or die "Unable to change directory: $!\n"; + $verbose and warn "Preparing indexing of CT converted genome in $CT_dir\n"; + my @fasta_files = <*.fa>; + my $file_list = join (',',@fasta_files); + $verbose and print "Parent process: Starting to index C->T converted genome with the following command:\n\n"; + $verbose and print "$path_to_bowtie -f $file_list BS_CT\n\n"; + + sleep (11); + exec ("$path_to_bowtie","-f","$file_list","BS_CT"); + } + + # child process + elsif ($pid == 0){ + sleep(2); + chdir $GA_dir or die "Unable to change directory: $!\n"; + $verbose and warn "Preparing indexing of GA converted genome in $GA_dir\n"; + my @fasta_files = <*.fa>; + my $file_list = join (',',@fasta_files); + $verbose and print "Child process: Starting to index G->A converted genome with the following command:\n\n"; + $verbose and print "$path_to_bowtie -f $file_list BS_GA\n\n"; + $verbose and print "(starting in 10 seconds)\n"; + sleep(10); + exec ("$path_to_bowtie","-f","$file_list","BS_GA"); + } + + # if the platform doesn't support the fork command we will run the indexing processes one after the other + else{ + print "Forking process was not successful, therefore performing the indexing sequentially instead\n"; + sleep(10); + + ### moving to CT genome folder + $verbose and warn "Preparing to index CT converted genome in $CT_dir\n"; + chdir $CT_dir or die "Unable to change directory: $!\n"; + my @fasta_files = <*.fa>; + my $file_list = join (',',@fasta_files); + $verbose and print "$file_list\n\n"; + sleep(2); + system ("$path_to_bowtie","-f","$file_list","BS_CT"); + @fasta_files=(); + $file_list= ''; + + ### moving to GA genome folder + $verbose and warn "Preparing to index GA converted genome in $GA_dir\n"; + chdir $GA_dir or die "Unable to change directory: $!\n"; + @fasta_files = <*.fa>; + $file_list = join (',',@fasta_files); + 
$verbose and print "$file_list\n\n"; + sleep(2); + exec ("$path_to_bowtie","-f","$file_list","BS_GA"); + } +} + + +sub process_sequence_files { + + my ($total_CT_conversions,$total_GA_conversions) = (0,0); + $verbose and print "Bismark Genome Preparation - Step II: Bisulfite converting reference genome\n\n"; + sleep (3); + + $verbose and print "conversions performed:\n"; + $verbose and print join("\t",'chromosome','C->T','G->A'),"\n"; + + + ### If someone wants to index a genome which consists of thousands of contig and scaffold files we need to write the genome conversions into an MFA file + ### Otherwise the list of comma separated chromosomes we provide for bowtie-build will get too long for the kernel to handle + ### This is now the default option + + if ($multi_fasta){ + ### Here we just use one multi FastA file name, append .CT_conversion or .GA_conversion and print all sequence conversions into these files + my $bisulfite_CT_conversion_filename = "$CT_dir/genome_mfa.CT_conversion.fa"; + open (CT_CONVERT,'>',$bisulfite_CT_conversion_filename) or die "Can't write to file $bisulfite_CT_conversion_filename: $!\n"; + + my $bisulfite_GA_conversion_filename = "$GA_dir/genome_mfa.GA_conversion.fa"; + open (GA_CONVERT,'>',$bisulfite_GA_conversion_filename) or die "Can't write to file $bisulfite_GA_conversion_filename: $!\n"; + } + + foreach my $filename(@filenames){ + my ($chromosome_CT_conversions,$chromosome_GA_conversions) = (0,0); + open (IN,$filename) or die "Failed to read from sequence file $filename $!\n"; + # warn "Reading chromosome information from $filename\n\n"; + + ### first line needs to be a fastA header + my $first_line = ; + chomp $first_line; + + ### Extracting chromosome name from the FastA header + my $chromosome_name = extract_chromosome_name($first_line); + + ### alternatively, chromosomes can be written out into single-entry FastA files. This will only work for genomes with up to a few hundred chromosomes. 
+ unless ($multi_fasta){ + my $bisulfite_CT_conversion_filename = "$CT_dir/$chromosome_name"; + $bisulfite_CT_conversion_filename =~ s/$/.CT_conversion.fa/; + open (CT_CONVERT,'>',$bisulfite_CT_conversion_filename) or die "Can't write to file $bisulfite_CT_conversion_filename: $!\n"; + + my $bisulfite_GA_conversion_filename = "$GA_dir/$chromosome_name"; + $bisulfite_GA_conversion_filename =~ s/$/.GA_conversion.fa/; + open (GA_CONVERT,'>',$bisulfite_GA_conversion_filename) or die "Can't write to file $bisulfite_GA_conversion_filename: $!\n"; + } + + print CT_CONVERT ">",$chromosome_name,"_CT_converted\n"; # first entry + print GA_CONVERT ">",$chromosome_name,"_GA_converted\n"; # first entry + + + while (){ + + ### in case the line is a new fastA header + if ($_ =~ /^>/){ + ### printing out the stats for the previous chromosome + $verbose and print join ("\t",$chromosome_name,$chromosome_CT_conversions,$chromosome_GA_conversions),"\n"; + ### resetting the chromosome transliteration counters + ($chromosome_CT_conversions,$chromosome_GA_conversions) = (0,0); + + ### Extracting chromosome name from the additional FastA header + $chromosome_name = extract_chromosome_name($_); + + ### alternatively, chromosomes can be written out into single-entry FastA files. This will only work for genomes with up to a few hundred chromosomes. 
+ unless ($multi_fasta){ + my $bisulfite_CT_conversion_filename = "$CT_dir/$chromosome_name"; + $bisulfite_CT_conversion_filename =~ s/$/.CT_conversion.fa/; + open (CT_CONVERT,'>',$bisulfite_CT_conversion_filename) or die "Can't write to file $bisulfite_CT_conversion_filename: $!\n"; + + my $bisulfite_GA_conversion_filename = "$GA_dir/$chromosome_name"; + $bisulfite_GA_conversion_filename =~ s/$/.GA_conversion.fa/; + open (GA_CONVERT,'>',$bisulfite_GA_conversion_filename) or die "Can't write to file $bisulfite_GA_conversion_filename: $!\n"; + } + + print CT_CONVERT ">",$chromosome_name,"_CT_converted\n"; + print GA_CONVERT ">",$chromosome_name,"_GA_converted\n"; + } + + else{ + my $sequence = uc$_; + + ### (I) First replacing all ambiguous sequence characters (such as M,S,R....) by N (G,A,T,C,N and the line endings \r and \n are added to a character group) + + $sequence =~ s/[^ATCGN\n\r]/N/g; + + ### (II) Writing the chromosome out into a C->T converted version (equals forward strand conversion) + + my $CT_sequence = $sequence; + my $CT_transliterations_performed = ($CT_sequence =~ tr/C/T/); # converts all Cs into Ts + $total_CT_conversions += $CT_transliterations_performed; + $chromosome_CT_conversions += $CT_transliterations_performed; + + print CT_CONVERT $CT_sequence; + + ### (III) Writing the chromosome out in a G->A converted version of the forward strand (this is equivalent to reverse- + ### complementing the forward strand and then C->T converting it) + + my $GA_sequence = $sequence; + my $GA_transliterations_performed = ($GA_sequence =~ tr/G/A/); # converts all Gs to As on the forward strand + $total_GA_conversions += $GA_transliterations_performed; + $chromosome_GA_conversions += $GA_transliterations_performed; + + print GA_CONVERT $GA_sequence; + + } + } + $verbose and print join ("\t",$chromosome_name,$chromosome_CT_conversions,$chromosome_GA_conversions),"\n"; + } + close (CT_CONVERT) or die "Failed to close filehandle: $!\n"; + close (GA_CONVERT) or 
die "Failed to close filehandle: $!\n"; + + + print "\nTotal number of conversions performed:\n"; + print "C->T:\t$total_CT_conversions\n"; + print "G->A:\t$total_GA_conversions\n"; + + warn "\nStep II - Genome bisulfite conversions - completed\n\n\n"; +} + +sub extract_chromosome_name { + + my $header = shift; + + ## Bowtie extracts the first string after the initial > in the FASTA file, so we are doing this as well + + if ($header =~ s/^>//){ + my ($chromosome_name) = split (/\s+/,$header); + return $chromosome_name; + } + else{ + die "The specified chromosome file doesn't seem to be in FASTA format as required! $!\n"; + } +} + +sub create_bisulfite_genome_folders{ + + $verbose and print "Bismark Genome Preparation - Step I: Preparing folders\n\n"; + + # Ensuring a genome folder has been specified + if ($genome_folder){ + unless ($genome_folder =~ /\/$/){ + $genome_folder =~ s/$/\//; + } + $verbose and print "Path to genome folder specified: $genome_folder\n"; + chdir $genome_folder or die "Could't move to directory $genome_folder. Make sure the directory exists! $!"; + + # making the genome folder path abolsolute so it won't break if the path was specified relative + $genome_folder = getcwd; + unless ($genome_folder =~ /\/$/){ + $genome_folder =~ s/$/\//; + } + } + + else{ + $verbose and print "Genome folder was not provided as argument "; + while (1){ + print "Please specify a genome folder to be bisulfite converted:\n"; + $genome_folder = ; + chomp $genome_folder; + + # adding a trailing slash unless already present + unless ($genome_folder =~ /\/$/){ + $genome_folder =~ s/$/\//; + } + if (chdir $genome_folder){ + last; + } + else{ + warn "Could't move to directory $genome_folder! 
$!"; + } + } + } + + if ($path_to_bowtie){ + unless ($path_to_bowtie =~ /\/$/){ + $path_to_bowtie =~ s/$/\//; + } + if (chdir $path_to_bowtie){ + if ($bowtie2){ + $verbose and print "Path to Bowtie 2 specified: $path_to_bowtie\n"; + } + else{ + $verbose and print "Path to Bowtie (1) specified: $path_to_bowtie\n"; + } + } + else{ + die "There was an error with the path to bowtie: $!\n"; + } + } + + chdir $genome_folder or die "Could't move to directory $genome_folder. Make sure the directory exists! $!"; + + + # Exiting unless there are fastA files in the folder + my @filenames = <*.fa>; + + ### if there aren't any genomic files with the extension .fa we will look for files with the extension .fasta + unless (@filenames){ + @filenames = <*.fasta>; + } + + unless (@filenames){ + die "The specified genome folder $genome_folder does not contain any sequence files in FastA format (with .fa or .fasta file extensions\n"; + } + + warn "Bisulfite Genome Indexer version $bismark_version (last modified 17 Nov 2011)\n\n"; + sleep (3); + + # creating a directory inside the genome folder to store the bisfulfite genomes unless it already exists + my $bisulfite_dir = "${genome_folder}Bisulfite_Genome/"; + unless (-d $bisulfite_dir){ + mkdir $bisulfite_dir or die "Unable to create directory $bisulfite_dir $!\n"; + $verbose and print "Created Bisulfite Genome folder $bisulfite_dir\n"; + } + else{ + while (1){ + print "\nA directory called $bisulfite_dir already exists. 
Bisulfite converted sequences and/or already existing Bowtie (1 or 2) indexes might be overwritten!\nDo you want to continue anyway?\t"; + my $proceed = ; + chomp $proceed; + if ($proceed =~ /^y/i ){ + last; + } + elsif ($proceed =~ /^n/i){ + die "Terminated by user\n\n"; + } + } + } + + ### as of version 0.6.0 the Bismark indexer will no longer delete the Bisulfite_Genome directory if it was present already, since it could store the Bowtie 1 or 2 indexes already + # removing any existing files and subfolders in the bisulfite directory (the specified directory won't be deleted) + # rmtree($bisulfite_dir, {verbose => 1,keep_root => 1}); + # unless (-d $bisulfite_dir){ # had to add this after changing remove_tree to rmtree // suggested by Samantha Cooper @ Illumina + # mkdir $bisulfite_dir or die "Unable to create directory $bisulfite_dir $!\n"; + # } + # } + + chdir $bisulfite_dir or die "Unable to move to $bisulfite_dir\n"; + $CT_dir = "${bisulfite_dir}CT_conversion/"; + $GA_dir = "${bisulfite_dir}GA_conversion/"; + + # creating 2 subdirectories to store a C->T (forward strand conversion) and a G->A (reverse strand conversion) + # converted version of the genome + unless (-d $CT_dir){ + mkdir $CT_dir or die "Unable to create directory $CT_dir $!\n"; + $verbose and print "Created Bisulfite Genome folder $CT_dir\n"; + } + unless (-d $GA_dir){ + mkdir $GA_dir or die "Unable to create directory $GA_dir $!\n"; + $verbose and print "Created Bisulfite Genome folder $GA_dir\n"; + } + + # moving back to the original genome folder + chdir $genome_folder or die "Could't move to directory $genome_folder $!"; + # $verbose and print "Moved back to genome folder folder $genome_folder\n"; + warn "\nStep I - Prepare genome folders - completed\n\n\n"; + return @filenames; +} + +sub print_helpfile{ + print << 'HOW_TO'; + + +DESCRIPTION + +This script is supposed to convert a specified reference genome into two different bisulfite +converted versions and index them for alignments with 
Bowtie 1 (default), or Bowtie 2. The first +bisulfite genome will have all Cs converted to Ts (C->T), and the other one will have all Gs +converted to As (G->A). Both bisulfite genomes will be stored in subfolders within the reference +genome folder. Once the bisulfite conversion has been completed the program will fork and launch +two simultaneous instances of the bowtie 1 or 2 indexer (bowtie-build or bowtie2-build). Be aware +that the indexing process can take up to several hours; this will mainly depend on genome size +and system resources. + + + + +The following is a brief description of command line options and arguments to control the +Bismark Genome Preparation script: + + +USAGE: bismark_genome_preparation [options] + + +OPTIONS: + +--help/--man Displays this help filea and exits. + +--version Displays version information and exits. + +--verbose Print verbose output for more details or debugging. + +--path_to_bowtie The full path to the Bowtie 1 or Bowtie 2 installation on your system.If + the path is not provided as an option you will be prompted for it. + +--bowtie2 This will create bisulfite indexes for Bowtie 2. (Default: Bowtie 1). + +--single_fasta Instruct the Bismark Indexer to write the converted genomes into + single-entry FastA files instead of making one multi-FastA file (MFA) + per chromosome. This might be useful if individual bisulfite converted + chromosomes are needed (e.g. for debugging), however it can cause a + problem with indexing if the number of chromosomes is vast (this is likely + to be in the range of several thousand files; the operating system can + only handle lists up to a certain length, and some newly assembled + genomes may contain 20000-50000 contigs of scaffold files which do exceed + this list length limit). + + +ARGUMENTS: + + The path to the folder containing the genome to be bisulfite converted. 
+ At the current time Bismark Genome Preparation expects one or more fastA + files in the folder (with the file extension: .fa or .fasta). If the path + is not provided as an argument you will be prompted for it. + + + +This script was last modified on 18 Nov 2011. +HOW_TO +} diff --git a/lib/tool_shed/test/test_data/repos/bismark/0/bismark_methylation_extractor.py b/lib/tool_shed/test/test_data/repos/bismark/0/bismark_methylation_extractor.py new file mode 100644 index 000000000000..cb79d1ecf590 --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/bismark/0/bismark_methylation_extractor.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python + +import argparse, os, shutil, subprocess, sys, tempfile, fileinput +import zipfile +from glob import glob + +def stop_err( msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit() + +def zipper(dir, zip_file): + zip = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED) + root_len = len(os.path.abspath(dir)) + for root, dirs, files in os.walk(dir): + archive_root = os.path.abspath(root)[root_len:] + for f in files: + fullpath = os.path.join(root, f) + archive_name = os.path.join(archive_root, f) + zip.write(fullpath, archive_name, zipfile.ZIP_DEFLATED) + zip.close() + return zip_file + +def __main__(): + #Parse Command Line + parser = argparse.ArgumentParser(description='Wrapper for the bismark methylation caller.') + + # input options + parser.add_argument( '--infile', help='Input file in SAM format.' 
) + parser.add_argument( '--single-end', dest='single_end', action="store_true" ) + parser.add_argument( '--paired-end', dest='paired_end', action="store_true" ) + + parser.add_argument( '--report-file', dest='report_file' ) + parser.add_argument( '--comprehensive', action="store_true" ) + parser.add_argument( '--merge-non-cpg', dest='merge_non_cpg', action="store_true" ) + parser.add_argument( '--no-overlap', dest='no_overlap', action="store_true" ) + parser.add_argument( '--compress' ) + parser.add_argument( '--ignore-bps', dest='ignore_bps', type=int ) + + # OT - original top strand + parser.add_argument( '--cpg_ot' ) + parser.add_argument( '--chg_ot' ) + parser.add_argument( '--chh_ot' ) + # CTOT - complementary to original top strand + parser.add_argument( '--cpg_ctot' ) + parser.add_argument( '--chg_ctot' ) + parser.add_argument( '--chh_ctot' ) + # OB - original bottom strand + parser.add_argument( '--cpg_ob' ) + parser.add_argument( '--chg_ob' ) + parser.add_argument( '--chh_ob' ) + # CTOT - complementary to original bottom strand + parser.add_argument( '--cpg_ctob' ) + parser.add_argument( '--chg_ctob' ) + parser.add_argument( '--chh_ctob' ) + + parser.add_argument( '--cpg_context' ) + parser.add_argument( '--chg_context' ) + parser.add_argument( '--chh_context' ) + + parser.add_argument( '--non_cpg_context' ) + + parser.add_argument( '--non_cpg_context_ot' ) + parser.add_argument( '--non_cpg_context_ctot' ) + parser.add_argument( '--non_cpg_context_ob' ) + parser.add_argument( '--non_cpg_context_ctob' ) + + args = parser.parse_args() + + + # Build methylation extractor command + output_dir = tempfile.mkdtemp() + cmd = 'bismark_methylation_extractor --no_header -o %s %s %s' + + additional_opts = '' + # Set up all options + if args.single_end: + additional_opts += ' --single-end ' + else: + additional_opts += ' --paired-end ' + if args.no_overlap: + additional_opts += ' --no_overlap ' + if args.ignore_bps: + additional_opts += ' --ignore %s ' % 
args.ignore_bps + if args.comprehensive: + additional_opts += ' --comprehensive ' + if args.merge_non_cpg: + additional_opts += ' --merge_non_CpG ' + if args.report_file: + additional_opts += ' --report ' + + + # Final command: + cmd = cmd % (output_dir, additional_opts, args.infile) + + # Run + try: + tmp_out = tempfile.NamedTemporaryFile().name + tmp_stdout = open( tmp_out, 'wb' ) + tmp_err = tempfile.NamedTemporaryFile().name + tmp_stderr = open( tmp_err, 'wb' ) + proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr ) + returncode = proc.wait() + tmp_stderr.close() + # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp_err, 'rb' ) + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + tmp_stdout.close() + tmp_stderr.close() + if returncode != 0: + raise Exception, stderr + + # TODO: look for errors in program output. 
+ except Exception, e: + stop_err( 'Error in bismark methylation extractor:\n' + str( e ) ) + + + # collect and copy output files + + if args.compress: + zipper(output_dir, args.compress) + + + if args.cpg_ot: + shutil.move( glob(os.path.join( output_dir, '*CpG_OT_*'))[0], args.cpg_ot ) + if args.chg_ot: + shutil.move( glob(os.path.join( output_dir, '*CHG_OT_*'))[0], args.chg_ot ) + if args.chh_ot: + shutil.move( glob(os.path.join( output_dir, '*CHH_OT_*'))[0], args.chh_ot ) + if args.cpg_ctot: + shutil.move( glob(os.path.join( output_dir, '*CpG_CTOT_*'))[0], args.cpg_ctot ) + if args.chg_ctot: + shutil.move( glob(os.path.join( output_dir, '*CHG_CTOT_*'))[0], args.chg_ctot ) + if args.chh_ctot: + shutil.move( glob(os.path.join( output_dir, '*CHH_CTOT_*'))[0], args.chh_ctot ) + if args.cpg_ob: + shutil.move( glob(os.path.join( output_dir, '*CpG_OB_*'))[0], args.cpg_ob ) + if args.chg_ob: + shutil.move( glob(os.path.join( output_dir, '*CHG_OB_*'))[0], args.chg_ob ) + if args.chh_ob: + shutil.move( glob(os.path.join( output_dir, '*CHH_OB_*'))[0], args.chh_ob ) + if args.cpg_ctob: + shutil.move( glob(os.path.join( output_dir, '*CpG_CTOB_*'))[0], args.cpg_ctob ) + if args.chg_ctob: + shutil.move( glob(os.path.join( output_dir, '*CHG_CTOB_*'))[0], args.chg_ctob ) + if args.chh_ctob: + shutil.move( glob(os.path.join( output_dir, '*CHH_CTOB_*'))[0], args.chh_ctob ) + + # context-dependent methylation output files + if args.cpg_context: + shutil.move( glob(os.path.join( output_dir, '*CpG_context_*'))[0], args.cpg_context ) + if args.chg_context: + shutil.move( glob(os.path.join( output_dir, '*CHG_context_*'))[0], args.chg_context ) + if args.chh_context: + shutil.move( glob(os.path.join( output_dir, '*CHH_context_*'))[0], args.chh_context ) + + if args.non_cpg_context: + shutil.move( glob(os.path.join( output_dir, '*Non_CpG_context_*'))[0], args.non_cpg_context ) + + if args.non_cpg_context_ot: + shutil.move( glob(os.path.join( output_dir, '*Non_CpG_OT_*'))[0], 
args.non_cpg_context_ot ) + if args.non_cpg_context_ctot: + shutil.move( glob(os.path.join( output_dir, '*Non_CpG_CTOT_*'))[0], args.non_cpg_context_ctot ) + if args.non_cpg_context_ob: + shutil.move( glob(os.path.join( output_dir, '*Non_CpG_OB_*'))[0], args.non_cpg_context_ob ) + if args.non_cpg_context_ctob: + shutil.move( glob(os.path.join( output_dir, '*Non_CpG_CTOB_*'))[0], args.non_cpg_context_ctob ) + + + + if args.report_file: + shutil.move( glob(os.path.join( output_dir, '*_splitting_report*'))[0], args.report_file ) + + + # Clean up temp dirs + if os.path.exists( output_dir ): + shutil.rmtree( output_dir ) + +if __name__=="__main__": __main__() diff --git a/lib/tool_shed/test/test_data/repos/bismark/0/bismark_methylation_extractor.xml b/lib/tool_shed/test/test_data/repos/bismark/0/bismark_methylation_extractor.xml new file mode 100644 index 000000000000..141ec5805d7f --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/bismark/0/bismark_methylation_extractor.xml @@ -0,0 +1,306 @@ + + + methylation extractor + + + SCRIPT_PATH + bowtie + bowtie2 + + + + bismark_methylation_extractor.py + + --infile $input + + --bismark_path \$SCRIPT_PATH + + #if $singlePaired.sPaired == "single": + --single-end + #else: + --paired-end + $no_overlap + #end if + + #if str($ignore_bps) != "0": + --ignore $ignore_bps + #end if + + #if $report: + --report-file $o_report + #end if + + #if $comprehensive: + --comprehensive + #end if + + #if $merge_non_cpg: + --merge-non-cpg + #end if + + #if $compress: + --compress $compressed_output + #else: + #if $comprehensive == False and $merge_non_cpg == False: + ##twelfe files + --cpg_ot $cpg_ot + --chg_ot $chg_ot + --chh_ot $chh_ot + --cpg_ctot $cpg_ctot + --chg_ctot $chg_ctot + --chh_ctot $chh_ctot + --cpg_ob $cpg_ob + --chg_ob $chg_ob + --chh_ob $chh_ob + --cpg_ctob $cpg_ctob + --chg_ctob $chg_ctob + --chh_ctob $chh_ctob + #elif $merge_non_cpg and $comprehensive: + ## two files + --non_cpg_context $non_cpg_context + --cpg_context 
$cpg_context + #elif $comprehensive: + ## three files + --cpg_context $cpg_context + --chg_context $chg_context + --chh_context $chh_context + #elif $merge_non_cpg: + ## eight files + --non_cpg_context_ctot $non_cpg_context_ctot + --non_cpg_context_ot $non_cpg_context_ot + --non_cpg_context_ob $non_cpg_context_ob + --non_cpg_context_ctob $non_cpg_context_ctob + --cpg_ot $cpg_ot + --cpg_ctot $cpg_ctot + --cpg_ob $cpg_ob + --cpg_ctob $cpg_ctob + #end if + ## end compress + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + ( report is True ) + + + + + ( compress == False and comprehensive == False and merge_non_CpG == False) + + + ( compress == False and comprehensive == False and merge_non_CpG == False) + + + ( compress == False and comprehensive == False and merge_non_CpG == False) + + + ( compress == False and comprehensive == False and merge_non_CpG == False) + + + ( compress == False and comprehensive == False and merge_non_CpG == False) + + + ( compress == False and comprehensive == False and merge_non_CpG == False) + + + + ( compress == False and comprehensive == False and merge_non_CpG == False) + + + ( compress == False and comprehensive == False and merge_non_CpG == False) + + + ( compress == False and comprehensive == False and merge_non_CpG == False) + + + ( compress == False and comprehensive == False and merge_non_CpG == False) + + + ( compress == False and comprehensive == False and merge_non_CpG == False) + + + ( compress == False and comprehensive == False and merge_non_CpG == False) + + + + + ( compress == False and comprehensive) + + + ( compress == False and comprehensive and merge_non_CpG == False) + + + ( compress == False and comprehensive and merge_non_CpG == False) + + + + ( compress == False and comprehensive and merge_non_cpg) + + + + ( compress == False and comprehensive == False and merge_non_cpg) + + + ( compress == False and comprehensive == False and merge_non_cpg) + + + ( compress == False and comprehensive == False and 
merge_non_cpg) + + + ( compress == False and comprehensive == False and merge_non_cpg) + + + + ( compress ) + + + + + + + + +**What it does** + +The following is a brief description of all options to control the Bismark_ +methylation extractor. The script reads in a bisulfite read alignment results file +produced by the Bismark bisulfite mapper and extracts the methylation information +for individual cytosines. This information is found in the methylation call field +which can contain the following characters: + + + - X = for methylated C in CHG context (was protected) + - x = for not methylated C CHG (was converted) + - H = for methylated C in CHH context (was protected) + - h = for not methylated C in CHH context (was converted) + - Z = for methylated C in CpG context (was protected) + - z = for not methylated C in CpG context (was converted) + - . = for any bases not involving cytosines + + +The methylation extractor outputs result files for cytosines in CpG, CHG and CHH +context (this distinction is actually already made in Bismark itself). As the methylation +information for every C analysed can produce files which easily have tens or even hundreds of +millions of lines, file sizes can become very large and more difficult to handle. The C +methylation info additionally splits cytosine methylation calls up into one of the four possible +strands a given bisulfite read aligned against: + + - OT = original top strand + - CTOT = complementary to original top strand + + - OB = original bottom strand + - CTOB = complementary to original bottom strand + +Thus, by default twelve individual output files are being generated per input file (unless +--comprehensive is specified, see below). The output files can be imported into a genome +viewer, such as SeqMonk, and re-combined into a single data group if desired (in fact +unless the bisulfite reads were generated preserving directionality it doesn't make any +sense to look at the data in a strand-specific manner). 
Strand-specific output files can +optionally be skipped, in which case only three output files for CpG, CHG or CHH context +will be generated. For both the strand-specific and comprehensive outputs there is also +the option to merge both non-CpG contexts (CHG and CHH) into one single non-CpG context. + + +.. _Bismark: http://www.bioinformatics.babraham.ac.uk/projects/bismark/ + + +It is developed by Krueger F and Andrews SR. at the Babraham Institute. Krueger F, Andrews SR. (2011) Bismark: a flexible aligner and methylation caller for Bisulfite-Seq applications. Bioinformatics, 27, 1571-2. + +------- + +**Bismark settings** + +All of the options have a default value. You can change any of them. If any Bismark function is missing please contact the tool author or your Galaxy admin. + +------ + +**Outputs** + +The output files are in the following format (tab delimited):: + + + Column Description + -------- -------------------------------------------------------- + 1 seq-ID + 2 strand + 3 chromosome + 4 position + 5 methylation call + + + * Methylated cytosines receive a '+' orientation, + * Unmethylated cytosines receive a '-' orientation. + +------ + +**OPTIONS** + +Input:: + + -s/--single-end Input file(s) are Bismark result file(s) generated from single-end + read data. Specifying either --single-end or --paired-end is + mandatory. + + -p/--paired-end Input file(s) are Bismark result file(s) generated from paired-end + read data. Specifying either --paired-end or --single-end is + mandatory. + + --no_overlap For paired-end reads it is theoretically possible that read_1 and + read_2 overlap. This option avoids scoring overlapping methylation + calls twice. Whilst this removes a bias towards more methylation calls + towards the center of sequenced fragments it can de facto remove + a good proportion of the data. + + --ignore INT Ignore the first INT bp at the 5' end of each read when processing the + methylation call string. This can remove e.g. 
a restriction enzyme site + at the start of each read. + +Output:: + + --comprehensive Specifying this option will merge all four possible strand-specific + methylation info into context-dependent output files. The default + contexts are: + - CpG context + - CHG context + - CHH context + + --merge_non_CpG This will produce two output files (in --comprehensive mode) or eight + strand-specific output files (default) for Cs in + - CpG context + - non-CpG context + + --report Prints out a short methylation summary as well as the paramaters used to run + this script. + + + + diff --git a/lib/tool_shed/test/test_data/repos/bismark/0/bismark_wrapper.py b/lib/tool_shed/test/test_data/repos/bismark/0/bismark_wrapper.py new file mode 100644 index 000000000000..606fa428bd77 --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/bismark/0/bismark_wrapper.py @@ -0,0 +1,288 @@ +#!/usr/bin/env python + +import argparse, os, shutil, subprocess, sys, tempfile, fileinput +import fileinput +from glob import glob + +def stop_err( msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit() + +def __main__(): + #Parse Command Line + parser = argparse.ArgumentParser(description='Wrapper for the bismark bisulfite mapper.') + parser.add_argument( '-p', '--num-threads', dest='num_threads', + type=int, default=4, help='Use this many threads to align reads. The default is 4.' ) + + parser.add_argument( '--bismark_path', dest='bismark_path', help='Path to the bismark perl scripts' ) + + parser.add_argument( '--bowtie2', action='store_true', default=False, help='Running bismark with bowtie2 and not with bowtie.' ) + + # input options + parser.add_argument( '--own-file', dest='own_file', help='' ) + parser.add_argument( '-D', '--indexes-path', dest='index_path', help='Indexes directory; location of .ebwt and .fa files.' 
) + parser.add_argument( '-O', '--output', dest='output' ) + parser.add_argument( '--output-report-file', dest='output_report_file' ) + parser.add_argument( '--suppress-header', dest='suppress_header', action="store_true" ) + + parser.add_argument( '--mate-paired', dest='mate_paired', action='store_true', help='Reads are mate-paired', default=False) + + + parser.add_argument( '-1', '--mate1', dest='mate1', + help='The forward reads file in Sanger FASTQ or FASTA format.' ) + parser.add_argument( '-2', '--mate2', dest='mate2', + help='The reverse reads file in Sanger FASTQ or FASTA format.' ) + + parser.add_argument( '--output-unmapped-reads', dest='output_unmapped_reads', + help='Additional output file with unmapped reads (single-end).' ) + parser.add_argument( '--output-unmapped-reads-l', dest='output_unmapped_reads_l', + help='File name for unmapped reads (left, paired-end).' ) + parser.add_argument( '--output-unmapped-reads-r', dest='output_unmapped_reads_r', + help='File name for unmapped reads (right, paired-end).' ) + + + parser.add_argument( '--output-suppressed-reads', dest='output_suppressed_reads', + help='Additional output file with suppressed reads (single-end).' ) + parser.add_argument( '--output-suppressed-reads-l', dest='output_suppressed_reads_l', + help='File name for suppressed reads (left, paired-end).' ) + parser.add_argument( '--output-suppressed-reads-r', dest='output_suppressed_reads_r', + help='File name for suppressed reads (right, paired-end).' ) + + + parser.add_argument( '--single-paired', dest='single_paired', + help='The single-end reads file in Sanger FASTQ or FASTA format.' 
) + + parser.add_argument( '--fastq', action='store_true', help='Query filetype is in FASTQ format') + parser.add_argument( '--fasta', action='store_true', help='Query filetype is in FASTA format') + parser.add_argument( '--phred64-quals', dest='phred64', action="store_true" ) + + + parser.add_argument( '--skip-reads', dest='skip_reads', type=int ) + parser.add_argument( '--qupto', type=int) + + + # paired end options + parser.add_argument( '-I', '--minins', dest='min_insert' ) + parser.add_argument( '-X', '--maxins', dest='max_insert' ) + parser.add_argument( '--no-mixed', dest='no_mixed', action="store_true" ) + parser.add_argument( '--no-discordant', dest='no_discordant', action="store_true" ) + + #parse general options + # default 20 + parser.add_argument( '--seed-len', dest='seed_len', type=int) + # default 15 + parser.add_argument( '--seed-extention-attempts', dest='seed_extention_attempts', type=int ) + # default 0 + parser.add_argument( '--seed-mismatches', dest='seed_mismatches', type=int ) + # default 2 + parser.add_argument( '--max-reseed', dest='max_reseed', type=int ) + """ + # default 70 + parser.add_argument( '--maqerr', dest='maqerr', type=int ) + """ + + """ + The number of megabytes of memory a given thread is given to store path + descriptors in --best mode. Best-first search must keep track of many paths + at once to ensure it is always extending the path with the lowest cumulative + cost. Bowtie tries to minimize the memory impact of the descriptors, but + they can still grow very large in some cases. If you receive an error message + saying that chunk memory has been exhausted in --best mode, try adjusting + this parameter up to dedicate more memory to the descriptors. Default: 512. + """ + parser.add_argument( '--chunkmbs', type=int, default=512 ) + + args = parser.parse_args() + + # Create bismark index if necessary. + index_dir = "" + if args.own_file: + """ + Create a temporary index with the offered files from the user. 
+ Utilizing the script: bismark_genome_preparation + bismark_genome_preparation --bowtie2 hg19/ + """ + tmp_index_dir = tempfile.mkdtemp() + index_path = os.path.join( tmp_index_dir, '.'.join( os.path.split( args.own_file )[1].split( '.' )[:-1] ) ) + try: + """ + Create a hard link pointing to args.own_file named 'index_path'.fa. + """ + os.symlink( args.own_file, index_path + '.fa' ) + except Exception, e: + if os.path.exists( tmp_index_dir ): + shutil.rmtree( tmp_index_dir ) + stop_err( 'Error in linking the reference database.\n' + str( e ) ) + # bismark_genome_preparation needs the complete path to the folder in which the database is stored + if args.bowtie2: + cmd_index = 'bismark_genome_preparation --bowtie2 %s ' % ( tmp_index_dir ) + else: + cmd_index = 'bismark_genome_preparation %s ' % ( tmp_index_dir ) + if args.bismark_path: + # add the path to the bismark perl scripts, that is needed for galaxy + cmd_index = '%s/%s' % (args.bismark_path, cmd_index) + try: + tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name + tmp_stderr = open( tmp, 'wb' ) + proc = subprocess.Popen( args=cmd_index, shell=True, cwd=tmp_index_dir, stdout=open(os.devnull, 'wb'), stderr=tmp_stderr.fileno() ) + returncode = proc.wait() + tmp_stderr.close() + # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp, 'rb' ) + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + tmp_stderr.close() + if returncode != 0: + raise Exception, stderr + except Exception, e: + if os.path.exists( tmp_index_dir ): + shutil.rmtree( tmp_index_dir ) + stop_err( 'Error indexing reference sequence\n' + str( e ) ) + index_dir = tmp_index_dir + else: + index_dir = args.index_path + + # Build bismark command + tmp_bismark_dir = tempfile.mkdtemp() + output_dir = os.path.join( tmp_bismark_dir, 'results') + cmd = 'bismark %(args)s --temp_dir 
%(tmp_bismark_dir)s -o %(output_dir)s --quiet %(genome_folder)s %(reads)s' + if args.bismark_path: + # add the path to the bismark perl scripts, that is needed for galaxy + cmd = '%s/%s' % (args.bismark_path, cmd) + + arguments = { + 'genome_folder': index_dir, + 'args': '', + 'tmp_bismark_dir': tmp_bismark_dir, + 'output_dir': output_dir, + } + + additional_opts = '' + # Set up the reads + if args.mate_paired: + # paired-end reads library + reads = '-1 %s ' % ( args.mate1 ) + reads += ' -2 %s ' % ( args.mate2 ) + additional_opts += ' -I %s -X %s ' % (args.min_insert, args.max_insert) + else: + # single paired reads library + reads = ' %s ' % ( args.single_paired ) + + + if not args.bowtie2: + # use bowtie specific options + additional_opts += ' --best ' + if args.seed_mismatches: + # --seedmms + additional_opts += ' -n %s ' % args.seed_mismatches + if args.seed_len: + # --seedlen + additional_opts += ' -l %s ' % args.seed_len + + # alignment options + if args.bowtie2: + additional_opts += ' -p %s --bowtie2 ' % args.num_threads + if args.seed_mismatches: + additional_opts += ' -N %s ' % args.seed_mismatches + if args.seed_len: + additional_opts += ' -L %s ' % args.seed_len + if args.seed_extention_attempts: + additional_opts += ' -D %s ' % args.seed_extention_attempts + if args.max_reseed: + additional_opts += ' -R %s ' % args.max_reseed + if args.no_discordant: + additional_opts += ' --no-discordant ' + if args.no_mixed: + additional_opts += ' --no-mixed ' + """ + if args.maqerr: + additional_opts += ' --maqerr %s ' % args.maqerr + """ + if args.skip_reads: + additional_opts += ' --skip %s ' % args.skip_reads + if args.qupto: + additional_opts += ' --qupto %s ' % args.qupto + if args.phred64: + additional_opts += ' --phred64-quals ' + if args.suppress_header: + additional_opts += ' --sam-no-hd ' + if args.output_unmapped_reads or ( args.output_unmapped_reads_l and args.output_unmapped_reads_r): + additional_opts += ' --un ' + if args.output_suppressed_reads or ( 
args.output_suppressed_reads_l and args.output_suppressed_reads_r): + additional_opts += ' --ambiguous ' + + arguments.update( {'args': additional_opts, 'reads': reads} ) + + # Final command: + cmd = cmd % arguments + + # Run + try: + tmp_out = tempfile.NamedTemporaryFile().name + tmp_stdout = open( tmp_out, 'wb' ) + tmp_err = tempfile.NamedTemporaryFile().name + tmp_stderr = open( tmp_err, 'wb' ) + proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr ) + returncode = proc.wait() + tmp_stderr.close() + # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp_err, 'rb' ) + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + tmp_stdout.close() + tmp_stderr.close() + if returncode != 0: + raise Exception, stderr + + # TODO: look for errors in program output. + except Exception, e: + stop_err( 'Error in bismark:\n' + str( e ) ) + + + # collect and copy output files + """ + if args.output_report_file: + output_report_file = open(args.output_report_file, 'w+') + for line in fileinput.input(glob( os.path.join( output_dir, '*.txt') )): + output_report_file.write(line) + output_report_file.close() + """ + + if args.output_suppressed_reads: + shutil.move( glob(os.path.join( output_dir, '*ambiguous_reads.txt'))[0], args.output_suppressed_reads ) + if args.output_suppressed_reads_l: + shutil.move( glob(os.path.join( output_dir, '*ambiguous_reads_1.txt'))[0], args.output_suppressed_reads_l ) + if args.output_suppressed_reads_r: + shutil.move( glob(os.path.join( output_dir, '*ambiguous_reads_2.txt'))[0], args.output_suppressed_reads_r ) + + if args.output_unmapped_reads: + shutil.move( glob(os.path.join( output_dir, '*unmapped_reads.txt'))[0], args.output_unmapped_reads ) + if args.output_unmapped_reads_l: + shutil.move( glob(os.path.join( output_dir, '*unmapped_reads_1.txt'))[0], 
args.output_unmapped_reads_l ) + if args.output_unmapped_reads_r: + shutil.move( glob(os.path.join( output_dir, '*unmapped_reads_2.txt'))[0], args.output_unmapped_reads_r ) + + shutil.move( glob( os.path.join( output_dir, '*.sam'))[0] , args.output) + + # Clean up temp dirs + if args.own_file: + if os.path.exists( tmp_index_dir ): + shutil.rmtree( tmp_index_dir ) + if os.path.exists( tmp_bismark_dir ): + shutil.rmtree( tmp_bismark_dir ) + +if __name__=="__main__": __main__() diff --git a/lib/tool_shed/test/test_data/repos/bismark/0/bowtie2_indices.loc b/lib/tool_shed/test/test_data/repos/bismark/0/bowtie2_indices.loc new file mode 100755 index 000000000000..61663caa7c70 --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/bismark/0/bowtie2_indices.loc @@ -0,0 +1,37 @@ +# bowtie2_indices.loc.sample +# This is a *.loc.sample file distributed with Galaxy that enables tools +# to use a directory of indexed data files. This one is for Bowtie2 and Tophat2. +# See the wiki: http://wiki.galaxyproject.org/Admin/NGS%20Local%20Setup +# First create these data files and save them in your own data directory structure. +# Then, create a bowtie_indices.loc file to use those indexes with tools. +# Copy this file, save it with the same name (minus the .sample), +# follow the format examples, and store the result in this directory. +# The file should include an one line entry for each index set. +# The path points to the "basename" for the set, not a specific file. +# It has four text columns seperated by TABS. 
+# +# +# +# So, for example, if you had hg18 indexes stored in: +# +# /depot/data2/galaxy/hg19/bowtie2/ +# +# containing hg19 genome and hg19.*.bt2 files, such as: +# -rw-rw-r-- 1 james james 914M Feb 10 18:56 hg19canon.fa +# -rw-rw-r-- 1 james james 914M Feb 10 18:56 hg19canon.1.bt2 +# -rw-rw-r-- 1 james james 683M Feb 10 18:56 hg19canon.2.bt2 +# -rw-rw-r-- 1 james james 3.3K Feb 10 16:54 hg19canon.3.bt2 +# -rw-rw-r-- 1 james james 683M Feb 10 16:54 hg19canon.4.bt2 +# -rw-rw-r-- 1 james james 914M Feb 10 20:45 hg19canon.rev.1.bt2 +# -rw-rw-r-- 1 james james 683M Feb 10 20:45 hg19canon.rev.2.bt2 +# +# then the bowtie2_indices.loc entry could look like this: +# +#hg19 hg19 Human (hg19) /depot/data2/galaxy/hg19/bowtie2/hg19canon +# +#More examples: +# +#mm10 mm10 Mouse (mm10) /depot/data2/galaxy/mm10/bowtie2/mm10 +#dm3 dm3 D. melanogaster (dm3) /depot/data2/galaxy/mm10/bowtie2/dm3 +# +# diff --git a/lib/tool_shed/test/test_data/repos/bismark/0/bowtie2_indices.loc.sample b/lib/tool_shed/test/test_data/repos/bismark/0/bowtie2_indices.loc.sample new file mode 100755 index 000000000000..61663caa7c70 --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/bismark/0/bowtie2_indices.loc.sample @@ -0,0 +1,37 @@ +# bowtie2_indices.loc.sample +# This is a *.loc.sample file distributed with Galaxy that enables tools +# to use a directory of indexed data files. This one is for Bowtie2 and Tophat2. +# See the wiki: http://wiki.galaxyproject.org/Admin/NGS%20Local%20Setup +# First create these data files and save them in your own data directory structure. +# Then, create a bowtie_indices.loc file to use those indexes with tools. +# Copy this file, save it with the same name (minus the .sample), +# follow the format examples, and store the result in this directory. +# The file should include an one line entry for each index set. +# The path points to the "basename" for the set, not a specific file. +# It has four text columns seperated by TABS. 
+# +# +# +# So, for example, if you had hg18 indexes stored in: +# +# /depot/data2/galaxy/hg19/bowtie2/ +# +# containing hg19 genome and hg19.*.bt2 files, such as: +# -rw-rw-r-- 1 james james 914M Feb 10 18:56 hg19canon.fa +# -rw-rw-r-- 1 james james 914M Feb 10 18:56 hg19canon.1.bt2 +# -rw-rw-r-- 1 james james 683M Feb 10 18:56 hg19canon.2.bt2 +# -rw-rw-r-- 1 james james 3.3K Feb 10 16:54 hg19canon.3.bt2 +# -rw-rw-r-- 1 james james 683M Feb 10 16:54 hg19canon.4.bt2 +# -rw-rw-r-- 1 james james 914M Feb 10 20:45 hg19canon.rev.1.bt2 +# -rw-rw-r-- 1 james james 683M Feb 10 20:45 hg19canon.rev.2.bt2 +# +# then the bowtie2_indices.loc entry could look like this: +# +#hg19 hg19 Human (hg19) /depot/data2/galaxy/hg19/bowtie2/hg19canon +# +#More examples: +# +#mm10 mm10 Mouse (mm10) /depot/data2/galaxy/mm10/bowtie2/mm10 +#dm3 dm3 D. melanogaster (dm3) /depot/data2/galaxy/mm10/bowtie2/dm3 +# +# diff --git a/lib/tool_shed/test/test_data/repos/bismark/0/tool_data_table_conf.xml.sample b/lib/tool_shed/test/test_data/repos/bismark/0/tool_data_table_conf.xml.sample new file mode 100644 index 000000000000..9e62763c741d --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/bismark/0/tool_data_table_conf.xml.sample @@ -0,0 +1,13 @@ + + + + + value, dbkey, name, path + +
+ + + value, dbkey, name, path + +
+
diff --git a/lib/tool_shed/test/test_data/repos/bismark/0/tool_dependencies.xml b/lib/tool_shed/test/test_data/repos/bismark/0/tool_dependencies.xml new file mode 100644 index 000000000000..776d2aa33442 --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/bismark/0/tool_dependencies.xml @@ -0,0 +1,61 @@ + + + + $REPOSITORY_INSTALL_DIR + + + + + http://downloads.sourceforge.net/project/bowtie-bio/bowtie2/2.0.0-beta7/bowtie2-2.0.0-beta7-source.zip + make + + bowtie2 + $INSTALL_DIR/bin + + + bowtie2-align + $INSTALL_DIR/bin + + + bowtie2-build + $INSTALL_DIR/bin + + chmod +x $INSTALL_DIR/bin/bowtie2 + + $INSTALL_DIR/bin + + + + +Compiling bowtie2 requires zlib and libpthread to be present on your system. + + + + + + http://downloads.sourceforge.net/project/bowtie-bio/bowtie/0.12.8/bowtie-0.12.8-src.zip + make + + bowtie + $INSTALL_DIR/bin + + + bowtie-inspect + $INSTALL_DIR/bin + + + bowtie-build + $INSTALL_DIR/bin + + + $INSTALL_DIR/bin + + + + + + + + + + diff --git a/lib/tool_shed/test/test_data/repos/bismark/1/bismark_bowtie2_wrapper.xml b/lib/tool_shed/test/test_data/repos/bismark/1/bismark_bowtie2_wrapper.xml new file mode 100644 index 000000000000..68238f4ddd85 --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/bismark/1/bismark_bowtie2_wrapper.xml @@ -0,0 +1,616 @@ + + + bisulfite mapper (bowtie2) + + + SCRIPT_PATH + bowtie + bowtie2 + + + + bismark_wrapper.py + + ## Change this to accommodate the number of threads you have available. + --num-threads 4 + + --bismark_path \$SCRIPT_PATH + + --bowtie2 + + ## + ## Bismark Genome Preparation, if desired. + ## + + ## Handle reference file. 
+ #if $refGenomeSource.genomeSource == "history": + --own-file=$refGenomeSource.ownFile + #else: + --indexes-path ${refGenomeSource.index.fields.path} + #end if + + + ## + ## Input parameters + ## + + + #if $singlePaired.sPaired == "single": + --single-paired $singlePaired.input_singles + + #if $singlePaired.input_singles.ext == "fastqillumina": + --phred64-quals + --fastq + #elif $singlePaired.input_singles.ext == "fastqsanger": + --fastq + #elif $singlePaired.input_singles.ext == "fasta": + --fasta + #end if + #else: + --mate-paired + --mate1 $singlePaired.input_mate1 + --mate2 $singlePaired.input_mate2 + + #if $singlePaired.input_mate1.ext == "fastqillumina": + --phred64-quals + --fastq + #elif $singlePaired.input_mate1.ext == "fastqsanger": + --fastq + #elif $singlePaired.input_mate1.ext == "fasta": + --fasta + #end if + + -I $singlePaired.minInsert + -X $singlePaired.maxInsert + #end if + + + ## for now hardcode the value for the required memory per thread in --best mode + --chunkmbs 512 + + + #if $params.settingsType == "custom": + + ## default 20 + --seed-len $params.seed_len + ## default 0 + --seed-mismatches $params.seed_mismatches + ## default 15 + --seed-extention-attempts $params.seed_extention_attempts + ## default 2 + --max-reseed $params.max_reseed + + ## default 70 + ##--maqerr $params.maqerr + + ## default unlimited + #if $params.qupto != 0: + --qupto $params.qupto + #end if + #if $params.skip_reads != 0: + --skip-reads $params.skip_reads + #end if + + ## if set, disable the original behaviour + $params.no_mixed + ## if set, disable the original behaviour + $params.no_discordant + + + ###if str($params.isReportOutput) == "yes": + ## --output-report-file $report_file + ###end if + + #end if + + ## + ## Output parameters. 
+ ## + --output $output + $suppress_header + + #if str( $singlePaired.sPaired ) == "single" + #if $output_unmapped_reads_l + --output-unmapped-reads $output_unmapped_reads_l + #end if + #if $output_suppressed_reads_l + --output-suppressed-reads $output_suppressed_reads_l + #end if + #else + #if $output_unmapped_reads_l and $output_unmapped_reads_r + --output-unmapped-reads-l $output_unmapped_reads_l + --output-unmapped-reads-r $output_unmapped_reads_r + #end if + #if $output_suppressed_reads_l and $output_suppressed_reads_l + --output-suppressed-reads-l $output_suppressed_reads_l + --output-suppressed-reads-r $output_suppressed_reads_r + #end if + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (( + params['settingsType'] == "custom" and + params['suppressed_read_file'] is True + )) + + + + + + + + + + + + + + + + + singlePaired['sPaired'] == "paired" + params['settingsType'] == "custom" + params['supressed_read_file'] is True + + + + + + + + + + + + + + + + + + (( + params['settingsType'] == "custom" and + params['unmapped_read_file'] is True + )) + + + + + + + + + + + + + + + + singlePaired['sPaired'] == "paired" + params['settingsType'] == "custom" + params['unmapped_read_file'] is True + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Bismark_ is a bisulfite mapper and methylation caller. Bismark takes in FastA or FastQ files and aligns the +reads to a specified bisulfite genome. Sequence reads are transformed into a bisulfite converted forward strand +version (C->T conversion) or into a bisulfite treated reverse strand (G->A conversion of the forward strand). 
+Each of these reads are then aligned to bisulfite treated forward strand index of a reference genome +(C->T converted) and a bisulfite treated reverse strand index of the genome (G->A conversion of the +forward strand, by doing this alignments will produce the same positions). These 4 instances of Bowtie (1 or 2) +are run in parallel. The sequence file(s) are then read in again sequence by sequence to pull out the original +sequence from the genome and determine if there were any protected C's present or not. + +.. _Bismark: http://www.bioinformatics.babraham.ac.uk/projects/bismark/ + +As of version 0.7.0 Bismark will only run 2 alignment threads for OT and OB in parallel, the 4 strand mode can be +re-enabled by using non_directional mode. + +It is developed by Krueger F and Andrews SR. at the Babraham Institute. Krueger F, Andrews SR. (2011) Bismark: a flexible aligner and methylation caller for Bisulfite-Seq applications. Bioinformatics, 27, 1571-2. + +------ + +**Know what you are doing** + +.. class:: warningmark + +There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words = running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. + + .. __: http://www.bioinformatics.babraham.ac.uk/projects/bismark/ + +------ + +**Input formats** + +Bismark accepts files in either Sanger FASTQ format (galaxy type *fastqsanger*), Illumina FASTQ format (galaxy type *fastqillumina*) or FASTA format (galaxy type *fasta*). Use the FASTQ Groomer to prepare your files. 
+ +------ + +**A Note on Built-in Reference Genomes** + +The default variant for all genomes is "Full", defined as all primary chromosomes (or scaffolds/contigs) including mitochondrial plus associated unmapped, plasmid, and other segments. When only one version of a genome is available in this tool, it represents the default "Full" variant. Some genomes will have more than one variant available. The "Canonical Male" or sometimes simply "Canonical" variant contains the primary chromosomes for a genome. For example a human "Canonical" variant contains chr1-chr22, chrX, chrY, and chrM. The "Canonical Female" variant contains the primary chromosomes excluding chrY. + +------ + +The final output of Bismark is in SAM format by default. + +**Outputs** + +The output is in SAM format, and has the following columns:: + + Column Description + -------- -------------------------------------------------------- + 1 QNAME seq-ID + 2 FLAG this flag tries to take the strand a bisulfite read + originated from into account + (this is different from ordinary DNA alignment flags!) + 3 RNAME chromosome + 4 POS start position + 5 MAPQ always 255 + 6 CIGAR extended CIGAR string + 7 MRNM Mate Reference sequence NaMe ('=' if same as RNAME) + 8 MPOS 1-based Mate POSition + 9 ISIZE Inferred insert SIZE + 10 SEQ query SEQuence on the same strand as the reference + 11 QUAL Phred33 scale + 12 NM-tag edit distance to the reference) + 13 XX-tag base-by-base mismatches to the reference. + This does not include indels. + 14 XM-tag methylation call string + 15 XR-tag read conversion state for the alignment + 16 XG-tag genome conversion state for the alignment + + +Each read of paired-end alignments is written out in a separate line in the above format. 
+ + +It looks like this (scroll sideways to see the entire example):: + + QNAME FLAG RNAME POS MAPQ CIAGR MRNM MPOS ISIZE SEQ QUAL OPT + HWI-EAS91_1_30788AAXX:1:1:1761:343 4 * 0 0 * * 0 0 AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh + HWI-EAS91_1_30788AAXX:1:1:1578:331 4 * 0 0 * * 0 0 GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh + +------- + +**Bismark settings** + +All of the options have a default value. You can change any of them. If any Bismark function is missing please contact the tool author or your Galaxy admin. + +------ + +**Bismark parameter list** + +This is an exhaustive list of Bismark options: + +------ + +**OPTIONS** + + +Input:: + + --singles A comma- or space-separated list of files containing the reads to be aligned (e.g. + lane1.fq,lane2.fq lane3.fq). Reads may be a mix of different lengths. Bismark will + produce one mapping result and one report file per input file. + + -1 mates1 Comma-separated list of files containing the #1 mates (filename usually includes + "_1"), e.g. flyA_1.fq,flyB_1.fq). Sequences specified with this option must + correspond file-for-file and read-for-read with those specified in mates2. + Reads may be a mix of different lengths. Bismark will produce one mapping result + and one report file per paired-end input file pair. + + -2 mates2 Comma-separated list of files containing the #2 mates (filename usually includes + "_2"), e.g. flyA_1.fq,flyB_1.fq). Sequences specified with this option must + correspond file-for-file and read-for-read with those specified in mates1. + Reads may be a mix of different lengths. + + -q/--fastq The query input files (specified as mate1,mate2 or singles are FASTQ + files (usually having extension .fg or .fastq). This is the default. See also + --solexa-quals. 
+ + -f/--fasta The query input files (specified as mate1,mate2 or singles are FASTA + files (usually havin extension .fa, .mfa, .fna or similar). All quality values + are assumed to be 40 on the Phred scale. + + -s/--skip INT Skip (i.e. do not align) the first INT reads or read pairs from the input. + + -u/--upto INT Only aligns the first INT reads or read pairs from the input. Default: no limit. + + --phred33-quals FASTQ qualities are ASCII chars equal to the Phred quality plus 33. Default: on. + + --phred64-quals FASTQ qualities are ASCII chars equal to the Phred quality plus 64. Default: off. + + --solexa-quals Convert FASTQ qualities from solexa-scaled (which can be negative) to phred-scaled + (which can't). The formula for conversion is: + phred-qual = 10 * log(1 + 10 ** (solexa-qual/10.0)) / log(10). Used with -q. This + is usually the right option for use with (unconverted) reads emitted by the GA + Pipeline versions prior to 1.3. Works only for Bowtie 1. Default: off. + + --solexa1.3-quals Same as --phred64-quals. This is usually the right option for use with (unconverted) + reads emitted by GA Pipeline version 1.3 or later. Default: off. + + +Alignment:: + + -n/--seedmms INT The maximum number of mismatches permitted in the "seed", i.e. the first L base pairs + of the read (where L is set with -l/--seedlen). This may be 0, 1, 2 or 3 and the + default is 1. This option is only available for Bowtie 1 (for Bowtie 2 see -N). + + -l/--seedlen The "seed length"; i.e., the number of bases of the high quality end of the read to + which the -n ceiling applies. The default is 28. Bowtie (and thus Bismark) is faster for + larger values of -l. This option is only available for Bowtie 1 (for Bowtie 2 see -L). + + -e/--maqerr INT Maximum permitted total of quality values at all mismatched read positions throughout + the entire alignment, not just in the "seed". The default is 70. Like Maq, bowtie rounds + quality values to the nearest 10 and saturates at 30. 
This value is not relevant for + Bowtie 2. + + --chunkmbs INT The number of megabytes of memory a given thread is given to store path descriptors in + --best mode. Best-first search must keep track of many paths at once to ensure it is + always extending the path with the lowest cumulative cost. Bowtie tries to minimize the + memory impact of the descriptors, but they can still grow very large in some cases. If + you receive an error message saying that chunk memory has been exhausted in --best mode, + try adjusting this parameter up to dedicate more memory to the descriptors. This value + is not relevant for Bowtie 2. Default: 512. + + -I/--minins INT The minimum insert size for valid paired-end alignments. E.g. if -I 60 is specified and + a paired-end alignment consists of two 20-bp alignments in the appropriate orientation + with a 20-bp gap between them, that alignment is considered valid (as long as -X is also + satisfied). A 19-bp gap would not be valid in that case. Default: 0. + + -X/--maxins INT The maximum insert size for valid paired-end alignments. E.g. if -X 100 is specified and + a paired-end alignment consists of two 20-bp alignments in the proper orientation with a + 60-bp gap between them, that alignment is considered valid (as long as -I is also satisfied). + A 61-bp gap would not be valid in that case. Default: 500. + + + +Output:: + + --non_directional The sequencing library was constructed in a non strand-specific manner, alignments to all four + bisulfite strands will be reported. Default: OFF. + + (The current Illumina protocol for BS-Seq is directional, in which case the strands complementary + to the original strands are merely theoretical and should not exist in reality. Specifying directional + alignments (which is the default) will only run 2 alignment threads to the original top (OT) + or bottom (OB) strands in parallel and report these alignments. This is the recommended option + for sprand-specific libraries). 
+ + --sam-no-hd Suppress SAM header lines (starting with @). This might be useful when very large input files are + split up into several smaller files to run concurrently and the output files are to be merged. + + --quiet Print nothing besides alignments. + + --vanilla Performs bisulfite mapping with Bowtie 1 and prints the 'old' output (as in Bismark 0.5.X) instead + of SAM format output. + + -un/--unmapped Write all reads that could not be aligned to a file in the output directory. Written reads will + appear as they did in the input, without any translation of quality values that may have + taken place within Bowtie or Bismark. Paired-end reads will be written to two parallel files with _1 + and _2 inserted in their filenames, i.e. _unmapped_reads_1.txt and unmapped_reads_2.txt. Reads + with more than one valid alignment with the same number of lowest mismatches (ambiguous mapping) + are also written to _unmapped_reads.txt unless the option --ambiguous is specified as well. + + --ambiguous Write all reads which produce more than one valid alignment with the same number of lowest + mismatches or other reads that fail to align uniquely to a file in the output directory. + Written reads will appear as they did in the input, without any of the translation of quality + values that may have taken place within Bowtie or Bismark. Paired-end reads will be written to two + parallel files with _1 and _2 inserted in theit filenames, i.e. _ambiguous_reads_1.txt and + _ambiguous_reads_2.txt. These reads are not written to the file specified with --un. + + -o/--output_dir DIR Write all output files into this directory. By default the output files will be written into + the same folder as the input file(s). If the specified folder does not exist, Bismark will attempt + to create it first. The path to the output folder can be either relative or absolute. + + --temp_dir DIR Write temporary files to this directory instead of into the same directory as the input files. 
If + the specified folder does not exist, Bismark will attempt to create it first. The path to the + temporary folder can be either relative or absolute. + +------ + +Bowtie 2 alignment options:: + + -N INT Sets the number of mismatches to allowed in a seed alignment during multiseed alignment. + Can be set to 0 or 1. Setting this higher makes alignment slower (often much slower) + but increases sensitivity. Default: 0. This option is only available for Bowtie 2 (for + Bowtie 1 see -n). + + -L INT Sets the length of the seed substrings to align during multiseed alignment. Smaller values + make alignment slower but more senstive. Default: the --sensitive preset of Bowtie 2 is + used by default, which sets -L to 20. This option is only available for Bowtie 2 (for + Bowtie 1 see -l). + + --ignore-quals When calculating a mismatch penalty, always consider the quality value at the mismatched + position to be the highest possible, regardless of the actual value. I.e. input is treated + as though all quality values are high. This is also the default behavior when the input + doesn't specify quality values (e.g. in -f mode). This option is invariable and on by default. + + +Bowtie 2 paired-end options:: + + --no-mixed This option disables Bowtie 2's behavior to try to find alignments for the individual mates if + it cannot find a concordant or discordant alignment for a pair. This option is invariable and + and on by default. + + --no-discordant Normally, Bowtie 2 looks for discordant alignments if it cannot find any concordant alignments. + A discordant alignment is an alignment where both mates align uniquely, but that does not + satisfy the paired-end constraints (--fr/--rf/--ff, -I, -X). This option disables that behavior + and it is on by default. + + +Bowtie 2 effort options:: + + -D INT Up to INT consecutive seed extension attempts can "fail" before Bowtie 2 moves on, using + the alignments found so far. 
A seed extension "fails" if it does not yield a new best or a + new second-best alignment. Default: 15. + + -R INT INT is the maximum number of times Bowtie 2 will "re-seed" reads with repetitive seeds. + When "re-seeding," Bowtie 2 simply chooses a new set of reads (same length, same number of + mismatches allowed) at different offsets and searches for more alignments. A read is considered + to have repetitive seeds if the total number of seed hits divided by the number of seeds + that aligned at least once is greater than 300. Default: 2. + + +Bowtie 2 Scoring options:: + + --score_min "func" Sets a function governing the minimum alignment score needed for an alignment to be considered + "valid" (i.e. good enough to report). This is a function of read length. For instance, specifying + L,0,-0.2 sets the minimum-score function f to f(x) = 0 + -0.2 * x, where x is the read length. + See also: setting function options at http://bowtie-bio.sourceforge.net/bowtie2. The default is + L,0,-0.2. + + +Bowtie 2 Reporting options:: + + --most_valid_alignments INT This used to be the Bowtie 2 parameter -M. As of Bowtie 2 version 2.0.0 beta7 the option -M is + deprecated. It will be removed in subsequent versions. What used to be called -M mode is still the + default mode, but adjusting the -M setting is deprecated. Use the -D and -R options to adjust the + effort expended to find valid alignments. + + For reference, this used to be the old (now deprecated) description of -M: + Bowtie 2 searches for at most INT+1 distinct, valid alignments for each read. The search terminates when it + can't find more distinct valid alignments, or when it finds INT+1 distinct alignments, whichever + happens first. Only the best alignment is reported. Information from the other alignments is used to + estimate mapping quality and to set SAM optional fields, such as AS:i and XS:i. 
Increasing -M makes + Bowtie 2 slower, but increases the likelihood that it will pick the correct alignment for a read that + aligns many places. For reads that have more than INT+1 distinct, valid alignments, Bowtie 2 does not + guarantee that the alignment reported is the best possible in terms of alignment score. -M is + always used and its default value is set to 10. + + + diff --git a/lib/tool_shed/test/test_data/repos/bismark/1/bismark_bowtie_wrapper.xml b/lib/tool_shed/test/test_data/repos/bismark/1/bismark_bowtie_wrapper.xml new file mode 100644 index 000000000000..6e4e4def6200 --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/bismark/1/bismark_bowtie_wrapper.xml @@ -0,0 +1,614 @@ + + + bisulfite mapper (bowtie) + + + SCRIPT_PATH + bowtie + bowtie2 + + + + bismark_wrapper.py + + ## Change this to accommodate the number of threads you have available. + --num-threads 4 + + --bismark_path \$SCRIPT_PATH + + ## + ## Bismark Genome Preparation, if desired. + ## + + ## Handle reference file. 
+ #if $refGenomeSource.genomeSource == "history": + --own-file=$refGenomeSource.ownFile + #else: + --indexes-path ${refGenomeSource.index.fields.path} + #end if + + + ## + ## Input parameters + ## + + + #if $singlePaired.sPaired == "single": + --single-paired $singlePaired.input_singles + + #if $singlePaired.input_singles.ext == "fastqillumina": + --phred64-quals + --fastq + #elif $singlePaired.input_singles.ext == "fastqsanger": + --fastq + #elif $singlePaired.input_singles.ext == "fasta": + --fasta + #end if + #else: + --mate-paired + --mate1 $singlePaired.input_mate1 + --mate2 $singlePaired.input_mate2 + + #if $singlePaired.input_mate1.ext == "fastqillumina": + --phred64-quals + --fastq + #elif $singlePaired.input_mate1.ext == "fastqsanger": + --fastq + #elif $singlePaired.input_mate1.ext == "fasta": + --fasta + #end if + + -I $singlePaired.minInsert + -X $singlePaired.maxInsert + #end if + + + ## for now hardcode the value for the required memory per thread in --best mode + --chunkmbs 512 + + + #if $params.settingsType == "custom": + + ## default 20 + --seed-len $params.seed_len + ## default 0 + --seed-mismatches $params.seed_mismatches + ## default 15 + --seed-extention-attempts $params.seed_extention_attempts + ## default 2 + --max-reseed $params.max_reseed + + ## default 70 + ##--maqerr $params.maqerr + + ## default unlimited + #if $params.qupto != 0: + --qupto $params.qupto + #end if + #if $params.skip_reads != 0: + --skip-reads $params.skip_reads + #end if + + ## if set, disable the original behaviour + $params.no_mixed + ## if set, disable the original behaviour + $params.no_discordant + + + ###if str($params.isReportOutput) == "yes": + ## --output-report-file $report_file + ###end if + + #end if + + ## + ## Output parameters. 
+ ## + --output $output + $suppress_header + + #if str( $singlePaired.sPaired ) == "single" + #if $output_unmapped_reads_l + --output-unmapped-reads $output_unmapped_reads_l + #end if + #if $output_suppressed_reads_l + --output-suppressed-reads $output_suppressed_reads_l + #end if + #else + #if $output_unmapped_reads_l and $output_unmapped_reads_r + --output-unmapped-reads-l $output_unmapped_reads_l + --output-unmapped-reads-r $output_unmapped_reads_r + #end if + #if $output_suppressed_reads_l and $output_suppressed_reads_l + --output-suppressed-reads-l $output_suppressed_reads_l + --output-suppressed-reads-r $output_suppressed_reads_r + #end if + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (( + params['settingsType'] == "custom" and + params['suppressed_read_file'] is True + )) + + + + + + + + + + + + + + + + + singlePaired['sPaired'] == "paired" + params['settingsType'] == "custom" + params['supressed_read_file'] is True + + + + + + + + + + + + + + + + + + (( + params['settingsType'] == "custom" and + params['unmapped_read_file'] is True + )) + + + + + + + + + + + + + + + + singlePaired['sPaired'] == "paired" + params['settingsType'] == "custom" + params['unmapped_read_file'] is True + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Bismark_ is a bisulfite mapper and methylation caller. Bismark takes in FastA or FastQ files and aligns the +reads to a specified bisulfite genome. Sequence reads are transformed into a bisulfite converted forward strand +version (C->T conversion) or into a bisulfite treated reverse strand (G->A conversion of the forward strand). 
+Each of these reads are then aligned to bisulfite treated forward strand index of a reference genome +(C->T converted) and a bisulfite treated reverse strand index of the genome (G->A conversion of the +forward strand, by doing this alignments will produce the same positions). These 4 instances of Bowtie (1 or 2) +are run in parallel. The sequence file(s) are then read in again sequence by sequence to pull out the original +sequence from the genome and determine if there were any protected C's present or not. + +.. _Bismark: http://www.bioinformatics.babraham.ac.uk/projects/bismark/ + +As of version 0.7.0 Bismark will only run 2 alignment threads for OT and OB in parallel, the 4 strand mode can be +re-enabled by using non_directional mode. + +It is developed by Krueger F and Andrews SR. at the Babraham Institute. Krueger F, Andrews SR. (2011) Bismark: a flexible aligner and methylation caller for Bisulfite-Seq applications. Bioinformatics, 27, 1571-2. + +------ + +**Know what you are doing** + +.. class:: warningmark + +There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words = running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. + + .. __: http://www.bioinformatics.babraham.ac.uk/projects/bismark/ + +------ + +**Input formats** + +Bismark accepts files in either Sanger FASTQ format (galaxy type *fastqsanger*), Illumina FASTQ format (galaxy type *fastqillumina*) or FASTA format (galaxy type *fasta*). Use the FASTQ Groomer to prepare your files. 
+ +------ + +**A Note on Built-in Reference Genomes** + +The default variant for all genomes is "Full", defined as all primary chromosomes (or scaffolds/contigs) including mitochondrial plus associated unmapped, plasmid, and other segments. When only one version of a genome is available in this tool, it represents the default "Full" variant. Some genomes will have more than one variant available. The "Canonical Male" or sometimes simply "Canonical" variant contains the primary chromosomes for a genome. For example a human "Canonical" variant contains chr1-chr22, chrX, chrY, and chrM. The "Canonical Female" variant contains the primary chromosomes excluding chrY. + +------ + +The final output of Bismark is in SAM format by default. + +**Outputs** + +The output is in SAM format, and has the following columns:: + + Column Description + -------- -------------------------------------------------------- + 1 QNAME seq-ID + 2 FLAG this flag tries to take the strand a bisulfite read + originated from into account + (this is different from ordinary DNA alignment flags!) + 3 RNAME chromosome + 4 POS start position + 5 MAPQ always 255 + 6 CIGAR extended CIGAR string + 7 MRNM Mate Reference sequence NaMe ('=' if same as RNAME) + 8 MPOS 1-based Mate POSition + 9 ISIZE Inferred insert SIZE + 10 SEQ query SEQuence on the same strand as the reference + 11 QUAL Phred33 scale + 12 NM-tag edit distance to the reference) + 13 XX-tag base-by-base mismatches to the reference. + This does not include indels. + 14 XM-tag methylation call string + 15 XR-tag read conversion state for the alignment + 16 XG-tag genome conversion state for the alignment + + +Each read of paired-end alignments is written out in a separate line in the above format. 
+ + +It looks like this (scroll sideways to see the entire example):: + + QNAME FLAG RNAME POS MAPQ CIAGR MRNM MPOS ISIZE SEQ QUAL OPT + HWI-EAS91_1_30788AAXX:1:1:1761:343 4 * 0 0 * * 0 0 AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh + HWI-EAS91_1_30788AAXX:1:1:1578:331 4 * 0 0 * * 0 0 GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh + +------- + +**Bismark settings** + +All of the options have a default value. You can change any of them. If any Bismark function is missing please contact the tool author or your Galaxy admin. + +------ + +**Bismark parameter list** + +This is an exhaustive list of Bismark options: + +------ + +**OPTIONS** + + +Input:: + + --singles A comma- or space-separated list of files containing the reads to be aligned (e.g. + lane1.fq,lane2.fq lane3.fq). Reads may be a mix of different lengths. Bismark will + produce one mapping result and one report file per input file. + + -1 mates1 Comma-separated list of files containing the #1 mates (filename usually includes + "_1"), e.g. flyA_1.fq,flyB_1.fq). Sequences specified with this option must + correspond file-for-file and read-for-read with those specified in mates2. + Reads may be a mix of different lengths. Bismark will produce one mapping result + and one report file per paired-end input file pair. + + -2 mates2 Comma-separated list of files containing the #2 mates (filename usually includes + "_2"), e.g. flyA_1.fq,flyB_1.fq). Sequences specified with this option must + correspond file-for-file and read-for-read with those specified in mates1. + Reads may be a mix of different lengths. + + -q/--fastq The query input files (specified as mate1,mate2 or singles are FASTQ + files (usually having extension .fg or .fastq). This is the default. See also + --solexa-quals. 
+ + -f/--fasta The query input files (specified as mate1,mate2 or singles are FASTA + files (usually havin extension .fa, .mfa, .fna or similar). All quality values + are assumed to be 40 on the Phred scale. + + -s/--skip INT Skip (i.e. do not align) the first INT reads or read pairs from the input. + + -u/--upto INT Only aligns the first INT reads or read pairs from the input. Default: no limit. + + --phred33-quals FASTQ qualities are ASCII chars equal to the Phred quality plus 33. Default: on. + + --phred64-quals FASTQ qualities are ASCII chars equal to the Phred quality plus 64. Default: off. + + --solexa-quals Convert FASTQ qualities from solexa-scaled (which can be negative) to phred-scaled + (which can't). The formula for conversion is: + phred-qual = 10 * log(1 + 10 ** (solexa-qual/10.0)) / log(10). Used with -q. This + is usually the right option for use with (unconverted) reads emitted by the GA + Pipeline versions prior to 1.3. Works only for Bowtie 1. Default: off. + + --solexa1.3-quals Same as --phred64-quals. This is usually the right option for use with (unconverted) + reads emitted by GA Pipeline version 1.3 or later. Default: off. + + +Alignment:: + + -n/--seedmms INT The maximum number of mismatches permitted in the "seed", i.e. the first L base pairs + of the read (where L is set with -l/--seedlen). This may be 0, 1, 2 or 3 and the + default is 1. This option is only available for Bowtie 1 (for Bowtie 2 see -N). + + -l/--seedlen The "seed length"; i.e., the number of bases of the high quality end of the read to + which the -n ceiling applies. The default is 28. Bowtie (and thus Bismark) is faster for + larger values of -l. This option is only available for Bowtie 1 (for Bowtie 2 see -L). + + -e/--maqerr INT Maximum permitted total of quality values at all mismatched read positions throughout + the entire alignment, not just in the "seed". The default is 70. Like Maq, bowtie rounds + quality values to the nearest 10 and saturates at 30. 
This value is not relevant for + Bowtie 2. + + --chunkmbs INT The number of megabytes of memory a given thread is given to store path descriptors in + --best mode. Best-first search must keep track of many paths at once to ensure it is + always extending the path with the lowest cumulative cost. Bowtie tries to minimize the + memory impact of the descriptors, but they can still grow very large in some cases. If + you receive an error message saying that chunk memory has been exhausted in --best mode, + try adjusting this parameter up to dedicate more memory to the descriptors. This value + is not relevant for Bowtie 2. Default: 512. + + -I/--minins INT The minimum insert size for valid paired-end alignments. E.g. if -I 60 is specified and + a paired-end alignment consists of two 20-bp alignments in the appropriate orientation + with a 20-bp gap between them, that alignment is considered valid (as long as -X is also + satisfied). A 19-bp gap would not be valid in that case. Default: 0. + + -X/--maxins INT The maximum insert size for valid paired-end alignments. E.g. if -X 100 is specified and + a paired-end alignment consists of two 20-bp alignments in the proper orientation with a + 60-bp gap between them, that alignment is considered valid (as long as -I is also satisfied). + A 61-bp gap would not be valid in that case. Default: 500. + + + +Output:: + + --non_directional The sequencing library was constructed in a non strand-specific manner, alignments to all four + bisulfite strands will be reported. Default: OFF. + + (The current Illumina protocol for BS-Seq is directional, in which case the strands complementary + to the original strands are merely theoretical and should not exist in reality. Specifying directional + alignments (which is the default) will only run 2 alignment threads to the original top (OT) + or bottom (OB) strands in parallel and report these alignments. This is the recommended option + for sprand-specific libraries). 
+ + --sam-no-hd Suppress SAM header lines (starting with @). This might be useful when very large input files are + split up into several smaller files to run concurrently and the output files are to be merged. + + --quiet Print nothing besides alignments. + + --vanilla Performs bisulfite mapping with Bowtie 1 and prints the 'old' output (as in Bismark 0.5.X) instead + of SAM format output. + + -un/--unmapped Write all reads that could not be aligned to a file in the output directory. Written reads will + appear as they did in the input, without any translation of quality values that may have + taken place within Bowtie or Bismark. Paired-end reads will be written to two parallel files with _1 + and _2 inserted in their filenames, i.e. _unmapped_reads_1.txt and unmapped_reads_2.txt. Reads + with more than one valid alignment with the same number of lowest mismatches (ambiguous mapping) + are also written to _unmapped_reads.txt unless the option --ambiguous is specified as well. + + --ambiguous Write all reads which produce more than one valid alignment with the same number of lowest + mismatches or other reads that fail to align uniquely to a file in the output directory. + Written reads will appear as they did in the input, without any of the translation of quality + values that may have taken place within Bowtie or Bismark. Paired-end reads will be written to two + parallel files with _1 and _2 inserted in theit filenames, i.e. _ambiguous_reads_1.txt and + _ambiguous_reads_2.txt. These reads are not written to the file specified with --un. + + -o/--output_dir DIR Write all output files into this directory. By default the output files will be written into + the same folder as the input file(s). If the specified folder does not exist, Bismark will attempt + to create it first. The path to the output folder can be either relative or absolute. + + --temp_dir DIR Write temporary files to this directory instead of into the same directory as the input files. 
If + the specified folder does not exist, Bismark will attempt to create it first. The path to the + temporary folder can be either relative or absolute. + +------ + +Bowtie 2 alignment options:: + + -N INT Sets the number of mismatches to allowed in a seed alignment during multiseed alignment. + Can be set to 0 or 1. Setting this higher makes alignment slower (often much slower) + but increases sensitivity. Default: 0. This option is only available for Bowtie 2 (for + Bowtie 1 see -n). + + -L INT Sets the length of the seed substrings to align during multiseed alignment. Smaller values + make alignment slower but more senstive. Default: the --sensitive preset of Bowtie 2 is + used by default, which sets -L to 20. This option is only available for Bowtie 2 (for + Bowtie 1 see -l). + + --ignore-quals When calculating a mismatch penalty, always consider the quality value at the mismatched + position to be the highest possible, regardless of the actual value. I.e. input is treated + as though all quality values are high. This is also the default behavior when the input + doesn't specify quality values (e.g. in -f mode). This option is invariable and on by default. + + +Bowtie 2 paired-end options:: + + --no-mixed This option disables Bowtie 2's behavior to try to find alignments for the individual mates if + it cannot find a concordant or discordant alignment for a pair. This option is invariable and + and on by default. + + --no-discordant Normally, Bowtie 2 looks for discordant alignments if it cannot find any concordant alignments. + A discordant alignment is an alignment where both mates align uniquely, but that does not + satisfy the paired-end constraints (--fr/--rf/--ff, -I, -X). This option disables that behavior + and it is on by default. + + +Bowtie 2 effort options:: + + -D INT Up to INT consecutive seed extension attempts can "fail" before Bowtie 2 moves on, using + the alignments found so far. 
A seed extension "fails" if it does not yield a new best or a + new second-best alignment. Default: 15. + + -R INT INT is the maximum number of times Bowtie 2 will "re-seed" reads with repetitive seeds. + When "re-seeding," Bowtie 2 simply chooses a new set of reads (same length, same number of + mismatches allowed) at different offsets and searches for more alignments. A read is considered + to have repetitive seeds if the total number of seed hits divided by the number of seeds + that aligned at least once is greater than 300. Default: 2. + + +Bowtie 2 Scoring options:: + + --score_min "func" Sets a function governing the minimum alignment score needed for an alignment to be considered + "valid" (i.e. good enough to report). This is a function of read length. For instance, specifying + L,0,-0.2 sets the minimum-score function f to f(x) = 0 + -0.2 * x, where x is the read length. + See also: setting function options at http://bowtie-bio.sourceforge.net/bowtie2. The default is + L,0,-0.2. + + +Bowtie 2 Reporting options:: + + --most_valid_alignments INT This used to be the Bowtie 2 parameter -M. As of Bowtie 2 version 2.0.0 beta7 the option -M is + deprecated. It will be removed in subsequent versions. What used to be called -M mode is still the + default mode, but adjusting the -M setting is deprecated. Use the -D and -R options to adjust the + effort expended to find valid alignments. + + For reference, this used to be the old (now deprecated) description of -M: + Bowtie 2 searches for at most INT+1 distinct, valid alignments for each read. The search terminates when it + can't find more distinct valid alignments, or when it finds INT+1 distinct alignments, whichever + happens first. Only the best alignment is reported. Information from the other alignments is used to + estimate mapping quality and to set SAM optional fields, such as AS:i and XS:i. 
Increasing -M makes + Bowtie 2 slower, but increases the likelihood that it will pick the correct alignment for a read that + aligns many places. For reads that have more than INT+1 distinct, valid alignments, Bowtie 2 does not + guarantee that the alignment reported is the best possible in terms of alignment score. -M is + always used and its default value is set to 10. + + + diff --git a/lib/tool_shed/test/test_data/repos/bismark/1/bismark_genome_preparation b/lib/tool_shed/test/test_data/repos/bismark/1/bismark_genome_preparation new file mode 100755 index 000000000000..1895a296632c --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/bismark/1/bismark_genome_preparation @@ -0,0 +1,492 @@ +#!/usr/bin/perl -- +use strict; +use warnings; +use Cwd; +use File::Path qw(rmtree); +$|++; + + +## This program is Copyright (C) 2010-12, Felix Krueger (felix.krueger@bbsrc.ac.uk) + +## This program is free software: you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation, either version 3 of the License, or +## (at your option) any later version. + +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. + +## You should have received a copy of the GNU General Public License +## along with this program. If not, see . 
+ +use Getopt::Long; +use Cwd; + +my $verbose; +my $help; +my $version; +my $man; +my $path_to_bowtie; +my $multi_fasta; +my $single_fasta; +my $bowtie2; + +my $bismark_version = 'v0.7.7'; + +GetOptions ('verbose' => \$verbose, + 'help' => \$help, + 'man' => \$man, + 'version' => \$version, + 'path_to_bowtie:s' => \$path_to_bowtie, + 'single_fasta' => \$single_fasta, + 'bowtie2' => \$bowtie2, + ); + +my $genome_folder = shift @ARGV; # mandatory +my $CT_dir; +my $GA_dir; + +if ($help or $man){ + print_helpfile(); + exit; +} + +if ($version){ + print << "VERSION"; + + Bismark - Bisulfite Mapper and Methylation Caller. + + Bismark Genome Preparation Version: $bismark_version + Copyright 2010-12 Felix Krueger, Babraham Bioinformatics + www.bioinformatics.babraham.ac.uk/projects/ + +VERSION + exit; +} + +if ($single_fasta){ + print "Writing individual genomes out into single-entry fasta files (one per chromosome)\n\n"; + $multi_fasta = 0; +} +else{ + print "Writing bisulfite genomes out into a single MFA (multi FastA) file\n\n"; + $single_fasta = 0; + $multi_fasta = 1; +} + +my @filenames = create_bisulfite_genome_folders(); + +process_sequence_files (); + +launch_bowtie_indexer(); + +sub launch_bowtie_indexer{ + if ($bowtie2){ + print "Bismark Genome Preparation - Step III: Launching the Bowtie 2 indexer\n"; + } + else{ + print "Bismark Genome Preparation - Step III: Launching the Bowtie (1) indexer\n"; + } + print "Please be aware that this process can - depending on genome size - take up to several hours!\n"; + sleep(5); + + ### if the path to bowtie was specfified explicitely + if ($path_to_bowtie){ + if ($bowtie2){ + $path_to_bowtie =~ s/$/bowtie2-build/; + } + else{ + $path_to_bowtie =~ s/$/bowtie-build/; + } + } + ### otherwise we assume that bowtie-build is in the path + else{ + if ($bowtie2){ + $path_to_bowtie = 'bowtie2-build'; + } + else{ + $path_to_bowtie = 'bowtie-build'; + } + } + + $verbose and print "\n"; + + ### Forking the program to run 2 instances of 
Bowtie-build or Bowtie2-build (= the Bowtie (1/2) indexer) + my $pid = fork(); + + # parent process + if ($pid){ + sleep(1); + chdir $CT_dir or die "Unable to change directory: $!\n"; + $verbose and warn "Preparing indexing of CT converted genome in $CT_dir\n"; + my @fasta_files = <*.fa>; + my $file_list = join (',',@fasta_files); + $verbose and print "Parent process: Starting to index C->T converted genome with the following command:\n\n"; + $verbose and print "$path_to_bowtie -f $file_list BS_CT\n\n"; + + sleep (11); + exec ("$path_to_bowtie","-f","$file_list","BS_CT"); + } + + # child process + elsif ($pid == 0){ + sleep(2); + chdir $GA_dir or die "Unable to change directory: $!\n"; + $verbose and warn "Preparing indexing of GA converted genome in $GA_dir\n"; + my @fasta_files = <*.fa>; + my $file_list = join (',',@fasta_files); + $verbose and print "Child process: Starting to index G->A converted genome with the following command:\n\n"; + $verbose and print "$path_to_bowtie -f $file_list BS_GA\n\n"; + $verbose and print "(starting in 10 seconds)\n"; + sleep(10); + exec ("$path_to_bowtie","-f","$file_list","BS_GA"); + } + + # if the platform doesn't support the fork command we will run the indexing processes one after the other + else{ + print "Forking process was not successful, therefore performing the indexing sequentially instead\n"; + sleep(10); + + ### moving to CT genome folder + $verbose and warn "Preparing to index CT converted genome in $CT_dir\n"; + chdir $CT_dir or die "Unable to change directory: $!\n"; + my @fasta_files = <*.fa>; + my $file_list = join (',',@fasta_files); + $verbose and print "$file_list\n\n"; + sleep(2); + system ("$path_to_bowtie","-f","$file_list","BS_CT"); + @fasta_files=(); + $file_list= ''; + + ### moving to GA genome folder + $verbose and warn "Preparing to index GA converted genome in $GA_dir\n"; + chdir $GA_dir or die "Unable to change directory: $!\n"; + @fasta_files = <*.fa>; + $file_list = join (',',@fasta_files); + 
$verbose and print "$file_list\n\n"; + sleep(2); + exec ("$path_to_bowtie","-f","$file_list","BS_GA"); + } +} + + +sub process_sequence_files { + + my ($total_CT_conversions,$total_GA_conversions) = (0,0); + $verbose and print "Bismark Genome Preparation - Step II: Bisulfite converting reference genome\n\n"; + sleep (3); + + $verbose and print "conversions performed:\n"; + $verbose and print join("\t",'chromosome','C->T','G->A'),"\n"; + + + ### If someone wants to index a genome which consists of thousands of contig and scaffold files we need to write the genome conversions into an MFA file + ### Otherwise the list of comma separated chromosomes we provide for bowtie-build will get too long for the kernel to handle + ### This is now the default option + + if ($multi_fasta){ + ### Here we just use one multi FastA file name, append .CT_conversion or .GA_conversion and print all sequence conversions into these files + my $bisulfite_CT_conversion_filename = "$CT_dir/genome_mfa.CT_conversion.fa"; + open (CT_CONVERT,'>',$bisulfite_CT_conversion_filename) or die "Can't write to file $bisulfite_CT_conversion_filename: $!\n"; + + my $bisulfite_GA_conversion_filename = "$GA_dir/genome_mfa.GA_conversion.fa"; + open (GA_CONVERT,'>',$bisulfite_GA_conversion_filename) or die "Can't write to file $bisulfite_GA_conversion_filename: $!\n"; + } + + foreach my $filename(@filenames){ + my ($chromosome_CT_conversions,$chromosome_GA_conversions) = (0,0); + open (IN,$filename) or die "Failed to read from sequence file $filename $!\n"; + # warn "Reading chromosome information from $filename\n\n"; + + ### first line needs to be a fastA header + my $first_line = ; + chomp $first_line; + + ### Extracting chromosome name from the FastA header + my $chromosome_name = extract_chromosome_name($first_line); + + ### alternatively, chromosomes can be written out into single-entry FastA files. This will only work for genomes with up to a few hundred chromosomes. 
+ unless ($multi_fasta){ + my $bisulfite_CT_conversion_filename = "$CT_dir/$chromosome_name"; + $bisulfite_CT_conversion_filename =~ s/$/.CT_conversion.fa/; + open (CT_CONVERT,'>',$bisulfite_CT_conversion_filename) or die "Can't write to file $bisulfite_CT_conversion_filename: $!\n"; + + my $bisulfite_GA_conversion_filename = "$GA_dir/$chromosome_name"; + $bisulfite_GA_conversion_filename =~ s/$/.GA_conversion.fa/; + open (GA_CONVERT,'>',$bisulfite_GA_conversion_filename) or die "Can't write to file $bisulfite_GA_conversion_filename: $!\n"; + } + + print CT_CONVERT ">",$chromosome_name,"_CT_converted\n"; # first entry + print GA_CONVERT ">",$chromosome_name,"_GA_converted\n"; # first entry + + + while (){ + + ### in case the line is a new fastA header + if ($_ =~ /^>/){ + ### printing out the stats for the previous chromosome + $verbose and print join ("\t",$chromosome_name,$chromosome_CT_conversions,$chromosome_GA_conversions),"\n"; + ### resetting the chromosome transliteration counters + ($chromosome_CT_conversions,$chromosome_GA_conversions) = (0,0); + + ### Extracting chromosome name from the additional FastA header + $chromosome_name = extract_chromosome_name($_); + + ### alternatively, chromosomes can be written out into single-entry FastA files. This will only work for genomes with up to a few hundred chromosomes. 
+ unless ($multi_fasta){ + my $bisulfite_CT_conversion_filename = "$CT_dir/$chromosome_name"; + $bisulfite_CT_conversion_filename =~ s/$/.CT_conversion.fa/; + open (CT_CONVERT,'>',$bisulfite_CT_conversion_filename) or die "Can't write to file $bisulfite_CT_conversion_filename: $!\n"; + + my $bisulfite_GA_conversion_filename = "$GA_dir/$chromosome_name"; + $bisulfite_GA_conversion_filename =~ s/$/.GA_conversion.fa/; + open (GA_CONVERT,'>',$bisulfite_GA_conversion_filename) or die "Can't write to file $bisulfite_GA_conversion_filename: $!\n"; + } + + print CT_CONVERT ">",$chromosome_name,"_CT_converted\n"; + print GA_CONVERT ">",$chromosome_name,"_GA_converted\n"; + } + + else{ + my $sequence = uc$_; + + ### (I) First replacing all ambiguous sequence characters (such as M,S,R....) by N (G,A,T,C,N and the line endings \r and \n are added to a character group) + + $sequence =~ s/[^ATCGN\n\r]/N/g; + + ### (II) Writing the chromosome out into a C->T converted version (equals forward strand conversion) + + my $CT_sequence = $sequence; + my $CT_transliterations_performed = ($CT_sequence =~ tr/C/T/); # converts all Cs into Ts + $total_CT_conversions += $CT_transliterations_performed; + $chromosome_CT_conversions += $CT_transliterations_performed; + + print CT_CONVERT $CT_sequence; + + ### (III) Writing the chromosome out in a G->A converted version of the forward strand (this is equivalent to reverse- + ### complementing the forward strand and then C->T converting it) + + my $GA_sequence = $sequence; + my $GA_transliterations_performed = ($GA_sequence =~ tr/G/A/); # converts all Gs to As on the forward strand + $total_GA_conversions += $GA_transliterations_performed; + $chromosome_GA_conversions += $GA_transliterations_performed; + + print GA_CONVERT $GA_sequence; + + } + } + $verbose and print join ("\t",$chromosome_name,$chromosome_CT_conversions,$chromosome_GA_conversions),"\n"; + } + close (CT_CONVERT) or die "Failed to close filehandle: $!\n"; + close (GA_CONVERT) or 
die "Failed to close filehandle: $!\n"; + + + print "\nTotal number of conversions performed:\n"; + print "C->T:\t$total_CT_conversions\n"; + print "G->A:\t$total_GA_conversions\n"; + + warn "\nStep II - Genome bisulfite conversions - completed\n\n\n"; +} + +sub extract_chromosome_name { + + my $header = shift; + + ## Bowtie extracts the first string after the initial > in the FASTA file, so we are doing this as well + + if ($header =~ s/^>//){ + my ($chromosome_name) = split (/\s+/,$header); + return $chromosome_name; + } + else{ + die "The specified chromosome file doesn't seem to be in FASTA format as required! $!\n"; + } +} + +sub create_bisulfite_genome_folders{ + + $verbose and print "Bismark Genome Preparation - Step I: Preparing folders\n\n"; + + # Ensuring a genome folder has been specified + if ($genome_folder){ + unless ($genome_folder =~ /\/$/){ + $genome_folder =~ s/$/\//; + } + $verbose and print "Path to genome folder specified: $genome_folder\n"; + chdir $genome_folder or die "Could't move to directory $genome_folder. Make sure the directory exists! $!"; + + # making the genome folder path abolsolute so it won't break if the path was specified relative + $genome_folder = getcwd; + unless ($genome_folder =~ /\/$/){ + $genome_folder =~ s/$/\//; + } + } + + else{ + $verbose and print "Genome folder was not provided as argument "; + while (1){ + print "Please specify a genome folder to be bisulfite converted:\n"; + $genome_folder = ; + chomp $genome_folder; + + # adding a trailing slash unless already present + unless ($genome_folder =~ /\/$/){ + $genome_folder =~ s/$/\//; + } + if (chdir $genome_folder){ + last; + } + else{ + warn "Could't move to directory $genome_folder! 
$!"; + } + } + } + + if ($path_to_bowtie){ + unless ($path_to_bowtie =~ /\/$/){ + $path_to_bowtie =~ s/$/\//; + } + if (chdir $path_to_bowtie){ + if ($bowtie2){ + $verbose and print "Path to Bowtie 2 specified: $path_to_bowtie\n"; + } + else{ + $verbose and print "Path to Bowtie (1) specified: $path_to_bowtie\n"; + } + } + else{ + die "There was an error with the path to bowtie: $!\n"; + } + } + + chdir $genome_folder or die "Could't move to directory $genome_folder. Make sure the directory exists! $!"; + + + # Exiting unless there are fastA files in the folder + my @filenames = <*.fa>; + + ### if there aren't any genomic files with the extension .fa we will look for files with the extension .fasta + unless (@filenames){ + @filenames = <*.fasta>; + } + + unless (@filenames){ + die "The specified genome folder $genome_folder does not contain any sequence files in FastA format (with .fa or .fasta file extensions\n"; + } + + warn "Bisulfite Genome Indexer version $bismark_version (last modified 17 Nov 2011)\n\n"; + sleep (3); + + # creating a directory inside the genome folder to store the bisfulfite genomes unless it already exists + my $bisulfite_dir = "${genome_folder}Bisulfite_Genome/"; + unless (-d $bisulfite_dir){ + mkdir $bisulfite_dir or die "Unable to create directory $bisulfite_dir $!\n"; + $verbose and print "Created Bisulfite Genome folder $bisulfite_dir\n"; + } + else{ + while (1){ + print "\nA directory called $bisulfite_dir already exists. 
Bisulfite converted sequences and/or already existing Bowtie (1 or 2) indexes might be overwritten!\nDo you want to continue anyway?\t"; + my $proceed = ; + chomp $proceed; + if ($proceed =~ /^y/i ){ + last; + } + elsif ($proceed =~ /^n/i){ + die "Terminated by user\n\n"; + } + } + } + + ### as of version 0.6.0 the Bismark indexer will no longer delete the Bisulfite_Genome directory if it was present already, since it could store the Bowtie 1 or 2 indexes already + # removing any existing files and subfolders in the bisulfite directory (the specified directory won't be deleted) + # rmtree($bisulfite_dir, {verbose => 1,keep_root => 1}); + # unless (-d $bisulfite_dir){ # had to add this after changing remove_tree to rmtree // suggested by Samantha Cooper @ Illumina + # mkdir $bisulfite_dir or die "Unable to create directory $bisulfite_dir $!\n"; + # } + # } + + chdir $bisulfite_dir or die "Unable to move to $bisulfite_dir\n"; + $CT_dir = "${bisulfite_dir}CT_conversion/"; + $GA_dir = "${bisulfite_dir}GA_conversion/"; + + # creating 2 subdirectories to store a C->T (forward strand conversion) and a G->A (reverse strand conversion) + # converted version of the genome + unless (-d $CT_dir){ + mkdir $CT_dir or die "Unable to create directory $CT_dir $!\n"; + $verbose and print "Created Bisulfite Genome folder $CT_dir\n"; + } + unless (-d $GA_dir){ + mkdir $GA_dir or die "Unable to create directory $GA_dir $!\n"; + $verbose and print "Created Bisulfite Genome folder $GA_dir\n"; + } + + # moving back to the original genome folder + chdir $genome_folder or die "Could't move to directory $genome_folder $!"; + # $verbose and print "Moved back to genome folder folder $genome_folder\n"; + warn "\nStep I - Prepare genome folders - completed\n\n\n"; + return @filenames; +} + +sub print_helpfile{ + print << 'HOW_TO'; + + +DESCRIPTION + +This script is supposed to convert a specified reference genome into two different bisulfite +converted versions and index them for alignments with 
Bowtie 1 (default), or Bowtie 2. The first +bisulfite genome will have all Cs converted to Ts (C->T), and the other one will have all Gs +converted to As (G->A). Both bisulfite genomes will be stored in subfolders within the reference +genome folder. Once the bisulfite conversion has been completed the program will fork and launch +two simultaneous instances of the bowtie 1 or 2 indexer (bowtie-build or bowtie2-build). Be aware +that the indexing process can take up to several hours; this will mainly depend on genome size +and system resources. + + + + +The following is a brief description of command line options and arguments to control the +Bismark Genome Preparation script: + + +USAGE: bismark_genome_preparation [options] + + +OPTIONS: + +--help/--man Displays this help filea and exits. + +--version Displays version information and exits. + +--verbose Print verbose output for more details or debugging. + +--path_to_bowtie The full path to the Bowtie 1 or Bowtie 2 installation on your system.If + the path is not provided as an option you will be prompted for it. + +--bowtie2 This will create bisulfite indexes for Bowtie 2. (Default: Bowtie 1). + +--single_fasta Instruct the Bismark Indexer to write the converted genomes into + single-entry FastA files instead of making one multi-FastA file (MFA) + per chromosome. This might be useful if individual bisulfite converted + chromosomes are needed (e.g. for debugging), however it can cause a + problem with indexing if the number of chromosomes is vast (this is likely + to be in the range of several thousand files; the operating system can + only handle lists up to a certain length, and some newly assembled + genomes may contain 20000-50000 contigs of scaffold files which do exceed + this list length limit). + + +ARGUMENTS: + + The path to the folder containing the genome to be bisulfite converted. 
+ At the current time Bismark Genome Preparation expects one or more fastA + files in the folder (with the file extension: .fa or .fasta). If the path + is not provided as an argument you will be prompted for it. + + + +This script was last modified on 18 Nov 2011. +HOW_TO +} diff --git a/lib/tool_shed/test/test_data/repos/bismark/1/bismark_methylation_extractor.py b/lib/tool_shed/test/test_data/repos/bismark/1/bismark_methylation_extractor.py new file mode 100644 index 000000000000..cb79d1ecf590 --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/bismark/1/bismark_methylation_extractor.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python + +import argparse, os, shutil, subprocess, sys, tempfile, fileinput +import zipfile +from glob import glob + +def stop_err( msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit() + +def zipper(dir, zip_file): + zip = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED) + root_len = len(os.path.abspath(dir)) + for root, dirs, files in os.walk(dir): + archive_root = os.path.abspath(root)[root_len:] + for f in files: + fullpath = os.path.join(root, f) + archive_name = os.path.join(archive_root, f) + zip.write(fullpath, archive_name, zipfile.ZIP_DEFLATED) + zip.close() + return zip_file + +def __main__(): + #Parse Command Line + parser = argparse.ArgumentParser(description='Wrapper for the bismark methylation caller.') + + # input options + parser.add_argument( '--infile', help='Input file in SAM format.' 
) + parser.add_argument( '--single-end', dest='single_end', action="store_true" ) + parser.add_argument( '--paired-end', dest='paired_end', action="store_true" ) + + parser.add_argument( '--report-file', dest='report_file' ) + parser.add_argument( '--comprehensive', action="store_true" ) + parser.add_argument( '--merge-non-cpg', dest='merge_non_cpg', action="store_true" ) + parser.add_argument( '--no-overlap', dest='no_overlap', action="store_true" ) + parser.add_argument( '--compress' ) + parser.add_argument( '--ignore-bps', dest='ignore_bps', type=int ) + + # OT - original top strand + parser.add_argument( '--cpg_ot' ) + parser.add_argument( '--chg_ot' ) + parser.add_argument( '--chh_ot' ) + # CTOT - complementary to original top strand + parser.add_argument( '--cpg_ctot' ) + parser.add_argument( '--chg_ctot' ) + parser.add_argument( '--chh_ctot' ) + # OB - original bottom strand + parser.add_argument( '--cpg_ob' ) + parser.add_argument( '--chg_ob' ) + parser.add_argument( '--chh_ob' ) + # CTOT - complementary to original bottom strand + parser.add_argument( '--cpg_ctob' ) + parser.add_argument( '--chg_ctob' ) + parser.add_argument( '--chh_ctob' ) + + parser.add_argument( '--cpg_context' ) + parser.add_argument( '--chg_context' ) + parser.add_argument( '--chh_context' ) + + parser.add_argument( '--non_cpg_context' ) + + parser.add_argument( '--non_cpg_context_ot' ) + parser.add_argument( '--non_cpg_context_ctot' ) + parser.add_argument( '--non_cpg_context_ob' ) + parser.add_argument( '--non_cpg_context_ctob' ) + + args = parser.parse_args() + + + # Build methylation extractor command + output_dir = tempfile.mkdtemp() + cmd = 'bismark_methylation_extractor --no_header -o %s %s %s' + + additional_opts = '' + # Set up all options + if args.single_end: + additional_opts += ' --single-end ' + else: + additional_opts += ' --paired-end ' + if args.no_overlap: + additional_opts += ' --no_overlap ' + if args.ignore_bps: + additional_opts += ' --ignore %s ' % 
args.ignore_bps + if args.comprehensive: + additional_opts += ' --comprehensive ' + if args.merge_non_cpg: + additional_opts += ' --merge_non_CpG ' + if args.report_file: + additional_opts += ' --report ' + + + # Final command: + cmd = cmd % (output_dir, additional_opts, args.infile) + + # Run + try: + tmp_out = tempfile.NamedTemporaryFile().name + tmp_stdout = open( tmp_out, 'wb' ) + tmp_err = tempfile.NamedTemporaryFile().name + tmp_stderr = open( tmp_err, 'wb' ) + proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr ) + returncode = proc.wait() + tmp_stderr.close() + # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp_err, 'rb' ) + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + tmp_stdout.close() + tmp_stderr.close() + if returncode != 0: + raise Exception, stderr + + # TODO: look for errors in program output. 
+ except Exception, e: + stop_err( 'Error in bismark methylation extractor:\n' + str( e ) ) + + + # collect and copy output files + + if args.compress: + zipper(output_dir, args.compress) + + + if args.cpg_ot: + shutil.move( glob(os.path.join( output_dir, '*CpG_OT_*'))[0], args.cpg_ot ) + if args.chg_ot: + shutil.move( glob(os.path.join( output_dir, '*CHG_OT_*'))[0], args.chg_ot ) + if args.chh_ot: + shutil.move( glob(os.path.join( output_dir, '*CHH_OT_*'))[0], args.chh_ot ) + if args.cpg_ctot: + shutil.move( glob(os.path.join( output_dir, '*CpG_CTOT_*'))[0], args.cpg_ctot ) + if args.chg_ctot: + shutil.move( glob(os.path.join( output_dir, '*CHG_CTOT_*'))[0], args.chg_ctot ) + if args.chh_ctot: + shutil.move( glob(os.path.join( output_dir, '*CHH_CTOT_*'))[0], args.chh_ctot ) + if args.cpg_ob: + shutil.move( glob(os.path.join( output_dir, '*CpG_OB_*'))[0], args.cpg_ob ) + if args.chg_ob: + shutil.move( glob(os.path.join( output_dir, '*CHG_OB_*'))[0], args.chg_ob ) + if args.chh_ob: + shutil.move( glob(os.path.join( output_dir, '*CHH_OB_*'))[0], args.chh_ob ) + if args.cpg_ctob: + shutil.move( glob(os.path.join( output_dir, '*CpG_CTOB_*'))[0], args.cpg_ctob ) + if args.chg_ctob: + shutil.move( glob(os.path.join( output_dir, '*CHG_CTOB_*'))[0], args.chg_ctob ) + if args.chh_ctob: + shutil.move( glob(os.path.join( output_dir, '*CHH_CTOB_*'))[0], args.chh_ctob ) + + # context-dependent methylation output files + if args.cpg_context: + shutil.move( glob(os.path.join( output_dir, '*CpG_context_*'))[0], args.cpg_context ) + if args.chg_context: + shutil.move( glob(os.path.join( output_dir, '*CHG_context_*'))[0], args.chg_context ) + if args.chh_context: + shutil.move( glob(os.path.join( output_dir, '*CHH_context_*'))[0], args.chh_context ) + + if args.non_cpg_context: + shutil.move( glob(os.path.join( output_dir, '*Non_CpG_context_*'))[0], args.non_cpg_context ) + + if args.non_cpg_context_ot: + shutil.move( glob(os.path.join( output_dir, '*Non_CpG_OT_*'))[0], 
args.non_cpg_context_ot ) + if args.non_cpg_context_ctot: + shutil.move( glob(os.path.join( output_dir, '*Non_CpG_CTOT_*'))[0], args.non_cpg_context_ctot ) + if args.non_cpg_context_ob: + shutil.move( glob(os.path.join( output_dir, '*Non_CpG_OB_*'))[0], args.non_cpg_context_ob ) + if args.non_cpg_context_ctob: + shutil.move( glob(os.path.join( output_dir, '*Non_CpG_CTOB_*'))[0], args.non_cpg_context_ctob ) + + + + if args.report_file: + shutil.move( glob(os.path.join( output_dir, '*_splitting_report*'))[0], args.report_file ) + + + # Clean up temp dirs + if os.path.exists( output_dir ): + shutil.rmtree( output_dir ) + +if __name__=="__main__": __main__() diff --git a/lib/tool_shed/test/test_data/bismark/bismark_methylation_extractor.xml b/lib/tool_shed/test/test_data/repos/bismark/1/bismark_methylation_extractor.xml similarity index 100% rename from lib/tool_shed/test/test_data/bismark/bismark_methylation_extractor.xml rename to lib/tool_shed/test/test_data/repos/bismark/1/bismark_methylation_extractor.xml diff --git a/lib/tool_shed/test/test_data/repos/bismark/1/bismark_wrapper.py b/lib/tool_shed/test/test_data/repos/bismark/1/bismark_wrapper.py new file mode 100644 index 000000000000..606fa428bd77 --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/bismark/1/bismark_wrapper.py @@ -0,0 +1,288 @@ +#!/usr/bin/env python + +import argparse, os, shutil, subprocess, sys, tempfile, fileinput +import fileinput +from glob import glob + +def stop_err( msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit() + +def __main__(): + #Parse Command Line + parser = argparse.ArgumentParser(description='Wrapper for the bismark bisulfite mapper.') + parser.add_argument( '-p', '--num-threads', dest='num_threads', + type=int, default=4, help='Use this many threads to align reads. The default is 4.' 
) + + parser.add_argument( '--bismark_path', dest='bismark_path', help='Path to the bismark perl scripts' ) + + parser.add_argument( '--bowtie2', action='store_true', default=False, help='Running bismark with bowtie2 and not with bowtie.' ) + + # input options + parser.add_argument( '--own-file', dest='own_file', help='' ) + parser.add_argument( '-D', '--indexes-path', dest='index_path', help='Indexes directory; location of .ebwt and .fa files.' ) + parser.add_argument( '-O', '--output', dest='output' ) + parser.add_argument( '--output-report-file', dest='output_report_file' ) + parser.add_argument( '--suppress-header', dest='suppress_header', action="store_true" ) + + parser.add_argument( '--mate-paired', dest='mate_paired', action='store_true', help='Reads are mate-paired', default=False) + + + parser.add_argument( '-1', '--mate1', dest='mate1', + help='The forward reads file in Sanger FASTQ or FASTA format.' ) + parser.add_argument( '-2', '--mate2', dest='mate2', + help='The reverse reads file in Sanger FASTQ or FASTA format.' ) + + parser.add_argument( '--output-unmapped-reads', dest='output_unmapped_reads', + help='Additional output file with unmapped reads (single-end).' ) + parser.add_argument( '--output-unmapped-reads-l', dest='output_unmapped_reads_l', + help='File name for unmapped reads (left, paired-end).' ) + parser.add_argument( '--output-unmapped-reads-r', dest='output_unmapped_reads_r', + help='File name for unmapped reads (right, paired-end).' ) + + + parser.add_argument( '--output-suppressed-reads', dest='output_suppressed_reads', + help='Additional output file with suppressed reads (single-end).' ) + parser.add_argument( '--output-suppressed-reads-l', dest='output_suppressed_reads_l', + help='File name for suppressed reads (left, paired-end).' ) + parser.add_argument( '--output-suppressed-reads-r', dest='output_suppressed_reads_r', + help='File name for suppressed reads (right, paired-end).' 
) + + + parser.add_argument( '--single-paired', dest='single_paired', + help='The single-end reads file in Sanger FASTQ or FASTA format.' ) + + parser.add_argument( '--fastq', action='store_true', help='Query filetype is in FASTQ format') + parser.add_argument( '--fasta', action='store_true', help='Query filetype is in FASTA format') + parser.add_argument( '--phred64-quals', dest='phred64', action="store_true" ) + + + parser.add_argument( '--skip-reads', dest='skip_reads', type=int ) + parser.add_argument( '--qupto', type=int) + + + # paired end options + parser.add_argument( '-I', '--minins', dest='min_insert' ) + parser.add_argument( '-X', '--maxins', dest='max_insert' ) + parser.add_argument( '--no-mixed', dest='no_mixed', action="store_true" ) + parser.add_argument( '--no-discordant', dest='no_discordant', action="store_true" ) + + #parse general options + # default 20 + parser.add_argument( '--seed-len', dest='seed_len', type=int) + # default 15 + parser.add_argument( '--seed-extention-attempts', dest='seed_extention_attempts', type=int ) + # default 0 + parser.add_argument( '--seed-mismatches', dest='seed_mismatches', type=int ) + # default 2 + parser.add_argument( '--max-reseed', dest='max_reseed', type=int ) + """ + # default 70 + parser.add_argument( '--maqerr', dest='maqerr', type=int ) + """ + + """ + The number of megabytes of memory a given thread is given to store path + descriptors in --best mode. Best-first search must keep track of many paths + at once to ensure it is always extending the path with the lowest cumulative + cost. Bowtie tries to minimize the memory impact of the descriptors, but + they can still grow very large in some cases. If you receive an error message + saying that chunk memory has been exhausted in --best mode, try adjusting + this parameter up to dedicate more memory to the descriptors. Default: 512. 
+ """ + parser.add_argument( '--chunkmbs', type=int, default=512 ) + + args = parser.parse_args() + + # Create bismark index if necessary. + index_dir = "" + if args.own_file: + """ + Create a temporary index with the offered files from the user. + Utilizing the script: bismark_genome_preparation + bismark_genome_preparation --bowtie2 hg19/ + """ + tmp_index_dir = tempfile.mkdtemp() + index_path = os.path.join( tmp_index_dir, '.'.join( os.path.split( args.own_file )[1].split( '.' )[:-1] ) ) + try: + """ + Create a hard link pointing to args.own_file named 'index_path'.fa. + """ + os.symlink( args.own_file, index_path + '.fa' ) + except Exception, e: + if os.path.exists( tmp_index_dir ): + shutil.rmtree( tmp_index_dir ) + stop_err( 'Error in linking the reference database.\n' + str( e ) ) + # bismark_genome_preparation needs the complete path to the folder in which the database is stored + if args.bowtie2: + cmd_index = 'bismark_genome_preparation --bowtie2 %s ' % ( tmp_index_dir ) + else: + cmd_index = 'bismark_genome_preparation %s ' % ( tmp_index_dir ) + if args.bismark_path: + # add the path to the bismark perl scripts, that is needed for galaxy + cmd_index = '%s/%s' % (args.bismark_path, cmd_index) + try: + tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name + tmp_stderr = open( tmp, 'wb' ) + proc = subprocess.Popen( args=cmd_index, shell=True, cwd=tmp_index_dir, stdout=open(os.devnull, 'wb'), stderr=tmp_stderr.fileno() ) + returncode = proc.wait() + tmp_stderr.close() + # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp, 'rb' ) + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + tmp_stderr.close() + if returncode != 0: + raise Exception, stderr + except Exception, e: + if os.path.exists( tmp_index_dir ): + shutil.rmtree( tmp_index_dir ) + stop_err( 'Error indexing reference sequence\n' + str( 
e ) ) + index_dir = tmp_index_dir + else: + index_dir = args.index_path + + # Build bismark command + tmp_bismark_dir = tempfile.mkdtemp() + output_dir = os.path.join( tmp_bismark_dir, 'results') + cmd = 'bismark %(args)s --temp_dir %(tmp_bismark_dir)s -o %(output_dir)s --quiet %(genome_folder)s %(reads)s' + if args.bismark_path: + # add the path to the bismark perl scripts, that is needed for galaxy + cmd = '%s/%s' % (args.bismark_path, cmd) + + arguments = { + 'genome_folder': index_dir, + 'args': '', + 'tmp_bismark_dir': tmp_bismark_dir, + 'output_dir': output_dir, + } + + additional_opts = '' + # Set up the reads + if args.mate_paired: + # paired-end reads library + reads = '-1 %s ' % ( args.mate1 ) + reads += ' -2 %s ' % ( args.mate2 ) + additional_opts += ' -I %s -X %s ' % (args.min_insert, args.max_insert) + else: + # single paired reads library + reads = ' %s ' % ( args.single_paired ) + + + if not args.bowtie2: + # use bowtie specific options + additional_opts += ' --best ' + if args.seed_mismatches: + # --seedmms + additional_opts += ' -n %s ' % args.seed_mismatches + if args.seed_len: + # --seedlen + additional_opts += ' -l %s ' % args.seed_len + + # alignment options + if args.bowtie2: + additional_opts += ' -p %s --bowtie2 ' % args.num_threads + if args.seed_mismatches: + additional_opts += ' -N %s ' % args.seed_mismatches + if args.seed_len: + additional_opts += ' -L %s ' % args.seed_len + if args.seed_extention_attempts: + additional_opts += ' -D %s ' % args.seed_extention_attempts + if args.max_reseed: + additional_opts += ' -R %s ' % args.max_reseed + if args.no_discordant: + additional_opts += ' --no-discordant ' + if args.no_mixed: + additional_opts += ' --no-mixed ' + """ + if args.maqerr: + additional_opts += ' --maqerr %s ' % args.maqerr + """ + if args.skip_reads: + additional_opts += ' --skip %s ' % args.skip_reads + if args.qupto: + additional_opts += ' --qupto %s ' % args.qupto + if args.phred64: + additional_opts += ' --phred64-quals ' + 
if args.suppress_header: + additional_opts += ' --sam-no-hd ' + if args.output_unmapped_reads or ( args.output_unmapped_reads_l and args.output_unmapped_reads_r): + additional_opts += ' --un ' + if args.output_suppressed_reads or ( args.output_suppressed_reads_l and args.output_suppressed_reads_r): + additional_opts += ' --ambiguous ' + + arguments.update( {'args': additional_opts, 'reads': reads} ) + + # Final command: + cmd = cmd % arguments + + # Run + try: + tmp_out = tempfile.NamedTemporaryFile().name + tmp_stdout = open( tmp_out, 'wb' ) + tmp_err = tempfile.NamedTemporaryFile().name + tmp_stderr = open( tmp_err, 'wb' ) + proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr ) + returncode = proc.wait() + tmp_stderr.close() + # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp_err, 'rb' ) + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + tmp_stdout.close() + tmp_stderr.close() + if returncode != 0: + raise Exception, stderr + + # TODO: look for errors in program output. 
+ except Exception, e: + stop_err( 'Error in bismark:\n' + str( e ) ) + + + # collect and copy output files + """ + if args.output_report_file: + output_report_file = open(args.output_report_file, 'w+') + for line in fileinput.input(glob( os.path.join( output_dir, '*.txt') )): + output_report_file.write(line) + output_report_file.close() + """ + + if args.output_suppressed_reads: + shutil.move( glob(os.path.join( output_dir, '*ambiguous_reads.txt'))[0], args.output_suppressed_reads ) + if args.output_suppressed_reads_l: + shutil.move( glob(os.path.join( output_dir, '*ambiguous_reads_1.txt'))[0], args.output_suppressed_reads_l ) + if args.output_suppressed_reads_r: + shutil.move( glob(os.path.join( output_dir, '*ambiguous_reads_2.txt'))[0], args.output_suppressed_reads_r ) + + if args.output_unmapped_reads: + shutil.move( glob(os.path.join( output_dir, '*unmapped_reads.txt'))[0], args.output_unmapped_reads ) + if args.output_unmapped_reads_l: + shutil.move( glob(os.path.join( output_dir, '*unmapped_reads_1.txt'))[0], args.output_unmapped_reads_l ) + if args.output_unmapped_reads_r: + shutil.move( glob(os.path.join( output_dir, '*unmapped_reads_2.txt'))[0], args.output_unmapped_reads_r ) + + shutil.move( glob( os.path.join( output_dir, '*.sam'))[0] , args.output) + + # Clean up temp dirs + if args.own_file: + if os.path.exists( tmp_index_dir ): + shutil.rmtree( tmp_index_dir ) + if os.path.exists( tmp_bismark_dir ): + shutil.rmtree( tmp_bismark_dir ) + +if __name__=="__main__": __main__() diff --git a/lib/tool_shed/test/test_data/repos/bismark/1/tool_data_table_conf.xml.sample b/lib/tool_shed/test/test_data/repos/bismark/1/tool_data_table_conf.xml.sample new file mode 100644 index 000000000000..9e62763c741d --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/bismark/1/tool_data_table_conf.xml.sample @@ -0,0 +1,13 @@ + + + + + value, dbkey, name, path + +
+ + + value, dbkey, name, path + +
+
diff --git a/lib/tool_shed/test/test_data/repos/bismark/1/tool_dependencies.xml b/lib/tool_shed/test/test_data/repos/bismark/1/tool_dependencies.xml new file mode 100644 index 000000000000..776d2aa33442 --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/bismark/1/tool_dependencies.xml @@ -0,0 +1,61 @@ + + + + $REPOSITORY_INSTALL_DIR + + + + + http://downloads.sourceforge.net/project/bowtie-bio/bowtie2/2.0.0-beta7/bowtie2-2.0.0-beta7-source.zip + make + + bowtie2 + $INSTALL_DIR/bin + + + bowtie2-align + $INSTALL_DIR/bin + + + bowtie2-build + $INSTALL_DIR/bin + + chmod +x $INSTALL_DIR/bin/bowtie2 + + $INSTALL_DIR/bin + + + + +Compiling bowtie2 requires zlib and libpthread to be present on your system. + + + + + + http://downloads.sourceforge.net/project/bowtie-bio/bowtie/0.12.8/bowtie-0.12.8-src.zip + make + + bowtie + $INSTALL_DIR/bin + + + bowtie-inspect + $INSTALL_DIR/bin + + + bowtie-build + $INSTALL_DIR/bin + + + $INSTALL_DIR/bin + + + + + + + + + + diff --git a/lib/tool_shed/test/test_data/repos/emboss_5_0470/0/emboss_antigenic.xml b/lib/tool_shed/test/test_data/repos/emboss_5_0470/0/emboss_antigenic.xml new file mode 100644 index 000000000000..1f3793c6007d --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/emboss_5_0470/0/emboss_antigenic.xml @@ -0,0 +1,58 @@ + + Predicts potentially antigenic regions of a protein sequence, using the method of Kolaskar and Tongaonkar. + emboss + antigenic -sequence $input1 -outfile $out_file1 -minlen $minlen -rformat2 $out_format1 -auto + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + You can view the original documentation here_. + + .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/antigenic.html + +------ + +**Citation** + +For the underlying tool, please cite `Rice P, Longden I, Bleasby A. EMBOSS: the European Molecular Biology Open Software Suite. Trends Genet. 2000 Jun;16(6):276-7. 
<http://www.ncbi.nlm.nih.gov/pubmed/10827456>`_ + +If you use this tool in Galaxy, please cite `Blankenberg D, Taylor J, Schenck I, He J, Zhang Y, Ghent M, Veeraraghavan N, Albert I, Miller W, Makova KD, Hardison RC, Nekrutenko A. A framework for collaborative analysis of ENCODE data: making large-scale analyses biologist-friendly. Genome Res. 2007 Jun;17(6):960-4. <http://www.ncbi.nlm.nih.gov/pubmed/17568012>`_ + + + \ No newline at end of file diff --git a/lib/tool_shed/test/test_data/repos/emboss_5_0470/0/emboss_format_corrector.py b/lib/tool_shed/test/test_data/repos/emboss_5_0470/0/emboss_format_corrector.py new file mode 100644 index 000000000000..3591cd8feaeb --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/emboss_5_0470/0/emboss_format_corrector.py @@ -0,0 +1,53 @@ +#EMBOSS format corrector + +import operator +#from galaxy import datatypes + +#Properly set file formats after job run +def exec_after_process( app, inp_data, out_data, param_dict,tool, stdout, stderr): +#Properly set file formats before job run +#def exec_before_job(trans, inp_data, out_data, param_dict,tool): + #why isn't items an ordered list? + items = out_data.items() + #lets sort it ourselves.... + items = sorted(items, key=operator.itemgetter(0)) + #items is now sorted... 
+ + #normal filetype correction + data_count=1 + for name, data in items: + outputType = param_dict.get( 'out_format'+str(data_count), None ) + #print "data_count",data_count, "name", name, "outputType", outputType + if outputType !=None: + if outputType == 'ncbi': + outputType = "fasta" + elif outputType == 'excel': + outputType = "tabular" + elif outputType == 'text': + outputType = "txt" + data = app.datatypes_registry.change_datatype(data, outputType) + app.model.context.add( data ) + app.model.context.flush() + data_count+=1 + + #html filetype correction + data_count=1 + for name, data in items: + wants_plot = param_dict.get( 'html_out'+str(data_count), None ) + ext = "html" + if wants_plot == "yes": + data = app.datatypes_registry.change_datatype(data, ext) + app.model.context.add( data ) + app.model.context.flush() + data_count+=1 + + #png file correction + data_count=1 + for name, data in items: + wants_plot = param_dict.get( 'plot'+str(data_count), None ) + ext = "png" + if wants_plot == "yes": + data = app.datatypes_registry.change_datatype(data, ext) + app.model.context.add( data ) + app.model.context.flush() + data_count+=1 diff --git a/lib/tool_shed/test/test_data/repos/emboss_5_0470/0/tool_dependencies.xml b/lib/tool_shed/test/test_data/repos/emboss_5_0470/0/tool_dependencies.xml new file mode 100644 index 000000000000..3c9b8f43ec1e --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/emboss_5_0470/0/tool_dependencies.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/lib/tool_shed/test/test_data/repos/emboss_5_0470/1/emboss_antigenic.xml b/lib/tool_shed/test/test_data/repos/emboss_5_0470/1/emboss_antigenic.xml new file mode 100644 index 000000000000..1f3793c6007d --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/emboss_5_0470/1/emboss_antigenic.xml @@ -0,0 +1,58 @@ + + Predicts potentially antigenic regions of a protein sequence, using the method of Kolaskar and Tongaonkar. 
+ emboss + antigenic -sequence $input1 -outfile $out_file1 -minlen $minlen -rformat2 $out_format1 -auto + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + You can view the original documentation here_. + + .. _here: http://emboss.sourceforge.net/apps/release/5.0/emboss/apps/antigenic.html + +------ + +**Citation** + +For the underlying tool, please cite `Rice P, Longden I, Bleasby A. EMBOSS: the European Molecular Biology Open Software Suite. Trends Genet. 2000 Jun;16(6):276-7. <http://www.ncbi.nlm.nih.gov/pubmed/10827456>`_ + +If you use this tool in Galaxy, please cite `Blankenberg D, Taylor J, Schenck I, He J, Zhang Y, Ghent M, Veeraraghavan N, Albert I, Miller W, Makova KD, Hardison RC, Nekrutenko A. A framework for collaborative analysis of ENCODE data: making large-scale analyses biologist-friendly. Genome Res. 2007 Jun;17(6):960-4. <http://www.ncbi.nlm.nih.gov/pubmed/17568012>`_ + + + \ No newline at end of file diff --git a/lib/tool_shed/test/test_data/repos/emboss_5_0470/1/emboss_format_corrector.py b/lib/tool_shed/test/test_data/repos/emboss_5_0470/1/emboss_format_corrector.py new file mode 100644 index 000000000000..3591cd8feaeb --- /dev/null +++ b/lib/tool_shed/test/test_data/repos/emboss_5_0470/1/emboss_format_corrector.py @@ -0,0 +1,53 @@ +#EMBOSS format corrector + +import operator +#from galaxy import datatypes + +#Properly set file formats after job run +def exec_after_process( app, inp_data, out_data, param_dict,tool, stdout, stderr): +#Properly set file formats before job run +#def exec_before_job(trans, inp_data, out_data, param_dict,tool): + #why isn't items an ordered list? + items = out_data.items() + #lets sort it ourselves.... + items = sorted(items, key=operator.itemgetter(0)) + #items is now sorted... 
+ + #normal filetype correction + data_count=1 + for name, data in items: + outputType = param_dict.get( 'out_format'+str(data_count), None ) + #print "data_count",data_count, "name", name, "outputType", outputType + if outputType !=None: + if outputType == 'ncbi': + outputType = "fasta" + elif outputType == 'excel': + outputType = "tabular" + elif outputType == 'text': + outputType = "txt" + data = app.datatypes_registry.change_datatype(data, outputType) + app.model.context.add( data ) + app.model.context.flush() + data_count+=1 + + #html filetype correction + data_count=1 + for name, data in items: + wants_plot = param_dict.get( 'html_out'+str(data_count), None ) + ext = "html" + if wants_plot == "yes": + data = app.datatypes_registry.change_datatype(data, ext) + app.model.context.add( data ) + app.model.context.flush() + data_count+=1 + + #png file correction + data_count=1 + for name, data in items: + wants_plot = param_dict.get( 'plot'+str(data_count), None ) + ext = "png" + if wants_plot == "yes": + data = app.datatypes_registry.change_datatype(data, ext) + app.model.context.add( data ) + app.model.context.flush() + data_count+=1 diff --git a/lib/tool_shed/test/test_data/emboss/0470_files/tool_dependencies.xml b/lib/tool_shed/test/test_data/repos/emboss_5_0470/1/tool_dependencies.xml similarity index 100% rename from lib/tool_shed/test/test_data/emboss/0470_files/tool_dependencies.xml rename to lib/tool_shed/test/test_data/repos/emboss_5_0470/1/tool_dependencies.xml diff --git a/lib/tool_shed/test/test_data/emboss/libx11_proto/first_tool_dependency/tool_dependencies.xml b/lib/tool_shed/test/test_data/repos/libx11_proto/0/tool_dependencies.xml similarity index 100% rename from lib/tool_shed/test/test_data/emboss/libx11_proto/first_tool_dependency/tool_dependencies.xml rename to lib/tool_shed/test/test_data/repos/libx11_proto/0/tool_dependencies.xml diff --git a/lib/tool_shed/test/test_data/emboss/libx11_proto/second_tool_dependency/tool_dependencies.xml 
b/lib/tool_shed/test/test_data/repos/libx11_proto/1/tool_dependencies.xml similarity index 100% rename from lib/tool_shed/test/test_data/emboss/libx11_proto/second_tool_dependency/tool_dependencies.xml rename to lib/tool_shed/test/test_data/repos/libx11_proto/1/tool_dependencies.xml diff --git a/lib/tool_shed/test/test_data/emboss/emboss_5_0_0/first_tool_dependency/tool_dependencies.xml b/lib/tool_shed/test/test_data/repos/package_emboss_5_0_0_0470/0/tool_dependencies.xml similarity index 100% rename from lib/tool_shed/test/test_data/emboss/emboss_5_0_0/first_tool_dependency/tool_dependencies.xml rename to lib/tool_shed/test/test_data/repos/package_emboss_5_0_0_0470/0/tool_dependencies.xml diff --git a/lib/tool_shed/test/test_data/emboss/emboss_5_0_0/second_tool_dependency/tool_dependencies.xml b/lib/tool_shed/test/test_data/repos/package_emboss_5_0_0_0470/1/tool_dependencies.xml similarity index 100% rename from lib/tool_shed/test/test_data/emboss/emboss_5_0_0/second_tool_dependency/tool_dependencies.xml rename to lib/tool_shed/test/test_data/repos/package_emboss_5_0_0_0470/1/tool_dependencies.xml diff --git a/lib/tool_shed/test/test_data/safetar_with_symlink.tar b/lib/tool_shed/test/test_data/safetar_with_symlink.tar new file mode 100644 index 000000000000..a7810960f691 Binary files /dev/null and b/lib/tool_shed/test/test_data/safetar_with_symlink.tar differ diff --git a/lib/tool_shed/util/repository_content_util.py b/lib/tool_shed/util/repository_content_util.py index 418e9ca9a4e7..3f04ff5581da 100644 --- a/lib/tool_shed/util/repository_content_util.py +++ b/lib/tool_shed/util/repository_content_util.py @@ -24,6 +24,20 @@ from tool_shed.webapp.model import Repository +def tar_open(uploaded_file): + isgzip = False + isbz2 = False + isgzip = checkers.is_gzip(uploaded_file) + if not isgzip: + isbz2 = checkers.is_bz2(uploaded_file) + if isgzip or isbz2: + # Open for reading with transparent compression. 
+ tar = tarfile.open(uploaded_file, "r:*") + else: + tar = tarfile.open(uploaded_file) + return tar + + def upload_tar( app: "ToolShedApp", host: str, @@ -39,17 +53,7 @@ def upload_tar( tdah: Optional[ToolDependencyAttributeHandler] = None, ) -> ChangeResponseT: if tar is None: - isgzip = False - isbz2 = False - isgzip = checkers.is_gzip(uploaded_file) - if not isgzip: - isbz2 = checkers.is_bz2(uploaded_file) - if isgzip or isbz2: - # Open for reading with transparent compression. - tar = tarfile.open(uploaded_file, "r:*") - else: - tar = tarfile.open(uploaded_file) - + tar = tar_open(uploaded_file) rdah = rdah or RepositoryDependencyAttributeHandler(app, unpopulate=False) tdah = tdah or ToolDependencyAttributeHandler(app, unpopulate=False) # Upload a tar archive of files. diff --git a/lib/tool_shed/webapp/api/repositories.py b/lib/tool_shed/webapp/api/repositories.py index a0d5df75247e..16c13f67ad53 100644 --- a/lib/tool_shed/webapp/api/repositories.py +++ b/lib/tool_shed/webapp/api/repositories.py @@ -41,7 +41,6 @@ from tool_shed.metadata import repository_metadata_manager from tool_shed.repository_types import util as rt_util from tool_shed.util import ( - commit_util, encoding_util, metadata_util, repository_util, diff --git a/mypy.ini b/mypy.ini index 6a3b9a641717..fe479abe7da6 100644 --- a/mypy.ini +++ b/mypy.ini @@ -4,7 +4,7 @@ plugins = pydantic.mypy show_error_codes = True ignore_missing_imports = True check_untyped_defs = True -exclude = lib/galaxy/tools/bundled|test/functional +exclude = lib/galaxy/tools/bundled|test/functional|.*tool_shed/test/test_data/repos pretty = True no_implicit_reexport = True no_implicit_optional = True diff --git a/pyproject.toml b/pyproject.toml index 3a74fe4a38ca..6ab9b16296be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,6 +5,7 @@ include = '\.pyi?$' extend-exclude = ''' ^/( | packages + | lib/tool_shed/test/test_data/repos )/ ''' diff --git a/test/unit/webapps/test_tool_validation.py 
b/test/unit/webapps/test_tool_validation.py index 02326c67e397..a1f421120ecc 100644 --- a/test/unit/webapps/test_tool_validation.py +++ b/test/unit/webapps/test_tool_validation.py @@ -1,7 +1,5 @@ import os import shutil -import tarfile -import tempfile from contextlib import contextmanager from galaxy.app_unittest_utils.galaxy_mock import MockApp @@ -9,14 +7,15 @@ from galaxy.util import galaxy_directory from tool_shed.tools.tool_validator import ToolValidator -BISMARK_TAR = os.path.join(galaxy_directory(), "lib/tool_shed/test/test_data/bismark/bismark.tar") +BISMARK_DIR = os.path.join(galaxy_directory(), "lib/tool_shed/test/test_data/repos/bismark/0") BOWTIE2_INDICES = os.path.join( galaxy_directory(), "lib/tool_shed/test/test_data/bowtie2_loc_sample/bowtie2_indices.loc.sample" ) def test_validate_valid_tool(): - with get_tool_validator() as tv, setup_bismark() as repo_dir: + repo_dir = BISMARK_DIR + with get_tool_validator() as tv: full_path = os.path.join(repo_dir, "bismark_methylation_extractor.xml") tool, valid, message = tv.load_tool_from_config(repository_id=None, full_path=full_path) assert tool.name == "Bismark" @@ -27,14 +26,16 @@ def test_validate_valid_tool(): def test_tool_validation_denies_allow_codefile(): - with get_tool_validator() as tv, setup_bismark() as repo_dir: + repo_dir = BISMARK_DIR + with get_tool_validator() as tv: full_path = os.path.join(repo_dir, "bismark_methylation_extractor.xml") tool, valid, message = tv.load_tool_from_config(repository_id=None, full_path=full_path) assert tool._allow_code_files is False def test_validate_tool_without_index(): - with get_tool_validator() as tv, setup_bismark() as repo_dir: + repo_dir = BISMARK_DIR + with get_tool_validator() as tv: full_path = os.path.join(repo_dir, "bismark_bowtie2_wrapper.xml") tool, valid, message = tv.load_tool_from_config(repository_id=None, full_path=full_path) assert valid is True @@ -63,15 +64,6 @@ def test_validate_tool_without_index(): assert not 
tool.params_with_missing_index_file -@contextmanager -def setup_bismark(): - repo_dir = tempfile.mkdtemp() - with tarfile.open(BISMARK_TAR) as archive: - archive.extractall(repo_dir) - yield repo_dir - shutil.rmtree(repo_dir, ignore_errors=True) - - @contextmanager def get_tool_validator(): app = MockApp()