Skip to content

Commit

Permalink
chore: secrets rotation and updates.
Browse files Browse the repository at this point in the history
Signed-off-by: Matteo Manica <[email protected]>
  • Loading branch information
drugilsberg committed Feb 19, 2025
1 parent 5bcea3d commit 5424e6b
Show file tree
Hide file tree
Showing 5 changed files with 217 additions and 35 deletions.
34 changes: 24 additions & 10 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,33 +16,47 @@ jobs:
runs-on: ubuntu-latest
permissions:
contents: read
defaults:
run:
shell: bash -l {0} # for conda command
env:
GIT_CLONE_PROTECTION_ACTIVE: false
steps:
- uses: actions/checkout@v2
- uses: conda-incubator/setup-miniconda@v2
with:
activate-environment: gt4sd
environment-file: conda_cpu_linux.yml
auto-activate-base: false
use-only-tar-bz2: true
- name: Install gt4sd from source
run: |
pip install -r dev_requirements.txt
pip install -r requirements.txt
pip install .
conda activate gt4sd
pip install --no-deps .
- name: Check black
run: |
conda activate gt4sd
python -m black src/gt4sd --check --diff --color
# - name: Check isort
# run: |
# conda activate gt4sd
# python -m isort src/gt4sd --check-only
- name: Remove unnecessary files (see https://stackoverflow.com/questions/75536771/github-runner-out-of-disk-space-after-building-docker-image)
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
- name: Check flake8
run: |
conda activate gt4sd
python -m flake8 --disable-noqa --per-file-ignores="__init__.py:F401" src/gt4sd
- name: Check mypy
run: |
python -m mypy src/gt4sd
- name: Run pytests
run: |
conda activate gt4sd
python -m pytest -sv
- name: Test entry-points
run: |
conda activate gt4sd
gt4sd-trainer --help
gt4sd-inference --help
gt4sd-saving --help
gt4sd-upload --help
gt4sd-pl-to-hf --help
gt4sd-hf-to-st --help
11 changes: 5 additions & 6 deletions dev_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
better-apidoc==0.3.1
better-apidoc==0.3.2
black==22.3.0
docutils==0.17.1
flake8==3.8.4
flask==1.1.2
flask_login==0.5.0
# isort==5.7.0
licenseheaders==0.8.8
mypy==0.950
myst-parser==0.13.3
pytest>=6.2.5
mypy==1.0.0
myst-parser==1.0.0
pytest==6.2.5
pytest-cov==2.10.1
sphinx==3.4.3
sphinx>=5
sphinx-autodoc-typehints==1.11.1
jinja2<3.1.0
sphinx_rtd_theme==0.5.1
51 changes: 43 additions & 8 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,20 +1,55 @@
# pypi requirements
setuptools==69.5.1
accelerate>=0.12,<0.20.0
datasets>=1.11.0
diffusers<=0.6.0
enzeptional>=1.0.0
importlib-metadata>=1.7.0,<5.0.0 # temporary: https://github.com/python/importlib_metadata/issues/409
importlib-resources>=5.10.0
ipaddress>=1.0.23
ipykernel<=6.22.0
joblib>=1.1.0
gt4sd-molformer>=0.1.1
gt4sd-trainer-hf-pl>=0.0.2
keras>=2.3.1,<2.11.0
keybert>=0.7.0
markdown-it-py
minio==7.0.1
modlamp>=4.0.0
molgx>=0.22.0a1
nglview>=3.0.3
numpy>=1.16.5,<1.24.0
pydantic>=1.7.3,<2.0.0
pytoda @ git+https://github.com/PaccMann/[email protected]
pandas<=2.0.3
protobuf<3.20
pyarrow>=8.0.0
pydantic>=2.0.0
pymatgen>=2022.11.7
PyTDC==0.3.7
pytorch_lightning<=1.7.7
pyyaml>=5.4.1
rdkit>=2022.3.5
rdkit-stubs>=0.7
regex>=2.5.91
reinvent-chemistry==0.0.38
sacremoses>=0.0.41
scikit-learn>=1.0.0,<1.3.0
scikit-optimize>=0.8.1
scipy>=1.0.0
sentence-transformers>=2.2.2
scipy>=1.0.0,<=1.11.0
sentencepiece>=0.1.95
terminator @ git+https://github.com/IBM/regression-transformer@gt4sd
torch>=1.0,<=1.12.1
torchvision<=0.13.1
sentence_transformers>1.0,<=2.2.2
sympy>=1.10.1
tables>=3.7.0
tape-proteins>=0.4
tensorboard!=2.5.0,>=2.2.0,<2.11.0
tensorboard-data-server<=0.6.1
tensorflow>=2.1.0,<2.11.0
tensorflow-io-gcs-filesystem<0.32.0
torchdrug>=0.2.0
torchmetrics>=0.7.0,<1.0.0
transformers>=4.22.0,<=4.24.0
transformers>=4.22.0,<=4.24.0
typing_extensions>=3.7.4.3
wheel>=0.26
xgboost>=1.7.6
sphinx_rtd_theme==0.5.1
pydantic-settings>=2.0.0
huggingface_hub<0.26.0
18 changes: 7 additions & 11 deletions src/gt4sd/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@
import os
from functools import lru_cache
from typing import Dict, Optional, Set

from pydantic import BaseSettings
from pydantic_settings import BaseSettings, SettingsConfigDict

from .s3 import GT4SDS3Client, S3SyncError, sync_folder_with_s3, upload_file_to_s3

Expand All @@ -51,25 +50,21 @@ class GT4SDConfiguration(BaseSettings):
gt4sd_max_runtime: int = 86400
gt4sd_create_unverified_ssl_context: bool = False
gt4sd_disable_cudnn: bool = False
gt4sd_skip_s3_sync_in_inference: bool = False

gt4sd_s3_host: str = "s3.par01.cloud-object-storage.appdomain.cloud"
gt4sd_s3_access_key: str = "6e9891531d724da89997575a65f4592e"
gt4sd_s3_secret_key: str = "5997d63c4002cc04e13c03dc0c2db9dae751293dab106ac5"
gt4sd_s3_access_key: str = "b087e6810a5d4246a64e07e36ace338f"
gt4sd_s3_secret_key: str = "ba4a1db5647a32c6109b58714befb7ea7145b983143e0836"
gt4sd_s3_secure: bool = True
gt4sd_s3_bucket_algorithms: str = "gt4sd-cos-algorithms-artifacts"
gt4sd_s3_bucket_properties: str = "gt4sd-cos-properties-artifacts"

gt4sd_s3_host_hub: str = "s3.par01.cloud-object-storage.appdomain.cloud"
gt4sd_s3_access_key_hub: str = "d9536662ebcf462f937efb9f58012830"
gt4sd_s3_secret_key_hub: str = "934d1f3afdaea55ac586f6c2f729ac2ba2694bb8e975ee0b"
gt4sd_s3_access_key_hub: str = "1168c1d9ce664e75a8a151e6f4a29078"
gt4sd_s3_secret_key_hub: str = "4996c6cc737828213a7afcc7e27450e1af2daf027af95c1d"
gt4sd_s3_secure_hub: bool = True
gt4sd_s3_bucket_hub_algorithms: str = "gt4sd-cos-hub-algorithms-artifacts"
gt4sd_s3_bucket_hub_properties: str = "gt4sd-cos-hub-properties-artifacts"

class Config:
# immutable and in turn hashable, that is required for lru_cache
frozen = True
model_config = SettingsConfigDict(frozen=True)

@staticmethod
@lru_cache(maxsize=None)
Expand Down Expand Up @@ -201,6 +196,7 @@ def sync_algorithm_with_s3(
def get_cached_algorithm_path(
prefix: Optional[str] = None, module: str = "algorithms"
) -> str:

if module not in gt4sd_artifact_management_configuration.gt4sd_s3_modules:
raise ValueError(
f"Unknown cache module: {module}. Supported modules: "
Expand Down
138 changes: 138 additions & 0 deletions src/gt4sd/frameworks/enzeptional/tests/test_core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#
# MIT License
#
# Copyright (c) 2024 GT4SD team
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
import pytest
import warnings
# Building blocks of the enzeptional directed-evolution pipeline under test.
from gt4sd.frameworks.enzeptional import (
    EnzymeOptimizer,
    SequenceMutator,
    SequenceScorer,
    CrossoverGenerator,
    HuggingFaceEmbedder,
    HuggingFaceModelLoader,
    HuggingFaceTokenizerLoader,
    SelectionGenerator,
)

from gt4sd.configuration import sync_algorithm_with_s3
from gt4sd.configuration import GT4SDConfiguration

# Singleton settings object; supplies the local artifact cache path used below.
configuration = GT4SDConfiguration.get_instance()


# Silence FutureWarnings emitted by transformer/ML dependencies during the test run.
warnings.simplefilter(action="ignore", category=FutureWarning)

# NOTE(review): this runs at import time and performs network I/O — it syncs the
# enzeptional scorer artifacts from S3 into the local cache. Consider moving it
# into a pytest fixture so collection does not require connectivity.
sync_algorithm_with_s3("proteins/enzeptional/scorers", module="properties")

# Pickled feasibility scorer fetched by the sync above; consumed by SequenceScorer.
scorer_filepath = f"{configuration.gt4sd_local_cache_path}/properties/proteins/enzeptional/scorers/feasibility/model.pkl"


@pytest.mark.skip(reason="out-of-scope for current repo")
def test_optimize():
    """Smoke-test the enzeptional optimization pipeline end to end.

    Builds protein/chemistry embedders, a language-model mutator and a
    feasibility scorer, runs a short EnzymeOptimizer loop, and checks that
    at least one optimized sequence is produced.
    """
    # The embedder and the unmasking model share the same ESM-2 checkpoint.
    protein_checkpoint = "facebook/esm2_t33_650M_UR50D"
    chem_checkpoint = "seyonec/ChemBERTa-zinc-base-v1"

    loader = HuggingFaceModelLoader()
    tok_loader = HuggingFaceTokenizerLoader()

    def _cpu_embedder(checkpoint):
        # Helper: CPU-only embedder whose model and tokenizer come from the
        # same HuggingFace checkpoint.
        return HuggingFaceEmbedder(
            model_loader=loader,
            tokenizer_loader=tok_loader,
            model_path=checkpoint,
            tokenizer_path=checkpoint,
            cache_dir=None,
            device="cpu",
        )

    protein_model = _cpu_embedder(protein_checkpoint)
    chem_model = _cpu_embedder(chem_checkpoint)

    # Masked-LM mutation strategy driven by the protein checkpoint.
    mutation_config = {
        "type": "language-modeling",
        "embedding_model_path": protein_checkpoint,
        "tokenizer_path": protein_checkpoint,
        "unmasking_model_path": protein_checkpoint,
    }

    wild_type = "MSKLLMIGTGPVAIDQFLTRYEASCQAYKDMHQDQQLSSQFNTNLFEGDKALVTKFLEINRTLS"

    mutator = SequenceMutator(sequence=wild_type, mutation_config=mutation_config)
    mutator.set_top_k(1)

    scorer = SequenceScorer(
        protein_model=protein_model,
        scorer_filepath=scorer_filepath,
        use_xgboost=False,
        scaler_filepath=None,
    )

    optimizer = EnzymeOptimizer(
        sequence=wild_type,
        mutator=mutator,
        scorer=scorer,
        # Mutations are restricted to these residue intervals.
        intervals=[(5, 10), (20, 25)],
        substrate_smiles="NC1=CC=C(N)C=C1",
        product_smiles="CNC1=CC=C(NC(=O)C2=CC=C(C=C2)C(C)=O)C=C1",
        chem_model=chem_model,
        selection_generator=SelectionGenerator(),
        crossover_generator=CrossoverGenerator(),
        concat_order=["substrate", "sequence", "product"],
        batch_size=2,
        selection_ratio=0.25,
        perform_crossover=True,
        crossover_type="single_point",
        pad_intervals=False,
        minimum_interval_length=8,
        seed=123,  # fixed seed keeps the run deterministic
    )

    optimized_sequences, iteration_info = optimizer.optimize(
        num_iterations=3,
        num_sequences=5,
        num_mutations=5,
        time_budget=50000,
    )

    # The optimizer must yield at least one candidate sequence.
    assert len(optimized_sequences) > 0

0 comments on commit 5424e6b

Please sign in to comment.