Skip to content

Commit

Permalink
chore: secrets rotation and updates.
Browse files Browse the repository at this point in the history
Signed-off-by: Matteo Manica <[email protected]>
  • Loading branch information
drugilsberg committed Feb 19, 2025
1 parent 5bcea3d commit 5424e6b
Show file tree
Hide file tree
Showing 5 changed files with 217 additions and 35 deletions.
34 changes: 24 additions & 10 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,33 +16,47 @@ jobs:
runs-on: ubuntu-latest
permissions:
contents: read
defaults:
run:
shell: bash -l {0} # for conda command
env:
GIT_CLONE_PROTECTION_ACTIVE: false
steps:
- uses: actions/checkout@v2
- uses: conda-incubator/setup-miniconda@v2
with:
activate-environment: gt4sd
environment-file: conda_cpu_linux.yml
auto-activate-base: false
use-only-tar-bz2: true
- name: Install gt4sd from source
run: |
pip install -r dev_requirements.txt
pip install -r requirements.txt
pip install .
conda activate gt4sd
pip install --no-deps .
- name: Check black
run: |
conda activate gt4sd
python -m black src/gt4sd --check --diff --color
# - name: Check isort
# run: |
# conda activate gt4sd
# python -m isort src/gt4sd --check-only
- name: Remove unnecessary files (see https://stackoverflow.com/questions/75536771/github-runner-out-of-disk-space-after-building-docker-image)
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
- name: Check flake8
run: |
conda activate gt4sd
python -m flake8 --disable-noqa --per-file-ignores="__init__.py:F401" src/gt4sd
- name: Check mypy
run: |
python -m mypy src/gt4sd
- name: Run pytests
run: |
conda activate gt4sd
python -m pytest -sv
- name: Test entry-points
run: |
conda activate gt4sd
gt4sd-trainer --help
gt4sd-inference --help
gt4sd-saving --help
gt4sd-upload --help
gt4sd-pl-to-hf --help
gt4sd-hf-to-st --help
11 changes: 5 additions & 6 deletions dev_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
better-apidoc==0.3.1
better-apidoc==0.3.2
black==22.3.0
docutils==0.17.1
flake8==3.8.4
flask==1.1.2
flask_login==0.5.0
# isort==5.7.0
licenseheaders==0.8.8
mypy==0.950
myst-parser==0.13.3
pytest>=6.2.5
mypy==1.0.0
myst-parser==1.0.0
pytest==6.2.5
pytest-cov==2.10.1
sphinx==3.4.3
sphinx>=5
sphinx-autodoc-typehints==1.11.1
jinja2<3.1.0
sphinx_rtd_theme==0.5.1
51 changes: 43 additions & 8 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,20 +1,55 @@
# pypi requirements
setuptools==69.5.1
accelerate>=0.12,<0.20.0
datasets>=1.11.0
diffusers<=0.6.0
enzeptional>=1.0.0
importlib-metadata>=1.7.0,<5.0.0 # temporary: https://github.com/python/importlib_metadata/issues/409
importlib-resources>=5.10.0
ipaddress>=1.0.23
ipykernel<=6.22.0
joblib>=1.1.0
gt4sd-molformer>=0.1.1
gt4sd-trainer-hf-pl>=0.0.2
keras>=2.3.1,<2.11.0
keybert>=0.7.0
markdown-it-py
minio==7.0.1
modlamp>=4.0.0
molgx>=0.22.0a1
nglview>=3.0.3
numpy>=1.16.5,<1.24.0
pydantic>=1.7.3,<2.0.0
pytoda @ git+https://github.com/PaccMann/[email protected]
pandas<=2.0.3
protobuf<3.20
pyarrow>=8.0.0
pydantic>=2.0.0
pymatgen>=2022.11.7
PyTDC==0.3.7
pytorch_lightning<=1.7.7
pyyaml>=5.4.1
rdkit>=2022.3.5
rdkit-stubs>=0.7
regex>=2.5.91
reinvent-chemistry==0.0.38
sacremoses>=0.0.41
scikit-learn>=1.0.0,<1.3.0
scikit-optimize>=0.8.1
scipy>=1.0.0
sentence-transformers>=2.2.2
scipy>=1.0.0,<=1.11.0
sentencepiece>=0.1.95
terminator @ git+https://github.com/IBM/regression-transformer@gt4sd
torch>=1.0,<=1.12.1
torchvision<=0.13.1
sentence_transformers>1.0,<=2.2.2
sympy>=1.10.1
tables>=3.7.0
tape-proteins>=0.4
tensorboard!=2.5.0,>=2.2.0,<2.11.0
tensorboard-data-server<=0.6.1
tensorflow>=2.1.0,<2.11.0
tensorflow-io-gcs-filesystem<0.32.0
torchdrug>=0.2.0
torchmetrics>=0.7.0,<1.0.0
transformers>=4.22.0,<=4.24.0
transformers>=4.22.0,<=4.24.0
typing_extensions>=3.7.4.3
wheel>=0.26
xgboost>=1.7.6
sphinx_rtd_theme==0.5.1
pydantic-settings>=2.0.0
huggingface_hub<0.26.0
18 changes: 7 additions & 11 deletions src/gt4sd/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@
import os
from functools import lru_cache
from typing import Dict, Optional, Set

from pydantic import BaseSettings
from pydantic_settings import BaseSettings, SettingsConfigDict

from .s3 import GT4SDS3Client, S3SyncError, sync_folder_with_s3, upload_file_to_s3

Expand All @@ -51,25 +50,21 @@ class GT4SDConfiguration(BaseSettings):
gt4sd_max_runtime: int = 86400
gt4sd_create_unverified_ssl_context: bool = False
gt4sd_disable_cudnn: bool = False
gt4sd_skip_s3_sync_in_inference: bool = False

gt4sd_s3_host: str = "s3.par01.cloud-object-storage.appdomain.cloud"
gt4sd_s3_access_key: str = "6e9891531d724da89997575a65f4592e"
gt4sd_s3_secret_key: str = "5997d63c4002cc04e13c03dc0c2db9dae751293dab106ac5"
gt4sd_s3_access_key: str = "b087e6810a5d4246a64e07e36ace338f"
gt4sd_s3_secret_key: str = "ba4a1db5647a32c6109b58714befb7ea7145b983143e0836"
gt4sd_s3_secure: bool = True
gt4sd_s3_bucket_algorithms: str = "gt4sd-cos-algorithms-artifacts"
gt4sd_s3_bucket_properties: str = "gt4sd-cos-properties-artifacts"

gt4sd_s3_host_hub: str = "s3.par01.cloud-object-storage.appdomain.cloud"
gt4sd_s3_access_key_hub: str = "d9536662ebcf462f937efb9f58012830"
gt4sd_s3_secret_key_hub: str = "934d1f3afdaea55ac586f6c2f729ac2ba2694bb8e975ee0b"
gt4sd_s3_access_key_hub: str = "1168c1d9ce664e75a8a151e6f4a29078"
gt4sd_s3_secret_key_hub: str = "4996c6cc737828213a7afcc7e27450e1af2daf027af95c1d"
gt4sd_s3_secure_hub: bool = True
gt4sd_s3_bucket_hub_algorithms: str = "gt4sd-cos-hub-algorithms-artifacts"
gt4sd_s3_bucket_hub_properties: str = "gt4sd-cos-hub-properties-artifacts"

class Config:
# immutable and in turn hashable, that is required for lru_cache
frozen = True
model_config = SettingsConfigDict(frozen=True)

@staticmethod
@lru_cache(maxsize=None)
Expand Down Expand Up @@ -201,6 +196,7 @@ def sync_algorithm_with_s3(
def get_cached_algorithm_path(
prefix: Optional[str] = None, module: str = "algorithms"
) -> str:

if module not in gt4sd_artifact_management_configuration.gt4sd_s3_modules:
raise ValueError(
f"Unknown cache module: {module}. Supported modules: "
Expand Down
138 changes: 138 additions & 0 deletions src/gt4sd/frameworks/enzeptional/tests/test_core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#
# MIT License
#
# Copyright (c) 2024 GT4SD team
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
import pytest
import warnings
# Building blocks of the enzeptional directed-evolution pipeline under test.
from gt4sd.frameworks.enzeptional import (
    EnzymeOptimizer,
    SequenceMutator,
    SequenceScorer,
    CrossoverGenerator,
    HuggingFaceEmbedder,
    HuggingFaceModelLoader,
    HuggingFaceTokenizerLoader,
    SelectionGenerator,
)

from gt4sd.configuration import sync_algorithm_with_s3
from gt4sd.configuration import GT4SDConfiguration

# Singleton settings object; supplies the local artifact cache path used below.
configuration = GT4SDConfiguration.get_instance()


# Silence FutureWarnings emitted by transformer/ML dependencies during the test run.
warnings.simplefilter(action="ignore", category=FutureWarning)

# NOTE(review): this runs at import time and performs network I/O — it syncs the
# enzeptional scorer artifacts from S3 into the local cache. Consider moving it
# into a pytest fixture so collection does not require connectivity.
sync_algorithm_with_s3("proteins/enzeptional/scorers", module="properties")

# Pickled feasibility scorer fetched by the sync above; consumed by SequenceScorer.
scorer_filepath = f"{configuration.gt4sd_local_cache_path}/properties/proteins/enzeptional/scorers/feasibility/model.pkl"


@pytest.mark.skip(reason="out-of-scope for current repo")
def test_optimize():
    """Smoke-test the enzeptional optimization pipeline end to end.

    Builds protein/chemistry embedders, a language-model mutator and a
    feasibility scorer, runs a short EnzymeOptimizer loop, and checks that
    at least one optimized sequence is produced.
    """
    # The embedder and the unmasking model share the same ESM-2 checkpoint.
    protein_checkpoint = "facebook/esm2_t33_650M_UR50D"
    chem_checkpoint = "seyonec/ChemBERTa-zinc-base-v1"

    loader = HuggingFaceModelLoader()
    tok_loader = HuggingFaceTokenizerLoader()

    def _cpu_embedder(checkpoint):
        # Helper: CPU-only embedder whose model and tokenizer come from the
        # same HuggingFace checkpoint.
        return HuggingFaceEmbedder(
            model_loader=loader,
            tokenizer_loader=tok_loader,
            model_path=checkpoint,
            tokenizer_path=checkpoint,
            cache_dir=None,
            device="cpu",
        )

    protein_model = _cpu_embedder(protein_checkpoint)
    chem_model = _cpu_embedder(chem_checkpoint)

    # Masked-LM mutation strategy driven by the protein checkpoint.
    mutation_config = {
        "type": "language-modeling",
        "embedding_model_path": protein_checkpoint,
        "tokenizer_path": protein_checkpoint,
        "unmasking_model_path": protein_checkpoint,
    }

    wild_type = "MSKLLMIGTGPVAIDQFLTRYEASCQAYKDMHQDQQLSSQFNTNLFEGDKALVTKFLEINRTLS"

    mutator = SequenceMutator(sequence=wild_type, mutation_config=mutation_config)
    mutator.set_top_k(1)

    scorer = SequenceScorer(
        protein_model=protein_model,
        scorer_filepath=scorer_filepath,
        use_xgboost=False,
        scaler_filepath=None,
    )

    optimizer = EnzymeOptimizer(
        sequence=wild_type,
        mutator=mutator,
        scorer=scorer,
        # Mutations are restricted to these residue intervals.
        intervals=[(5, 10), (20, 25)],
        substrate_smiles="NC1=CC=C(N)C=C1",
        product_smiles="CNC1=CC=C(NC(=O)C2=CC=C(C=C2)C(C)=O)C=C1",
        chem_model=chem_model,
        selection_generator=SelectionGenerator(),
        crossover_generator=CrossoverGenerator(),
        concat_order=["substrate", "sequence", "product"],
        batch_size=2,
        selection_ratio=0.25,
        perform_crossover=True,
        crossover_type="single_point",
        pad_intervals=False,
        minimum_interval_length=8,
        seed=123,  # fixed seed keeps the run deterministic
    )

    optimized_sequences, iteration_info = optimizer.optimize(
        num_iterations=3,
        num_sequences=5,
        num_mutations=5,
        time_budget=50000,
    )

    # The optimizer must yield at least one candidate sequence.
    assert len(optimized_sequences) > 0

0 comments on commit 5424e6b

Please sign in to comment.