Nemo 2 Model Checkpoint Load Test (#270)

Adds a test that ensures a Geneformer model checkpoint saved with nemo 2 can be loaded. Also adds the top-level `results/` directory to the docker and git ignore files. --------- Co-authored-by: John St John <[email protected]>
NVIDIA · Oct 7, 2024 · fb33522 · fb33522
1 parent b28c7a7
commit fb33522
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 0 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -1,5 +1,8 @@
 # -- Docker-specific ignores --
 
+# bionemo related
+results/
+
 # we copy over docs/ into the image for notebook testing, but can skip the images for space
 docs/docs/assets
 

diff --git a/.gitignore b/.gitignore
@@ -3,6 +3,7 @@ docs/site/
 *.nemo
 protein/
 singlecell/
+results/
 
 # Local configs
 .gitconfig

diff --git a/sub-packages/bionemo-geneformer/tests/bionemo/geneformer/test_model.py b/sub-packages/bionemo-geneformer/tests/bionemo/geneformer/test_model.py
@@ -64,6 +64,7 @@
 
 nemo1_checkpoint_path: Path = load("geneformer/qa")
 nemo1_release_checkpoint_path: Path = load("geneformer/10M_240530:1.0")
+nemo2_release_checkpoint_path: Path = load("geneformer/10M_240530:2.0")
 nemo_1_per_layer_outputs_path: Path = load("single_cell/nemo1-geneformer-per-layer-outputs")
 nemo_1_expected_values_path: Path = load("single_cell/nemo1-geneformer-golden-vals")
 data_path: Path = load("single_cell/testdata-20240506") / "cellxgene_2023-12-15_small" / "processed_data"
@@ -747,6 +748,32 @@ def test_inference_loss_10m_released_checkpoint(geneformer_config: GeneformerCon
     assert mean_loss < TARGET_MEAN_LOSS or mean_loss == pytest.approx(TARGET_MEAN_LOSS, abs=1e-2, rel=None)
 
 
+def test_inference_loss_10m_nemo2_released_checkpoint(geneformer_config: GeneformerConfig, seed: int = 42):
+    """Test that we get a good loss when loading a bionemo1 checkpoint with a properly initialized config"""
+    geneformer_config_logit = deepcopy(geneformer_config)
+    # Set up the model to return logits and switch to the released 10M checkpoint
+    geneformer_config_logit.set_hparam("return_only_hidden_states", False)  # return logits
+    geneformer_config_logit.set_hparam(
+        "initial_ckpt_path", nemo2_release_checkpoint_path
+    )  # release checkpoint is important
+
+    mean_loss = _get_loss_from_model(geneformer_config_logit, seed)
+
+    # NOTE: the values in the table were from the average of averages of 8 sized batches
+    # Experiment 1) loaded the 10M model and did the mean of mean loss with 8 sized batches
+    #  this gives: 2.3558831214904785 vs 2.357126723703872, so we actually do better!
+    # For NVIDIA employees see work here:
+    #   https://docs.google.com/document/d/1CofamqHbQlp5U8SjmW7NR7PbTbF72Lj9L9xz1W5t3ZI/edit
+    # Experiment 2)
+    #  With a proper loss (sum/n) and limiting to 200 _random_ batches of size 8 for speed
+    #  we get a similar range number of 2.368649959564209.
+    #  a small change that has lower impact than the change between models is probably acceptable.
+    #  the target is defined as described above for the 10M checkpoint based on our first pass
+    #  of the megatron implementation. Since we manually passed experiment 1 this experiment
+    #  will define our initial "golden value" test target.
+    assert mean_loss < TARGET_MEAN_LOSS or mean_loss == pytest.approx(TARGET_MEAN_LOSS, abs=1e-2, rel=None)
+
+
 def test_inference_loss_10m_released_checkpoint_wrong_activation(geneformer_config: GeneformerConfig, seed: int = 42):
     """Test that when we use the wrong activation we get worse loss out of the same function we test for a positive
     signal. This acts as the negative control.