Shrink the size of the query set by keeping every 5th query (query_step = 5)

d31003 · d31003 · commit e0aa645d7cdf · 2024-10-19T17:02:12.000-05:00
diff --git a/config/main.yaml b/config/main.yaml
@@ -4,7 +4,8 @@ defaults:
 seed: 42
 train_test_ratio: 0.7
 noisy_train_set: True
-repo_root: "/home/pl22767/Project/MMDA/"
+# repo_root: "/home/pl22767/Project/MMDA/"
+repo_root: "/home/po-han/Desktop/Projects/MMDA/"
 
 dataset: "MSRVTT"
 dataset_level_datasets: [pitts, imagenet, cosmos, sop, tiil, musiccaps, flickr]
@@ -32,7 +33,8 @@ MSRVTT:
   retrieval_dim: "" # we use all the dimensions for retrieval
   mask_ratio: 0 # ratio of the missing data : size of test data
   paths:
-    dataset_path: "/nas/pohan/datasets/MSR-VTT/"
+    # dataset_path: "/nas/pohan/datasets/MSR-VTT/"
+    dataset_path: "/home/po-han/Downloads/MSR-VTT/"
     save_path: ${MSRVTT.paths.dataset_path}embeddings/
     plots_path: ${repo_root}plots/MSR-VTT/
 
diff --git a/mmda/baselines/asif_core.py b/mmda/baselines/asif_core.py
@@ -88,7 +88,7 @@ def normalize_sparse(
     return torch.sparse_coo_tensor(tensor_idx, v.t().flatten(), tensor.shape)
 
 
-def zero_shot_classification(  # noqa: PLR0913
+def zero_shot_classification(
     zimgs: torch.Tensor,
     ztxts: torch.Tensor,
     aimgs: torch.Tensor,
diff --git a/mmda/utils/mstvtt_ds_class.py b/mmda/utils/mstvtt_ds_class.py
@@ -83,6 +83,7 @@ def __init__(self, cfg: DictConfig) -> None:
         self.cali_size = 3_800
         self.train_size = 53_000  # TODO: no training data is needed for MSRVTT
         self.test_size = 3_000
+        self.query_step = 5
         self.img2txt_encoder = self.cfg_dataset.img_encoder
         self.audio2txt_encoder = self.cfg_dataset.audio_encoder
         self.save_tag = f"{self.img2txt_encoder}_{self.audio2txt_encoder}"
@@ -95,7 +96,7 @@ def load_data(self) -> None:
         with Path(self.cfg_dataset.paths.save_path, "MSRVTT_id_order.pkl").open(
             "rb"
         ) as f:
-            self.ref_id_order = pickle.load(f)  # noqa: S301
+            self.ref_id_order = pickle.load(f)[:: self.query_step]  # noqa: S301
         with Path(self.cfg_dataset.paths.save_path, "MSRVTT_null_audio.pkl").open(
             "rb"
         ) as f:
@@ -176,16 +177,16 @@ def preprocess_retrieval_data(self) -> None:
             "cali": self.txt2img_emb[txt_cali_idx],
         }
         self.img2txt_emb = {
-            "test": self.img2txt_emb,
-            "cali": self.img2txt_emb,
+            "test": self.img2txt_emb[:: self.query_step],
+            "cali": self.img2txt_emb[:: self.query_step],
         }
         self.txt2audio_emb = {
             "test": self.txt2audio_emb[txt_test_idx],
             "cali": self.txt2audio_emb[txt_cali_idx],
         }
         self.audio2txt_emb = {
-            "test": self.audio2txt_emb,
-            "cali": self.audio2txt_emb,
+            "test": self.audio2txt_emb[:: self.query_step],
+            "cali": self.audio2txt_emb[:: self.query_step],
         }
         # masking missing data in the test set. Mask the whole modality of an instance at a time.
         if self.cfg_dataset.mask_ratio != 0:
@@ -199,6 +200,9 @@ def preprocess_retrieval_data(self) -> None:
             self.mask[0] = []
             self.mask[1] = []
 
+        # sanity check: the subsampled reference id order must have exactly one entry per test audio query embedding
+        assert len(self.ref_id_order) == self.audio2txt_emb["test"].shape[0]
+
     def check_correct_retrieval(self, q_idx: int, r_idx: int) -> bool:
         """Check if the retrieval is correct.
 
diff --git a/ruff.toml b/ruff.toml
@@ -81,7 +81,7 @@ select = [
     "RUF",
 ]
 
-ignore = ["ANN101","ANN102","COM","EXE","PD","S307","FBT001","FBT002","G004","ISC001","S101","T201","NPY002","I001"]
+ignore = ["ANN101","ANN102","COM","EXE","PD","S307","FBT001","FBT002","G004","ISC001","S101","T201","NPY002","I001","PLR0913"]
 
 
 # Allow fix for all enabled rules (when `--fix`) is provided.

Original file line number	Diff line number	Diff line change
`@@ -81,7 +81,7 @@ select = [`
`81`	`81`	`"RUF",`
`82`	`82`	`]`
`83`	`83`
`84`		`-ignore = ["ANN101","ANN102","COM","EXE","PD","S307","FBT001","FBT002","G004","ISC001","S101","T201","NPY002","I001"]`
	`84`	`+ignore = ["ANN101","ANN102","COM","EXE","PD","S307","FBT001","FBT002","G004","ISC001","S101","T201","NPY002","I001","PLR0913"]`
`85`	`85`
`86`	`86`
`87`	`87`	# Allow fix for all enabled rules (when `--fix`) is provided.