UTAustin-SwarmLab
diff --git a/bash_scripts/btc_script.sh b/bash_scripts/btc_script.sh
Lines changed: 6 additions & 0 deletions
diff --git a/config/main.yaml b/config/main.yaml
Lines changed: 14 additions & 1 deletion
diff --git a/mmda/any2any_conformal_retrieval.py b/mmda/any2any_conformal_retrieval.py
Lines changed: 19 additions & 5 deletions
diff --git a/mmda/plot.py b/mmda/plot.py
Lines changed: 0 additions & 50 deletions
diff --git a/mmda/plot_nonconformity.ipynb b/mmda/plot_nonconformity.ipynb
Lines changed: 8 additions & 8 deletions
diff --git a/mmda/plot_single_modal.py b/mmda/plot_single_modal.py
Lines changed: 111 additions & 0 deletions
diff --git a/mmda/utils/any2any_ds_class.py b/mmda/utils/any2any_ds_class.py
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,6 @@
+# Sweep any2any conformal retrieval on BTC over several retrieval dimensions.
+for retrieval_dim in 10 25 50 75 100; do
+    poetry run python mmda/any2any_conformal_retrieval.py \
+        dataset=BTC "BTC.retrieval_dim=${retrieval_dim}"
+done
+
@@ -13,7 +13,7 @@ class_level_datasets: [sop]
 object_level_datasets: [pitts, sop]
 mislabeled_datasets: [imagenet, cosmos, tiil]
 retrieval_datasets: [flickr]
-any_retrieval_datasets: [KITTI, MSRVTT]
+any_retrieval_datasets: [KITTI, MSRVTT, BTC]
 shuffle_llava_datasets: [pitts, sop] # datasets whose plots contains llava
 mislabel_llava_datasets: [imagenet]
 classification_datasets: [imagenet, leafy_spurge]
@@ -27,6 +27,19 @@ dataset_size: {
   flickr: 155070
 }
 
+BTC:
+  retrieval_dim: 100
+  equal_weights: False
+  img_encoder: ""
+  audio_encoder: ""
+  horizon: 120
+  mask_ratio: 2 # ratio of the missing data : size of test data
+  paths:
+    dataset_path: "/nas/timeseries/timeseries_synthesis/sameep_store/btc/split_fresh_large_120/"
+    save_path: ${BTC.paths.dataset_path}/any2any/
+    plots_path: ${repo_root}plots/BTC/
+
+
 MSRVTT:
   img_encoder: "clip"
   audio_encoder: "clap"
 
@@ -48,16 +48,16 @@ def main(cfg: DictConfig) -> None:
     }
     df = pd.DataFrame(data)
     dir_path = Path(cfg_dataset.paths.plots_path)
-    if cfg.dataset == "KITTI":
+    if cfg.dataset == "MSRVTT":
         df_path = (
             dir_path
-            / f"any2any_retrieval_{cfg_dataset.retrieval_dim}_{cfg_dataset.mask_ratio}{thres_tag}.csv"
+            / f"{cfg_dataset.img_encoder}_{cfg_dataset.audio_encoder}"
+            / f"any2any_retrieval_{cfg_dataset.retrieval_dim}_{cfg_dataset.mask_ratio}.csv"
         )
-    elif cfg.dataset == "MSRVTT":
+    else:
         df_path = (
             dir_path
-            / f"{cfg_dataset.img_encoder}_{cfg_dataset.audio_encoder}"
-            / f"any2any_retrieval_{cfg_dataset.retrieval_dim}_{cfg_dataset.mask_ratio}.csv"
+            / f"any2any_retrieval_{cfg_dataset.retrieval_dim}_{cfg_dataset.mask_ratio}{thres_tag}.csv"
         )
     df_path.parent.mkdir(parents=True, exist_ok=True)
     df.to_csv(df_path, index=False)
@@ -91,6 +91,20 @@ def main(cfg: DictConfig) -> None:
             annot=True,
             annot_kws={"size": 30, "weight": "bold"},
         )
+    elif cfg.dataset == "BTC":
+        single_recalls = np.array(list(single1_recalls.values())).reshape(2, 2) * 100
+        plt.figure(figsize=(8, 8))
+        ax = sns.heatmap(
+            single_recalls,
+            fmt=".1f",
+            cmap="YlGnBu",
+            cbar=False,
+            square=True,
+            xticklabels=["Time", "Stats"],
+            yticklabels=["Text", "Trend"],
+            annot=True,
+            annot_kws={"size": 34, "weight": "bold"},
+        )
     else:
         msg = f"unknown dataset {cfg.dataset}"
         raise ValueError(msg)
 
@@ -0,0 +1,111 @@
+"""Plot functions."""
+
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+import seaborn as sns
+from omegaconf import DictConfig
+
+import hydra
+
+
+def _save_recall_heatmap(
+    recalls: list,
+    xticklabels: list,
+    yticklabels: list,
+    save_path: Path,
+    figsize: tuple,
+    *,
+    adjust_bottom: bool = False,
+) -> None:
+    """Draw one annotated recall heatmap and save it to ``save_path``.
+
+    Args:
+        recalls: nested list of recall values, one row per y tick label and
+            one column per x tick label
+        xticklabels: tick labels of the reference-modality (x) axis
+        yticklabels: tick labels of the query-modality (y) axis
+        save_path: output file; its parent directory is created if missing
+        figsize: figure size forwarded to ``plt.figure``
+        adjust_bottom: if True, call ``plt.subplots_adjust(bottom=-0.05)``
+            after ``tight_layout`` (done for KITTI and BTC, not MSRVTT)
+    """
+    cell_size = 30
+    label_size = 30
+    ticks_size = 28
+
+    fig = plt.figure(figsize=figsize)
+    ax = sns.heatmap(
+        np.array(recalls),
+        fmt=".1f",
+        cmap="YlGnBu",
+        cbar=False,
+        square=True,
+        xticklabels=xticklabels,
+        yticklabels=yticklabels,
+        annot=True,
+        annot_kws={"size": cell_size, "weight": "bold"},
+    )
+    ax.xaxis.tick_top()
+    plt.xlabel("Reference modality", fontsize=label_size)
+    plt.ylabel("Query modality", fontsize=label_size)
+    plt.xticks(fontsize=ticks_size)
+    plt.yticks(fontsize=ticks_size)
+    plt.tight_layout()
+    ax.xaxis.set_label_position("top")  # Move the label to the top
+    if adjust_bottom:
+        plt.subplots_adjust(bottom=-0.05)
+    # savefig does not create missing directories, so create the parent first.
+    save_path.parent.mkdir(parents=True, exist_ok=True)
+    plt.savefig(save_path)
+    plt.close(fig)  # release the figure; three are created per run
+
+
+@hydra.main(version_base=None, config_path="../config", config_name="main")
+def plot_single_modal_recall(cfg: DictConfig) -> None:
+    """Plot single-modal recall heatmaps for KITTI, MSRVTT, and BTC.
+
+    The recall values are hard-coded in this function; the config supplies
+    only the output directories and the retrieval_dim / mask_ratio (and, for
+    MSRVTT, the encoder names) used to build the output file paths.
+
+    Args:
+        cfg: hydra config holding the KITTI, MSRVTT, and BTC sections
+    """
+    cfg_dataset = cfg["KITTI"]
+    _save_recall_heatmap(
+        [[31.9, 31.9, 31.7], [32.4, 32.4, 31.8], [33.7, 32.8, 32.2]],
+        xticklabels=["Image", "Lidar", "Text"],
+        yticklabels=["Image", "Lidar", "Text"],
+        save_path=Path(cfg_dataset.paths.plots_path)
+        / f"single_modal_recall5_{cfg_dataset.retrieval_dim}_{cfg_dataset.mask_ratio}.pdf",
+        figsize=(9, 9),
+        adjust_bottom=True,
+    )
+
+    cfg_dataset = cfg["MSRVTT"]
+    _save_recall_heatmap(
+        [[49.3, 2.6]],
+        xticklabels=["Image", "Audio"],
+        yticklabels=["Text"],
+        save_path=Path(cfg_dataset.paths.plots_path)
+        / f"{cfg_dataset.img_encoder}_{cfg_dataset.audio_encoder}"
+        / f"single_modal_recall5_{cfg_dataset.retrieval_dim}_{cfg_dataset.mask_ratio}.pdf",
+        figsize=(6, 4.5),
+    )
+
+    cfg_dataset = cfg["BTC"]
+    _save_recall_heatmap(
+        [[4.1, 4.7], [3.4, 4.7]],
+        xticklabels=["Time", "Stats"],
+        yticklabels=["Prev News", "Text (2)"],
+        save_path=Path(cfg_dataset.paths.plots_path)
+        / f"single_modal_recall5_{cfg_dataset.retrieval_dim}_{cfg_dataset.mask_ratio}.pdf",
+        figsize=(6, 6),
+        adjust_bottom=True,
+    )
+
+
+if __name__ == "__main__":
+    plot_single_modal_recall()
@@ -27,6 +27,8 @@ def __init__(self) -> None:
 
     def preprocess_retrieval_data(self) -> None:
         """Preprocess the data for retrieval."""
+        # Create the save directory if it does not already exist.
+        Path(self.cfg_dataset.paths.save_path).mkdir(parents=True, exist_ok=True)
 
     def train_crossmodal_similarity(self) -> None:
         """Train the cross-modal similarity, aka the CSA method."""