UTAustin-SwarmLab
diff --git a/‎bash_scripts/handwriting_script.sh
+8-5 b/‎bash_scripts/handwriting_script.sh
+8-5
diff --git a/‎config/main.yaml
+2-2 b/‎config/main.yaml
+2-2
diff --git a/‎mmda/bimodal_classification.py
+2-6 b/‎mmda/bimodal_classification.py
+2-6
diff --git a/‎mmda/exps/classification.py
+4-3 b/‎mmda/exps/classification.py
+4-3
diff --git a/‎mmda/get_embeddings.py
+27-38 b/‎mmda/get_embeddings.py
+27-38
diff --git a/‎mmda/tsfresh_features.py
+119 b/‎mmda/tsfresh_features.py
+119
diff --git a/‎mmda/utils/cca_class.py
+14-18 b/‎mmda/utils/cca_class.py
+14-18
@@ -1,7 +1,10 @@
 # classification
-CUDA_VISIBLE_DEVICES=1 poetry run python mmda/bimodal_classification.py dataset=handwriting handwriting.sim_dim=10
-CUDA_VISIBLE_DEVICES=1 poetry run python mmda/bimodal_classification.py dataset=handwriting handwriting.sim_dim=25
+# CUDA_VISIBLE_DEVICES=1 poetry run python mmda/bimodal_classification.py dataset=handwriting handwriting.sim_dim=10
+# CUDA_VISIBLE_DEVICES=1 poetry run python mmda/bimodal_classification.py dataset=handwriting handwriting.sim_dim=25
+CUDA_VISIBLE_DEVICES=1 poetry run python mmda/bimodal_classification.py dataset=handwriting handwriting.sim_dim=30
 CUDA_VISIBLE_DEVICES=1 poetry run python mmda/bimodal_classification.py dataset=handwriting handwriting.sim_dim=50
-CUDA_VISIBLE_DEVICES=1 poetry run python mmda/bimodal_classification.py dataset=handwriting handwriting.sim_dim=100
-CUDA_VISIBLE_DEVICES=1 poetry run python mmda/bimodal_classification.py dataset=handwriting handwriting.sim_dim=200
-CUDA_VISIBLE_DEVICES=1 poetry run python mmda/bimodal_classification.py dataset=handwriting handwriting.sim_dim=700
+CUDA_VISIBLE_DEVICES=1 poetry run python mmda/bimodal_classification.py dataset=handwriting handwriting.sim_dim=60
+CUDA_VISIBLE_DEVICES=1 poetry run python mmda/bimodal_classification.py dataset=handwriting handwriting.sim_dim=70
+# CUDA_VISIBLE_DEVICES=1 poetry run python mmda/bimodal_classification.py dataset=handwriting handwriting.sim_dim=100
+# CUDA_VISIBLE_DEVICES=1 poetry run python mmda/bimodal_classification.py dataset=handwriting handwriting.sim_dim=200
+# CUDA_VISIBLE_DEVICES=1 poetry run python mmda/bimodal_classification.py dataset=handwriting handwriting.sim_dim=500
@@ -106,9 +106,9 @@ imagenet:
 handwriting:
   sim_dim: 700 # dimension of the similarity score and the CCA transformation
   equal_weights: False
-  img_encoder: "chronos"
+  img_encoder: "tsfresh"
   text_encoder: "clip"
-  train_test_ratios: [0.9]
+  train_test_ratios: [0.85]
   shuffle: False
   paths:
     dataset_path: "/nas/pohan/datasets/Handwriting/"
 
@@ -52,13 +52,9 @@ def main(cfg: DictConfig) -> None:  # noqa: C901, PLR0915, PLR0912
                 f.write(f"{shuffle_ratio},{cca_accs},{asif_accs}\n")
     else:
         for train_test_ratio in cfg_dataset.train_test_ratios:
-            asif_accs = asif_classification(cfg, train_test_ratio)
             cca_accs = cca_classification(cfg, train_test_ratio)
-            clip_accs = (
-                clip_like_classification(cfg, train_test_ratio)
-                if cfg.dataset != "handwriting"
-                else 0
-            )
+            asif_accs = 0 if True else asif_classification(cfg, train_test_ratio)
+            clip_accs = 0 if True else clip_like_classification(cfg, train_test_ratio)
             # write accuracy to file
             if not csv_save_path.exists():
                 # create the file and write the header
 
@@ -20,15 +20,14 @@ def cca_classification(
     Returns:
         data_size2accuracy: {data_size: accuracy}
     """
-    print("CCA")
     cfg_dataset = cfg[cfg.dataset]
+    print(f"CCA {cfg_dataset.sim_dim}")
     ds = load_classification_dataset(cfg)
     ds.load_data(train_test_ratio, clip_bool=False, shuffle_ratio=shuffle_ratio)
-    cca = ReNormalizedCCA() if cfg.dataset == "handwriting" else NormalizedCCA()
+    cca = ReNormalizedCCA() if True else NormalizedCCA()
     ds.train_img, ds.train_text, corr = cca.fit_transform_train_data(
         cfg_dataset, ds.train_img, ds.train_text
     )
-    print("corr", corr)
     ds.test_img, ds.test_text = cca.transform_data(ds.test_img, ds.test_text)
 
     ds.get_labels_emb()
@@ -50,6 +49,7 @@ def clip_like_classification(cfg: DictConfig, train_test_ratio: float) -> float:
     Returns:
         data_size2accuracy: {data_size: accuracy}
     """
+    print("CLIP-like")
     ds = load_classification_dataset(cfg)
     ds.load_data(train_test_ratio, clip_bool=True)
     ds.get_labels_emb()
@@ -68,6 +68,7 @@ def asif_classification(
     Returns:
         data_size2accuracy: {data_size: accuracy}
     """
+    print("ASIF")
     ds = load_classification_dataset(cfg)
     ds.load_data(train_test_ratio, clip_bool=False, shuffle_ratio=shuffle_ratio)
     ds.get_labels_emb()
 
@@ -560,39 +560,35 @@ def main(cfg: DictConfig) -> None:  # noqa: PLR0915, C901, PLR0912
         print("CLIP embeddings saved")
 
     elif dataset == "handwriting":
-        # sentence_26 = {
-        #     1: "apple.",
-        #     2: "ball.",
-        #     3: "cat.",
-        #     4: "dog.",
-        #     5: "elephant.",
-        #     6: "fish.",
-        #     7: "giraffe.",
-        #     8: "hat.",
-        #     9: "ice cream.",
-        #     10: "jaguar.",
-        #     11: "kangaroo.",
-        #     12: "lion.",
-        #     13: "monkey.",
-        #     14: "nest.",
-        #     15: "owl.",
-        #     16: "penguin.",
-        #     17: "queen.",
-        #     18: "rabbit.",
-        #     19: "snake.",
-        #     20: "tiger.",
-        #     21: "umbrella.",
-        #     22: "vase.",
-        #     23: "whale.",
-        #     24: "x-ray.",
-        #     25: "yak.",
-        #     26: "zebra.",
-        # }
         data, labels, num2alphabet, alphabets_hand = load_handwriting(cfg_dataset)
-        # sentences = [sentence_26[int(label.split(".")[0])] for label in labels]
-        # int_labels = [int(label.split(".")[0]) - 1 for label in labels]
+        # save data
+        with Path(cfg_dataset.paths.save_path, "Handwriting_data.pkl").open("wb") as f:
+            pickle.dump(data, f)
+        print("Handwriting data saved")
+        return
 
-        embeddings = chronos_ts(data) if False else data.reshape(data.shape[0], -1)
+        embeddings = clip_imgs(alphabets_hand, 256)
+        print("text shape:", embeddings.shape)
+        with Path(cfg_dataset.paths.save_path, "Handwriting_emb_clip.pkl").open(
+            "wb"
+        ) as f:
+            pickle.dump(embeddings, f)
+        print("CLIP embeddings saved")
+
+        sentences = [f"Alphabet {num2alphabet[label]}." for label in labels]
+        print(sentences[15:21])
+        embeddings = gtr_text(sentences)
+        assert np.allclose(
+            embeddings[15], embeddings[20], atol=1e-3, rtol=1e-4
+        ), f"{embeddings[15].shape}!={embeddings[20].shape}"
+        with Path(cfg_dataset.paths.save_path, "Handwriting_emb_gtr.pkl").open(
+            "wb"
+        ) as f:
+            pickle.dump(embeddings, f)
+        print("GTR shape:", embeddings.shape)
+        print("GTR embeddings saved")
+
+        embeddings = chronos_ts(data)
         # check if embeddings has unique rows
         assert embeddings.shape[0] == len(
             np.unique(embeddings, axis=0)
@@ -604,13 +600,6 @@ def main(cfg: DictConfig) -> None:  # noqa: PLR0915, C901, PLR0912
             pickle.dump(embeddings, f)
         print("Chronos embeddings saved")
 
-        embeddings = clip_imgs(alphabets_hand, 256)
-        print("text shape:", embeddings.shape)
-        with Path(cfg_dataset.paths.save_path, "Handwriting_text_emb_clip.pkl").open(
-            "wb"
-        ) as f:
-            pickle.dump(embeddings, f)
-        print("CLIP embeddings saved")
     # TODO: add more datasets
     else:
         msg = f"Dataset {dataset} not supported."
 
@@ -0,0 +1,119 @@
+"""Extract tsfresh features from the Handwriting dataset."""
+
+import pickle
+from pathlib import Path
+
+import kagglehub
+import numpy as np
+import pandas as pd
+from aeon.datasets import load_classification
+from PIL import Image
+from tsfresh import extract_features
+
+PATH = "/nas/pohan/datasets/Handwriting/"
+PATH_SAVE = "/nas/pohan/datasets/Handwriting/embeddings/"
+
+
+def load_handwriting() -> (
+    tuple[np.ndarray, np.ndarray, dict[str, str], list[Path]]
+):
+    """Load the Handwriting time series and pair every sample with a letter image.
+
+    The train and test splits returned by aeon's ``load_classification``
+    (https://github.com/aeon-toolkit/aeon) are merged into a single array.
+    For each sample, one image of the corresponding letter is drawn at random
+    from the Kaggle "A-Z Handwritten Alphabets" CSV and written to ``PATH`` as
+    a 28x28 grayscale PNG.
+
+    Note:
+        The image choice uses the global NumPy RNG, so the selected images
+        differ between runs unless the caller seeds ``np.random`` first.
+
+    Returns:
+        data: merged train + test series. shape: (num_samples, 3, 152)
+        labels: class labels as strings, e.g. "1.0"
+        num2alphabet: maps a label string ("1.0" .. "26.0") to "A" .. "Z"
+        alphabets_hand: one PNG path per sample, in the same order as ``data``
+    """
+    # train_x.shape: (150, 3, 152), test_x.shape: (850, 3, 152)
+    train_x, train_y = load_classification(
+        "Handwriting", split="train"
+    )  # np.ndarray, list[str]
+    test_x, test_y = load_classification("Handwriting", split="test")
+    # merge train and test
+    x = np.concatenate([train_x, test_x], axis=0)
+    y = np.concatenate([train_y, test_y], axis=0)
+    num2alphabet = {f"{i + 1}.0": chr(ord("A") + i) for i in range(26)}
+
+    def load_alphabets_img() -> tuple[pd.DataFrame, pd.Series]:
+        """Download and read the Kaggle A-Z handwritten alphabets CSV.
+
+        Returns:
+            data: every column after the first (presumably 784 pixel values
+                per row, given the later 28x28 reshape)
+            labels: the first column of the CSV
+        """
+        # Download latest version
+        dataset_dir = kagglehub.dataset_download(
+            "sachinpatel21/az-handwritten-alphabets-in-csv-format"
+        )
+        df = pd.read_csv(Path(dataset_dir) / "A_Z Handwritten Data.csv")
+        return df.iloc[:, 1:], df.iloc[:, 0]
+
+    alphabets_x, alphabets_y = load_alphabets_img()
+    # CSV label ``i`` is stored under Handwriting class ``i + 1``; only the
+    # first 100 images of each letter are kept as candidates.
+    alphabets_img = {
+        i + 1: alphabets_x[alphabets_y == i][:100] for i in range(26)
+    }
+
+    alphabets_hand = []
+    for raw_label in y:
+        label = int(raw_label.split(".")[0])  # "1.0" -> 1
+        candidates = alphabets_img[label]
+        random_idx = np.random.choice(candidates.shape[0])
+        pixels = candidates.iloc[random_idx].to_numpy()
+        pixels = pixels.reshape(28, 28).astype(np.uint8)
+        # save image to png; samples that draw the same (label, index) pair
+        # share one file, which is simply rewritten with identical content
+        path = Path(PATH, f"alphabet_{label}_{random_idx}.png")
+        Image.fromarray(pixels, mode="L").save(path)
+        alphabets_hand.append(path)
+    return x, y, num2alphabet, alphabets_hand
+
+
+def tsfresh_features() -> pd.DataFrame:
+    """Extract tsfresh features from the Handwriting time series.
+
+    The (num_samples, 3, num_steps) array is flattened into the long format
+    passed to ``extract_features``: one row per (sample, time step) with the
+    three channels as value columns. The long table is cached as
+    ``Handwriting_tsfresh.csv`` under ``PATH_SAVE``; the cache is rebuilt when
+    its row count does not match the data (e.g. a file written by an earlier
+    version that stored a single row per sample).
+
+    Feature columns containing any NaN are dropped, and the result is pickled
+    to ``Handwriting_emb_tsfresh.pkl`` under ``PATH_SAVE``.
+
+    Returns:
+        features: one row per sample, one column per retained tsfresh feature
+    """
+    data, _labels, _num2alphabet, _alphabets_hand = load_handwriting()
+    num_samples, _num_channels, num_steps = data.shape
+    expected_rows = num_samples * num_steps
+
+    path = Path(PATH_SAVE, "Handwriting_tsfresh.csv")
+    df = pd.read_csv(path) if path.exists() else None
+
+    if df is None or len(df) != expected_rows:
+        # column_id: id, column_sort: time, values: 3 channels.
+        # ``repeat``/``tile`` follow the row-major order of the flattened
+        # channel slices: all time steps of sample 0, then sample 1, ...
+        df = pd.DataFrame(
+            {
+                "id": np.repeat(np.arange(num_samples), num_steps),
+                "time": np.tile(np.arange(num_steps), num_samples),
+                "channel_1": data[:, 0, :].reshape(-1),
+                "channel_2": data[:, 1, :].reshape(-1),
+                "channel_3": data[:, 2, :].reshape(-1),
+            }
+        )
+        print(df.head())
+        print(df.tail())
+        df.to_csv(path, index=False)
+
+    ts_features = extract_features(df, column_id="id", column_sort="time")
+    # drop every feature that could not be computed for at least one sample
+    ts_features = ts_features.dropna(axis=1)
+    print(ts_features.head())
+    print("ts_features shape:", ts_features.shape)
+    # Single ".pkl" suffix, matching the other Handwriting_emb_*.pkl files.
+    with Path(PATH_SAVE, "Handwriting_emb_tsfresh.pkl").open("wb") as f:
+        pickle.dump(ts_features, f)
+    print("TSFresh features saved")
+    return ts_features
+
+
+if __name__ == "__main__":
+    tsfresh_features()
@@ -67,7 +67,7 @@ def fit_transform_train_data(
             corr_coeff >= 0
         ).all(), f"Correlation should be non-negative. {corr_coeff}"
         assert (
-            corr_coeff <= 1
+            corr_coeff <= 1.05  # noqa: PLR2004
         ).all(), f"Correlation should be less than 1. {corr_coeff}"
         self.corr_coeff = corr_coeff
         self.traindata1, self.traindata2 = traindata1, traindata2
@@ -141,6 +141,8 @@ def fit_transform_train_data(
             corr_coeff: the correlation coefficient. shape: (dim,)
         """
         # Check the shape of the training data
+        traindata1 = traindata1.astype(np.float32)
+        traindata2 = traindata2.astype(np.float32)
         # zero mean data
         traindata1, traindata1_mean = origin_centered(traindata1)
         traindata2, traindata2_mean = origin_centered(traindata2)
@@ -155,23 +157,15 @@ def fit_transform_train_data(
         ), f"traindata2align not zero mean: {max(abs(traindata2.mean(axis=0)))}"
 
         # CCA dimensionality reduction
-        print((traindata1.T @ traindata1).shape)
-        sigma_z1_inv = np.linalg.inv(traindata1.T @ traindata1)
+        sigma_z1_inv = np.linalg.inv(
+            traindata1.T @ traindata1 + np.eye(traindata1.shape[1]) * 1e-5
+        )
         sigma_z1_inv_sqrt = sqrtm(sigma_z1_inv)
-        assert np.allclose(
-            sigma_z1_inv_sqrt @ sigma_z1_inv_sqrt, sigma_z1_inv
-        ), "sigma_z1_inv_sqrt is not the square root of sigma_z1_inv"
         sigma_z2_inv = np.linalg.inv(traindata2.T @ traindata2)
         sigma_z2_inv_sqrt = sqrtm(sigma_z2_inv)
-        assert np.allclose(
-            sigma_z2_inv_sqrt @ sigma_z2_inv_sqrt, sigma_z2_inv
-        ), "sigma_z2_inv_sqrt is not the square root of sigma_z2_inv"
 
         svd_mat = sigma_z1_inv_sqrt @ traindata1.T @ traindata2 @ sigma_z2_inv_sqrt
         u, s, vh = np.linalg.svd(svd_mat)
-        assert np.allclose(
-            u @ np.diag(s) @ vh, svd_mat
-        ), "svd_mat is not the SVD of svd_mat"
 
         self.A = u @ sigma_z1_inv_sqrt
         self.B = vh @ sigma_z2_inv_sqrt
@@ -180,13 +174,12 @@ def fit_transform_train_data(
         assert (
             corr_coeff >= 0
         ).all(), f"Correlation should be non-negative. {corr_coeff}"
-        assert (
-            corr_coeff <= 1
-        ).all(), f"Correlation should be less than 1. {corr_coeff}"
         self.corr_coeff = corr_coeff
+        if self.sim_dim is None:
+            self.sim_dim = cfg_dataset.sim_dim
         self.traindata1, self.traindata2 = (
-            (self.A @ traindata1.T).T,
-            (self.B @ traindata2.T).T,
+            (self.A @ traindata1.T).T[:, : self.sim_dim],
+            (self.B @ traindata2.T).T[:, : self.sim_dim],
         )
         return self.traindata1, self.traindata2, corr_coeff
 
@@ -203,12 +196,15 @@ def transform_data(
             data1: the first transformed data. shape: (num_samples, dim)
             data2: the second transformed data. shape: (num_samples, dim)
         """
+        data1 = data1.astype(np.float32)
+        data2 = data2.astype(np.float32)
         assert self.traindata1_mean is not None, "Please fit the cca model first."
         assert self.traindata2_mean is not None, "Please fit the cca model first."
         # zero mean data and transform
         data1 = data1 - self.traindata1_mean
         data2 = data2 - self.traindata2_mean
-        data1, data2 = (self.A @ data1.T).T, (self.B @ data2.T).T
+        data1 = (self.A @ data1.T).T[:, : self.sim_dim]
+        data2 = (self.B @ data2.T).T[:, : self.sim_dim]
         return data1, data2
 
     def save_model(self, path: str | Path) -> None: