diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index a0f502b..76941b3 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -30,6 +30,29 @@ jobs: token: ${{ secrets.CODECOV_TOKEN }} slug: TLCFEM/msglc plugins: pycoverage + # benchmark: + # name: Benchmark + # runs-on: ubuntu-latest + # timeout-minutes: 100 + # steps: + # - name: Clone + # uses: actions/checkout@v4 + # - name: Python + # uses: actions/setup-python@v5 + # with: + # python-version: 3.11 + # - name: Build + # run: pip install .[dev,msgspec,numpy] matplotlib h5py + # - name: Test + # run: | + # python3 h5/generate.py + # python3 h5/read.py + # tar czf benchmark.tar.gz ./h5/*.pdf + # - name: Upload + # uses: actions/upload-artifact@v4 + # with: + # name: msglc-benchmark + # path: benchmark.tar.gz wheels: name: Build runs-on: ubuntu-latest diff --git a/docs/.pages b/docs/.pages index f292765..600147a 100644 --- a/docs/.pages +++ b/docs/.pages @@ -2,4 +2,5 @@ nav: - msglc: index.md - Motivation: motivation.md - Examples: tutorial.md + - Benchmark: benchmark.md - API: api \ No newline at end of file diff --git a/docs/benchmark.md b/docs/benchmark.md new file mode 100644 index 0000000..6a47ee1 --- /dev/null +++ b/docs/benchmark.md @@ -0,0 +1,60 @@ +# Benchmark + +The embedded structure allows fast read without loading the whole archive, which is the main advantage of this package. +In the following, we benchmark the random read performance and compare with the `HDF5` format. + +## Data Generation + +A square matrix of size 5000 with random floating-point numbers is used. +The matrix is dumped onto the disk with different configurations. + +1. For `msglc`, `small_obj_optimization_threshold` varies from 4KB to 4MB, `numpy_encoder` is switched off so the matrix is stored as plain json instead binary blob. +2. For `h5py`, the chunk size is computed so that each block has a size similar to `small_obj_optimization_threshold`. 
Compression is optionally switched on. + +The following code snippets show the relevant functions. + +```py +def generate_msg(mat: np.ndarray, block: int): + configure(small_obj_optimization_threshold=2**block, numpy_encoder=False) # 16KB + dump(f"data-{block}.msg", mat) + +def generate_h5(mat: np.ndarray, block: int, **kwargs): + with h5py.File(h5_name(block, **kwargs), "w") as f: + if block > 0: + chunk_size = int(sqrt(2**block / 128)) + kwargs["chunks"] = (chunk_size, chunk_size) + f.create_dataset("data", data=mat, **kwargs) +``` + +The write time of `msglc` is in general constant, because the packer needs to traverse the whole json object. +Depending on different configurations, `h5py` requires different amounts of time to dump the matrix. + +![write time](./write_time.pdf) + +`msglc` shall be used for data that is written to disk for cold storage and does not require frequent changes. +When compression is on, `h5py` needs to traverse the object just like `msglc`, thus requires a similar amount of time. + +## Read Test + +We mainly test the random read. +To this end, we repeatedly read random locations in the matrix and measure the time required. 
import os
from math import sqrt

import h5py
import matplotlib.pyplot as plt
import numpy as np
from timer import get_color, timeit

from msglc import dump
from msglc.config import configure


@timeit
def generate_msg(mat: np.ndarray, block: int):
    """Dump *mat* with msglc using 2**block bytes as the small-object threshold.

    numpy_encoder is disabled so the matrix is stored as plain json rather
    than a single binary blob, keeping per-block lazy reads comparable with
    HDF5 chunked reads.
    """
    # NOTE: the threshold is 2**block bytes (4KB..4MB over the sweep below),
    # not a fixed 16KB as a stale comment previously claimed.
    configure(small_obj_optimization_threshold=2**block, numpy_encoder=False)
    dump(f"data-{block}.msg", mat)


def h5_name(block: int, **kwargs):
    """Build an output file name that encodes chunking/compression settings.

    Any extra keyword argument (e.g. compression) marks the file as
    "compressed"; a positive *block* marks it as "chunked" and appends the
    block exponent.
    """
    file_name = "data"
    if kwargs:
        file_name += "-compressed"
    elif block > 0:
        file_name += "-chunked"

    if block > 0:
        file_name += f"-{block}"

    return f"{file_name}.h5"


@timeit
def generate_h5(mat: np.ndarray, block: int, **kwargs):
    """Dump *mat* with h5py; chunks are sized to roughly 2**block bytes."""
    with h5py.File(h5_name(block, **kwargs), "w") as f:
        if block > 0:
            # Square chunk whose byte size is comparable to msglc's
            # small_obj_optimization_threshold for the same `block`.
            chunk_size = int(sqrt(2**block / 128))
            kwargs["chunks"] = (chunk_size, chunk_size)
        f.create_dataset("data", data=mat, **kwargs)


def _bar_chart(labels, values, colors, ylabel: str, path: str):
    """Render one bar chart; shared implementation for all plot_* helpers."""
    plt.figure(figsize=(10, 10))
    plt.bar(labels, values, color=colors)
    plt.ylabel(ylabel)
    plt.xlabel("format")
    plt.xticks(rotation=-90)
    plt.tight_layout()
    plt.savefig(path)


def plot_write_time(write_time: dict):
    """Bar chart of write durations keyed by generated file name."""
    keys = sorted(write_time)
    _bar_chart(
        keys,
        [write_time[k] for k in keys],
        [get_color(k) for k in keys],
        "time (s)",
        "write_time.pdf",
    )


def plot_file_size(file_size: dict):
    """Bar chart of on-disk sizes (MB); sizes are read from the filesystem."""
    keys = sorted(file_size.keys())
    _bar_chart(
        keys,
        [os.path.getsize(k) / 2**20 for k in keys],
        [get_color(k) for k in keys],
        "size (MB)",
        "file_size.pdf",
    )


def plot_memory_usage(write_memory: dict):
    """Bar chart of write memory usage (placeholder — currently unused)."""
    keys = sorted(write_memory)
    _bar_chart(
        keys,
        [write_memory[k] for k in keys],
        [get_color(k) for k in keys],
        "write memory usage",
        "write_memory.pdf",
    )


if __name__ == "__main__":
    os.chdir(os.path.dirname(__file__))

    collect = {}

    mat = np.random.rand(5000, 5000)

    # Baseline: contiguous (unchunked, uncompressed) HDF5.
    collect[h5_name(-1)] = generate_h5(mat, -1)

    # Sweep block sizes from 4KB (2**12) to 4MB (2**22).
    for i in range(12, 23):
        collect[h5_name(i)] = generate_h5(mat, i)
        collect[h5_name(i, compression="gzip", compression_opts=9)] = generate_h5(
            mat, i, compression="gzip", compression_opts=9
        )
        collect[f"data-{i}.msg"] = generate_msg(mat, i)

    plot_write_time({k: v[0] for k, v in collect.items()})
    plot_file_size(collect)
    # plot_memory_usage({k: v[1] for k, v in collect.items()})
import os
import random

import h5py
import matplotlib.pyplot as plt
from timer import get_color, timeit

from msglc.reader import LazyReader
from msglc.unpacker import MsgspecUnpacker

# Number of random single-element reads per timed run.
repeat = 1000


@timeit
def read_msg(file: str):
    """Fetch `repeat` random matrix elements through the lazy msglc reader."""
    with LazyReader(file, unpacker=MsgspecUnpacker, cached=False) as reader:
        for _ in range(repeat):
            reader[random.randint(0, 4999)][random.randint(0, 4999)]


@timeit
def read_h5(file: str):
    """Fetch `repeat` random matrix elements from an HDF5 dataset."""
    with h5py.File(file, "r") as f:
        dataset = f["data"]
        for _ in range(repeat):
            # Single-element selection. The previous `dataset[i][j]` form
            # first materialised the whole row i as a numpy array, so it
            # benchmarked row reads rather than random element reads.
            dataset[random.randint(0, 4999), random.randint(0, 4999)]


def plot_read_time(time: dict, logscale=False):
    """Bar chart of read durations; optionally with a log-scale y axis."""
    labels = sorted(time)
    plt.figure(figsize=(10, 10))
    plt.bar(labels, [time[k] for k in labels], color=[get_color(k) for k in labels])
    plt.ylabel("time")
    plt.xlabel("format")
    plt.xticks(rotation=-90)
    if logscale:
        plt.yscale("log")
    plt.tight_layout()
    plt.savefig(f"read_time{'_log' if logscale else ''}.pdf")


def plot_memory_usage(memory: dict):
    """Bar chart of read memory usage (placeholder — currently unused)."""
    labels = sorted(memory)
    plt.figure(figsize=(10, 10))
    plt.bar(labels, [memory[k] for k in labels], color=[get_color(k) for k in labels])
    plt.ylabel("memory usage")
    plt.xlabel("format")
    plt.xticks(rotation=-90)
    plt.tight_layout()
    plt.savefig("read_memory_usage.pdf")


if __name__ == "__main__":
    os.chdir(os.path.dirname(__file__))

    collect = {}
    for file in os.listdir():
        # Only files produced by generate.py are named data*.msg / data*.h5.
        if "data" not in file:
            continue
        if "msg" in file:
            collect[file] = read_msg(file)
        elif "h5" in file:
            collect[file] = read_h5(file)

    time_dict = {k: v[0] for k, v in collect.items()}
    memory_dict = {k: v[1] for k, v in collect.items()}
    plot_read_time(time_dict)
    plot_read_time(time_dict, logscale=True)
    # plot_memory_usage(memory_dict)
import time
from functools import wraps


def timeit(func):
    """Decorator that measures the wall-clock duration of *func*.

    The wrapped function returns a tuple ``(duration_seconds, 0)``; the
    second slot is a placeholder for a future memory-usage measurement
    (callers index ``v[0]``/``v[1]``). The wrapped function's own return
    value is intentionally discarded.
    """

    @wraps(func)  # preserve func.__name__ / __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        shown = [arg for arg in args if isinstance(arg, (int, str))]
        print(f"Calling function '{func.__name__}' with arguments: args={shown}.")
        # perf_counter is monotonic: immune to wall-clock adjustments,
        # unlike time.time().
        start_time = time.perf_counter()
        func(*args, **kwargs)
        duration = time.perf_counter() - start_time
        print(f"Function '{func.__name__}' executed in {duration:.6f} seconds.")
        return duration, 0

    return wrapper


def get_color(name: str):
    """Map a benchmark file name to a bar color.

    Priority order matters: "msg" wins over "compressed", which wins over
    plain "h5"; anything unrecognised is black.
    """
    if "msg" in name:
        return "red"
    if "compressed" in name:
        return "blue"
    if "h5" in name:
        return "green"
    return "black"