ecmwf · sophie-xhonneux · Aug 19, 2025
diff --git a/config/default_config.yml b/config/default_config.yml
@@ -45,7 +45,7 @@ forecast_delta_hrs: 0
 forecast_steps: 0
 forecast_policy: null
 forecast_freeze_model: False
-forecast_att_dense_rate: 0.25
+forecast_att_dense_rate: 1.0
 fe_num_blocks: 0
 fe_num_heads: 16
 fe_dropout_rate: 0.1
@@ -136,4 +136,4 @@ run_id: ???
 # Parameters for logging/printing in the training loop
 train_log:
   # The period to log metrics (in number of batch steps)
-  log_interval: 20
+  log_interval: 20
diff --git a/src/weathergen/model/engines.py b/src/weathergen/model/engines.py
@@ -250,7 +250,7 @@ def create(self) -> torch.nn.ModuleList:
 
 
 class ForecastingEngine:
-    def __init__(self, cf: Config, num_healpix_cells: int) -> None:
+    def __init__(self, cf: Config, num_healpix_cells: int, aux_channels: int) -> None:
         """
         Initialize the ForecastingEngine with the configuration.
 
@@ -259,6 +259,7 @@ def __init__(self, cf: Config, num_healpix_cells: int) -> None:
         """
         self.cf = cf
         self.num_healpix_cells = num_healpix_cells
+        self.aux_channels = aux_channels
         self.fe_blocks = torch.nn.ModuleList()
 
     def create(self) -> torch.nn.ModuleList:
@@ -280,7 +281,7 @@ def create(self) -> torch.nn.ModuleList:
                             with_qk_lnorm=self.cf.fe_with_qk_lnorm,
                             with_flash=self.cf.with_flash_attention,
                             norm_type=self.cf.norm_type,
-                            dim_aux=1,
+                            dim_aux=self.aux_channels,
                             norm_eps=self.cf.norm_eps,
                             attention_dtype=get_dtype(self.cf.attention_dtype),
                         )
@@ -296,7 +297,7 @@ def create(self) -> torch.nn.ModuleList:
                             with_qk_lnorm=self.cf.fe_with_qk_lnorm,
                             with_flash=self.cf.with_flash_attention,
                             norm_type=self.cf.norm_type,
-                            dim_aux=1,
+                            dim_aux=self.aux_channels,
                             norm_eps=self.cf.norm_eps,
                             attention_dtype=get_dtype(self.cf.attention_dtype),
                         )
@@ -309,7 +310,7 @@ def create(self) -> torch.nn.ModuleList:
                         with_residual=True,
                         dropout_rate=self.cf.fe_dropout_rate,
                         norm_type=self.cf.norm_type,
-                        dim_aux=1,
+                        dim_aux=self.aux_channels,
                         norm_eps=self.cf.mlp_norm_eps,
                     )
                 )

diff --git a/src/weathergen/model/model.py b/src/weathergen/model/model.py
@@ -17,9 +17,11 @@
 import astropy_healpix.healpy
 import numpy as np
 import torch
+import torch.nn as nn
 from astropy_healpix import healpy
 from torch.utils.checkpoint import checkpoint
 
+from weathergen.model.positional_encoding import FourierEmbedding, PositionalEmbedding
 from weathergen.model.engines import (
     EmbeddingEngine,
     EnsPredictionHead,
@@ -249,7 +251,21 @@ def create(self) -> "Model":
                 "Empty forecast engine (fe_num_blocks = 0), but forecast_steps[i] > 0 for some i"
             )
 
-        self.fe_blocks = ForecastingEngine(cf, self.num_healpix_cells).create()
+        fe_aux_encoding_type = cf.get("fe_aux_encoding_type", "identity")
+        if fe_aux_encoding_type == "identity":
+            self.fe_aux_info = nn.Identity()
+            fe_aux_channels = 1
+        elif fe_aux_encoding_type == "positional":
+            fe_aux_channels = cf.get("fe_aux_channels", 64)
+            self.fe_aux_info = PositionalEmbedding(fe_aux_channels)
+        elif fe_aux_encoding_type == "fourier":
+            fe_aux_channels = cf.get("fe_aux_channels", 64)
+            self.fe_aux_info = FourierEmbedding(fe_aux_channels)
+        else:
+            raise NotImplementedError(
+                f"{fe_aux_encoding_type} is not known, options are identity, positional, or fourier"
+            )
+        self.fe_blocks = ForecastingEngine(cf, self.num_healpix_cells, fe_aux_channels).create()
 
         ###############
         # embed coordinates yielding one query token for each target token
@@ -704,7 +720,8 @@ def forecast(self, model_params: ModelParams, tokens: torch.Tensor) -> torch.Ten
         """
 
         for it, block in enumerate(self.fe_blocks):
-            aux_info = torch.tensor([it], dtype=torch.float32, device="cuda")
+            block_idx = torch.tensor([it], dtype=torch.float32, device=tokens.device)
+            aux_info = self.fe_aux_info(block_idx)
             tokens = checkpoint(block, tokens, aux_info, use_reentrant=False)
 
         return tokens

diff --git a/src/weathergen/model/positional_encoding.py b/src/weathergen/model/positional_encoding.py
@@ -11,6 +11,39 @@
 
 import numpy as np
 import torch
+import torch.nn
+
+
+class PositionalEmbedding(torch.nn.Module):
+    """Sinusoidal embedding: maps a 1-D tensor x to cat(cos, sin) of outer(x, freqs)."""
+
+    def __init__(self, num_channels, max_positions=10000, endpoint=False):
+        super().__init__()
+        self.num_channels = num_channels
+        self.max_positions = max_positions
+        self.endpoint = endpoint
+
+    def forward(self, x):
+        """Embed 1-D tensor x of length n into shape (n, 2 * (num_channels // 2))."""
+        half = self.num_channels // 2
+        steps = torch.arange(half, dtype=torch.float32, device=x.device)
+        freqs = (1 / self.max_positions) ** (steps / (half - (1 if self.endpoint else 0)))
+        phase = torch.outer(x, freqs.to(x.dtype))  # Tensor.ger is a deprecated alias
+        return torch.cat([phase.cos(), phase.sin()], dim=1)
+
+
+class FourierEmbedding(torch.nn.Module):
+    """Random Fourier features: cat(cos, sin) of outer(x, 2 * pi * freqs) for a 1-D x."""
+
+    def __init__(self, num_channels, scale=16):
+        super().__init__()
+        # Drawn once from N(0, 1) * scale; registered as a buffer, not a trainable parameter.
+        self.register_buffer("freqs", torch.randn(num_channels // 2) * scale)
+
+    def forward(self, x):
+        """Embed 1-D tensor x of length n into shape (n, 2 * (num_channels // 2))."""
+        phase = torch.outer(x, (2 * np.pi * self.freqs).to(x.dtype))
+        return torch.cat([phase.cos(), phase.sin()], dim=1)
 
 
 ####################################################################################################