teerthsharma · teerthsharma · Jun 7, 2026
diff --git a/.jules/bolt.md b/.jules/bolt.md
@@ -5,3 +5,7 @@
 ## 2026-05-01 - Avoid High-Level Tensor Ops in Scalar Reductions
 **Learning:** High-level `Tensor` operations like `sub()` and `mul()` trigger intermediate heap allocations for shape and stride metadata. When computing scalar reductions (like MSE, distances, or loss functions), using these operations introduces severe memory overhead inside hot loops. Attempting to use `.min()` length truncation as a safeguard is an anti-pattern as it masks shape mismatch errors.
 **Action:** For scalar reductions, assert shape equality (`assert_eq!(a.shape, b.shape)`) and perform a single-pass iteration directly over the underlying borrowed data arrays (`a.data.borrow()`) to eliminate intermediate allocations and safely compute the result.
+
+## 2026-05-02 - Avoid Redundant Allocations in Tensor Initialization
+**Learning:** `Tensor::new()` takes a slice `&[f64]` and calls `.to_vec()`, so passing it a freshly built `Vec<f64>` copies the data into a second, redundant heap allocation. Furthermore, index-based `for` loops that `push` into a `Vec<f64>` pay a bounds check on every `data[i]` access on top of the per-iteration loop overhead.
+**Action:** When building a new Tensor from existing data, always use an iterator chain (e.g., `data_a.iter().zip(data_b.iter()).map(...).collect()`) and pass the resulting vector directly to `Tensor::from_vec()`. This avoids the indexed bounds checks, lets `collect()` size the vector up front, and eliminates the redundant copy made by `Tensor::new()`. Because `zip` silently stops at the shorter input, keep the `assert_eq!` shape check ahead of the chain.
diff --git a/crates/aether-core/src/ml/linalg.rs b/crates/aether-core/src/ml/linalg.rs
@@ -14,7 +14,6 @@
 // ═══════════════════════════════════════════════════════════════════════════════
 //
 
-
 #![allow(dead_code)]
 
 #[cfg(feature = "alloc")]
@@ -56,58 +55,76 @@ impl LossConfig {
     pub fn derivative(&self, y_true: &Tensor, y_pred: &Tensor) -> Tensor {
         match self {
             LossConfig::MSE => {
-                let diff = y_pred.sub(y_true);
-                let n = y_true.shape.iter().product::<usize>() as f64;
-                diff.scale(2.0 / n)
+                assert_eq!(y_true.shape, y_pred.shape, "Shape mismatch for MSE derivative");
+                let true_data = y_true.data.borrow();
+                let pred_data = y_pred.data.borrow();
+                let n = true_data.len() as f64;
+
+                let grad_data: Vec<f64> = pred_data
+                    .iter()
+                    .zip(true_data.iter())
+                    .map(|(&p, &y)| (p - y) * 2.0 / n)
+                    .collect();
+
+                Tensor::from_vec(grad_data, y_pred.shape.clone())
             }
             LossConfig::MAE => {
-                let diff = y_pred.sub(y_true);
-                let n = y_true.shape.iter().product::<usize>() as f64;
-                diff.map(|x| {
-                    if x > 0.0 {
-                        1.0 / n
-                    } else if x < 0.0 {
-                        -1.0 / n
-                    } else {
-                        0.0
-                    }
-                })
+                assert_eq!(y_true.shape, y_pred.shape, "Shape mismatch for MAE derivative");
+                let true_data = y_true.data.borrow();
+                let pred_data = y_pred.data.borrow();
+                let n = true_data.len() as f64;
+
+                let grad_data: Vec<f64> = pred_data
+                    .iter()
+                    .zip(true_data.iter())
+                    .map(|(&p, &y)| {
+                        let diff = p - y;
+                        if diff > 0.0 {
+                            1.0 / n
+                        } else if diff < 0.0 {
+                            -1.0 / n
+                        } else {
+                            0.0
+                        }
+                    })
+                    .collect();
+
+                Tensor::from_vec(grad_data, y_pred.shape.clone())
             }
             LossConfig::BinaryCrossEntropy => {
+                assert_eq!(y_true.shape, y_pred.shape, "Shape mismatch for BinaryCrossEntropy derivative");
                 // dL/dp = (1-y)/(1-p) - y/p
                 let true_data = y_true.data.borrow();
                 let pred_data = y_pred.data.borrow();
-                let n = true_data.len();
-                let mut grad_data = Vec::with_capacity(n); // Fixed: using Vec instead of let mut
-
-                for i in 0..n {
-                    let y = true_data[i];
-                    let p = pred_data[i].clamp(1e-7, 1.0 - 1e-7); // Avoid div by zero
-
-                    let grad = -(y / p) + ((1.0 - y) / (1.0 - p));
-                    grad_data.push(grad / n as f64);
-                }
-                Tensor::new(&grad_data, &y_pred.shape)
+                let n = true_data.len() as f64;
+
+                let grad_data: Vec<f64> = pred_data
+                    .iter()
+                    .zip(true_data.iter())
+                    .map(|(&p, &y)| {
+                        let p = p.clamp(1e-7, 1.0 - 1e-7); // Avoid div by zero
+                        let grad = -(y / p) + ((1.0 - y) / (1.0 - p));
+                        grad / n
+                    })
+                    .collect();
+
+                Tensor::from_vec(grad_data, y_pred.shape.clone())
             }
             LossConfig::Hinge => {
+                assert_eq!(y_true.shape, y_pred.shape, "Shape mismatch for Hinge derivative");
                 // L = max(0, 1 - y*p)
                 // dL/dp = -y if 1 - y*p > 0 else 0
                 let true_data = y_true.data.borrow();
                 let pred_data = y_pred.data.borrow();
-                let n = true_data.len();
-                let mut grad_data = Vec::with_capacity(n);
-
-                for i in 0..n {
-                    let y = true_data[i];
-                    let p = pred_data[i];
-
-                    if 1.0 - y * p > 0.0 {
-                        grad_data.push(-y / n as f64);
-                    } else {
-                        grad_data.push(0.0);
-                    }
-                }
-                Tensor::new(&grad_data, &y_pred.shape)
+                let n = true_data.len() as f64;
+
+                let grad_data: Vec<f64> = pred_data
+                    .iter()
+                    .zip(true_data.iter())
+                    .map(|(&p, &y)| if 1.0 - y * p > 0.0 { -y / n } else { 0.0 })
+                    .collect();
+
+                Tensor::from_vec(grad_data, y_pred.shape.clone())
             }
         }
     }

diff --git a/crates/aether-core/src/ml/tensor.rs b/crates/aether-core/src/ml/tensor.rs
@@ -14,7 +14,6 @@
 // ═══════════════════════════════════════════════════════════════════════════════
 //
 
-
 #[cfg(feature = "alloc")]
 use alloc::rc::Rc;
 #[cfg(feature = "alloc")]
@@ -196,46 +195,40 @@ impl Tensor {
     /// Element-wise addition
     pub fn add(&self, other: &Tensor) -> Tensor {
         assert_eq!(self.shape, other.shape, "Shape mismatch for add");
-        let total_size: usize = self.shape.iter().product();
-        let mut result_data = Vec::with_capacity(total_size);
-
         let data_a = self.data.borrow();
         let data_b = other.data.borrow();
 
-        for i in 0..total_size {
-            result_data.push(data_a[i] + data_b[i]);
-        }
+        let result_data: Vec<f64> = data_a
+            .iter()
+            .zip(data_b.iter())
+            .map(|(a, b)| a + b)
+            .collect();
 
-        Self::new(&result_data, &self.shape)
+        Self::from_vec(result_data, self.shape.clone())
     }
 
     /// Element-wise multiplication
     pub fn mul(&self, other: &Tensor) -> Tensor {
         assert_eq!(self.shape, other.shape, "Shape mismatch for mul");
-        let total_size: usize = self.shape.iter().product();
-        let mut result_data = Vec::with_capacity(total_size);
-
         let data_a = self.data.borrow();
         let data_b = other.data.borrow();
 
-        for i in 0..total_size {
-            result_data.push(data_a[i] * data_b[i]);
-        }
+        let result_data: Vec<f64> = data_a
+            .iter()
+            .zip(data_b.iter())
+            .map(|(a, b)| a * b)
+            .collect();
 
-        Self::new(&result_data, &self.shape)
+        Self::from_vec(result_data, self.shape.clone())
     }
 
     /// Scalar multiplication
     pub fn scale(&self, s: f64) -> Tensor {
-        let total_size: usize = self.shape.iter().product();
-        let mut result_data = Vec::with_capacity(total_size);
         let data = self.data.borrow();
 
-        for i in 0..total_size {
-            result_data.push(data[i] * s);
-        }
+        let result_data: Vec<f64> = data.iter().map(|&x| x * s).collect();
 
-        Self::new(&result_data, &self.shape)
+        Self::from_vec(result_data, self.shape.clone())
     }
 
     /// Transpose (2D)
@@ -267,32 +260,27 @@ impl Tensor {
     /// Element-wise subtraction
     pub fn sub(&self, other: &Tensor) -> Tensor {
         assert_eq!(self.shape, other.shape, "Shape mismatch for sub");
-        let total_size: usize = self.shape.iter().product();
-        let mut result_data = Vec::with_capacity(total_size);
-
         let data_a = self.data.borrow();
         let data_b = other.data.borrow();
 
-        for i in 0..total_size {
-            result_data.push(data_a[i] - data_b[i]);
-        }
+        let result_data: Vec<f64> = data_a
+            .iter()
+            .zip(data_b.iter())
+            .map(|(a, b)| a - b)
+            .collect();
 
-        Self::new(&result_data, &self.shape)
+        Self::from_vec(result_data, self.shape.clone())
     }
 
     /// Element-wise mapping
     pub fn map<F>(&self, f: F) -> Self
     where
         F: Fn(f64) -> f64,
     {
-        let total_size: usize = self.shape.iter().product();
-        let mut result_data = Vec::with_capacity(total_size);
         let data = self.data.borrow();
 
-        for i in 0..total_size {
-            result_data.push(f(data[i]));
-        }
+        let result_data: Vec<f64> = data.iter().copied().map(f).collect();
 
-        Self::new(&result_data, &self.shape)
+        Self::from_vec(result_data, self.shape.clone())
     }
 }