Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,7 @@
## 2026-05-01 - Avoid High-Level Tensor Ops in Scalar Reductions
**Learning:** High-level `Tensor` operations like `sub()` and `mul()` each return a brand-new `Tensor`, which means an intermediate heap allocation for the result data in addition to the shape and stride metadata. When computing scalar reductions (like MSE, distances, or loss functions), chaining these operations introduces severe, avoidable memory overhead inside hot loops. Truncating to the shorter length with `.min()` as a safeguard is an anti-pattern because it silently masks shape-mismatch errors instead of reporting them.
**Action:** For scalar reductions, assert shape equality (`assert_eq!(a.shape, b.shape)`) and perform a single-pass iteration directly over the underlying borrowed data arrays (`a.data.borrow()`) to eliminate intermediate allocations and safely compute the result.

## 2026-05-02 - Avoid Redundant Allocations in Tensor Initialization
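**Learning:** `Tensor::new()` takes a slice `&[f64]` and calls `.to_vec()`, so passing it a freshly built `Vec<f64>` copies data that was already heap-allocated — a redundant second allocation. Furthermore, building that vector with a manual indexed `for` loop (`data_a[i]` followed by `push`) incurs a bounds check on every index and a capacity check on every `push`, on top of the loop overhead.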
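**Action:** When creating new Tensors from raw data loops, always use iterator chains (e.g., `data_a.iter().zip(data_b.iter()).map(...).collect()`) and pass the resulting vector directly to `Tensor::from_vec()`. This elides the per-index bounds checks, lets `collect()` size the vector up front from the iterator's length, and completely eliminates the double allocation. Because `zip` silently stops at the shorter input, keep the shape assertion in front of the chain so a mismatch still fails loudly.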
97 changes: 57 additions & 40 deletions crates/aether-core/src/ml/linalg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
// ═══════════════════════════════════════════════════════════════════════════════
//


#![allow(dead_code)]

#[cfg(feature = "alloc")]
Expand Down Expand Up @@ -56,58 +55,76 @@ impl LossConfig {
/// Gradient of the selected loss with respect to `y_pred`.
///
/// Every arm divides by `n`, the number of elements in `y_true`, and returns a
/// tensor built with `y_pred`'s shape. Per element, with `p` taken from `y_pred`
/// and `y` from `y_true`:
///
/// * `MSE` — `2 * (p - y) / n`
/// * `MAE` — `1 / n` when `p > y`, `-1 / n` when `p < y`, otherwise `0`
/// * `BinaryCrossEntropy` — `(-(y / p) + (1 - y) / (1 - p)) / n`, with `p` first
///   clamped to `[1e-7, 1 - 1e-7]`
/// * `Hinge` — `-y / n` when `1 - y * p > 0`, otherwise `0`
///
/// # Panics
///
/// Panics with a "Shape mismatch for ... derivative" message when
/// `y_true.shape != y_pred.shape`.
// NOTE(review): this span is a diff hunk rendered without +/- markers. In each arm
// the `sub`/`scale`/`map` calls and the indexed `for` loops ending in `Tensor::new`
// appear to be the removed code, and the `zip`/`map`/`collect` chains ending in
// `Tensor::from_vec` their replacement — confirm against the actual file.
pub fn derivative(&self, y_true: &Tensor, y_pred: &Tensor) -> Tensor {
match self {
LossConfig::MSE => {
let diff = y_pred.sub(y_true);
let n = y_true.shape.iter().product::<usize>() as f64;
diff.scale(2.0 / n)
assert_eq!(y_true.shape, y_pred.shape, "Shape mismatch for MSE derivative");
let true_data = y_true.data.borrow();
let pred_data = y_pred.data.borrow();
let n = true_data.len() as f64;

let grad_data: Vec<f64> = pred_data
.iter()
.zip(true_data.iter())
.map(|(&p, &y)| (p - y) * 2.0 / n)
.collect();

Tensor::from_vec(grad_data, y_pred.shape.clone())
}
LossConfig::MAE => {
let diff = y_pred.sub(y_true);
let n = y_true.shape.iter().product::<usize>() as f64;
diff.map(|x| {
if x > 0.0 {
1.0 / n
} else if x < 0.0 {
-1.0 / n
} else {
0.0
}
})
assert_eq!(y_true.shape, y_pred.shape, "Shape mismatch for MAE derivative");
let true_data = y_true.data.borrow();
let pred_data = y_pred.data.borrow();
let n = true_data.len() as f64;

let grad_data: Vec<f64> = pred_data
.iter()
.zip(true_data.iter())
.map(|(&p, &y)| {
let diff = p - y;
// Sign of the residual scaled by 1/n; an exact zero residual yields 0.
if diff > 0.0 {
1.0 / n
} else if diff < 0.0 {
-1.0 / n
} else {
0.0
}
})
.collect();

Tensor::from_vec(grad_data, y_pred.shape.clone())
}
LossConfig::BinaryCrossEntropy => {
assert_eq!(y_true.shape, y_pred.shape, "Shape mismatch for BinaryCrossEntropy derivative");
// dL/dp = (1-y)/(1-p) - y/p
let true_data = y_true.data.borrow();
let pred_data = y_pred.data.borrow();
let n = true_data.len();
let mut grad_data = Vec::with_capacity(n); // Fixed: using Vec instead of let mut

for i in 0..n {
let y = true_data[i];
let p = pred_data[i].clamp(1e-7, 1.0 - 1e-7); // Avoid div by zero

let grad = -(y / p) + ((1.0 - y) / (1.0 - p));
grad_data.push(grad / n as f64);
}
Tensor::new(&grad_data, &y_pred.shape)
let n = true_data.len() as f64;

let grad_data: Vec<f64> = pred_data
.iter()
.zip(true_data.iter())
.map(|(&p, &y)| {
let p = p.clamp(1e-7, 1.0 - 1e-7); // Avoid div by zero
let grad = -(y / p) + ((1.0 - y) / (1.0 - p));
grad / n
})
.collect();

Tensor::from_vec(grad_data, y_pred.shape.clone())
}
LossConfig::Hinge => {
assert_eq!(y_true.shape, y_pred.shape, "Shape mismatch for Hinge derivative");
// L = max(0, 1 - y*p)
// dL/dp = -y if 1 - y*p > 0 else 0
let true_data = y_true.data.borrow();
let pred_data = y_pred.data.borrow();
let n = true_data.len();
let mut grad_data = Vec::with_capacity(n);

for i in 0..n {
let y = true_data[i];
let p = pred_data[i];

if 1.0 - y * p > 0.0 {
grad_data.push(-y / n as f64);
} else {
grad_data.push(0.0);
}
}
Tensor::new(&grad_data, &y_pred.shape)
let n = true_data.len() as f64;

let grad_data: Vec<f64> = pred_data
.iter()
.zip(true_data.iter())
.map(|(&p, &y)| if 1.0 - y * p > 0.0 { -y / n } else { 0.0 })
.collect();

Tensor::from_vec(grad_data, y_pred.shape.clone())
}
}
}
Expand Down
56 changes: 22 additions & 34 deletions crates/aether-core/src/ml/tensor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
// ═══════════════════════════════════════════════════════════════════════════════
//


#[cfg(feature = "alloc")]
use alloc::rc::Rc;
#[cfg(feature = "alloc")]
Expand Down Expand Up @@ -196,46 +195,40 @@ impl Tensor {
/// Element-wise addition.
///
/// Returns a new tensor whose elements are the pairwise sums of `self` and
/// `other`, built with `self`'s shape. Neither operand is modified.
///
/// # Panics
///
/// Panics with "Shape mismatch for add" when `self.shape != other.shape`.
// NOTE(review): this span is a diff hunk rendered without +/- markers; the indexed
// loop ending in `Self::new` appears to be the removed code and the iterator chain
// ending in `Self::from_vec` its replacement — confirm against the actual file.
pub fn add(&self, other: &Tensor) -> Tensor {
assert_eq!(self.shape, other.shape, "Shape mismatch for add");
let total_size: usize = self.shape.iter().product();
let mut result_data = Vec::with_capacity(total_size);

let data_a = self.data.borrow();
let data_b = other.data.borrow();

for i in 0..total_size {
result_data.push(data_a[i] + data_b[i]);
}
let result_data: Vec<f64> = data_a
.iter()
.zip(data_b.iter())
.map(|(a, b)| a + b)
.collect();

Self::new(&result_data, &self.shape)
Self::from_vec(result_data, self.shape.clone())
}

/// Element-wise multiplication.
///
/// Returns a new tensor whose elements are the pairwise products of `self` and
/// `other`, built with `self`'s shape. Neither operand is modified.
///
/// # Panics
///
/// Panics with "Shape mismatch for mul" when `self.shape != other.shape`.
// NOTE(review): this span is a diff hunk rendered without +/- markers; the indexed
// loop ending in `Self::new` appears to be the removed code and the iterator chain
// ending in `Self::from_vec` its replacement — confirm against the actual file.
pub fn mul(&self, other: &Tensor) -> Tensor {
assert_eq!(self.shape, other.shape, "Shape mismatch for mul");
let total_size: usize = self.shape.iter().product();
let mut result_data = Vec::with_capacity(total_size);

let data_a = self.data.borrow();
let data_b = other.data.borrow();

for i in 0..total_size {
result_data.push(data_a[i] * data_b[i]);
}
let result_data: Vec<f64> = data_a
.iter()
.zip(data_b.iter())
.map(|(a, b)| a * b)
.collect();

Self::new(&result_data, &self.shape)
Self::from_vec(result_data, self.shape.clone())
}

/// Scalar multiplication.
///
/// Returns a new tensor in which every element of `self` has been multiplied by
/// `s`; the result is built with `self`'s shape and `self` is not modified.
// NOTE(review): this span is a diff hunk rendered without +/- markers; the indexed
// loop ending in `Self::new` appears to be the removed code and the one-line
// iterator chain ending in `Self::from_vec` its replacement — confirm against the
// actual file.
pub fn scale(&self, s: f64) -> Tensor {
let total_size: usize = self.shape.iter().product();
let mut result_data = Vec::with_capacity(total_size);
let data = self.data.borrow();

for i in 0..total_size {
result_data.push(data[i] * s);
}
let result_data: Vec<f64> = data.iter().map(|&x| x * s).collect();

Self::new(&result_data, &self.shape)
Self::from_vec(result_data, self.shape.clone())
}

/// Transpose (2D)
Expand Down Expand Up @@ -267,32 +260,27 @@ impl Tensor {
/// Element-wise subtraction.
///
/// Returns a new tensor whose elements are `self[i] - other[i]`, built with
/// `self`'s shape. Neither operand is modified.
///
/// # Panics
///
/// Panics with "Shape mismatch for sub" when `self.shape != other.shape`.
// NOTE(review): this span is a diff hunk rendered without +/- markers; the indexed
// loop ending in `Self::new` appears to be the removed code and the iterator chain
// ending in `Self::from_vec` its replacement — confirm against the actual file.
pub fn sub(&self, other: &Tensor) -> Tensor {
assert_eq!(self.shape, other.shape, "Shape mismatch for sub");
let total_size: usize = self.shape.iter().product();
let mut result_data = Vec::with_capacity(total_size);

let data_a = self.data.borrow();
let data_b = other.data.borrow();

for i in 0..total_size {
result_data.push(data_a[i] - data_b[i]);
}
let result_data: Vec<f64> = data_a
.iter()
.zip(data_b.iter())
.map(|(a, b)| a - b)
.collect();

Self::new(&result_data, &self.shape)
Self::from_vec(result_data, self.shape.clone())
}

/// Element-wise mapping.
///
/// Calls `f` once per element of `self`, in storage order, and returns a new
/// tensor holding the results, built with `self`'s shape. `self` is not modified.
// NOTE(review): this span is a diff hunk rendered without +/- markers; the indexed
// loop ending in `Self::new` appears to be the removed code and the
// `iter().copied().map(f)` chain ending in `Self::from_vec` its replacement —
// confirm against the actual file.
pub fn map<F>(&self, f: F) -> Self
where
F: Fn(f64) -> f64,
{
let total_size: usize = self.shape.iter().product();
let mut result_data = Vec::with_capacity(total_size);
let data = self.data.borrow();

for i in 0..total_size {
result_data.push(f(data[i]));
}
let result_data: Vec<f64> = data.iter().copied().map(f).collect();

Self::new(&result_data, &self.shape)
Self::from_vec(result_data, self.shape.clone())
}
}