diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 000000000..e775e5941 --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,3 @@ +## 2026-03-XX - O(N^2) Bottleneck in Scale-Free Graph Generation +**Learning:** The naive implementation of Barabási–Albert preferential attachment by calculating degrees and sorting all candidate nodes on every step leads to O(N^2) complexity, severely bottlenecking large network generation (e.g., in `codomyrmex/meme/rhizome/network.py`). +**Action:** Replaced the sorting approach with the "roulette wheel" (node repetition array) method. Maintaining a flat list of node IDs repeated in proportion to their degree allows O(m) weighted random choice per node, drastically reducing generation time for large N. Also discovered that care must be taken to safely handle edge cases like N < 5 or drawing more nodes than exist in the network. diff --git a/src/codomyrmex/meme/rhizome/network.py b/src/codomyrmex/meme/rhizome/network.py index d597d9607..89b92b278 100644 --- a/src/codomyrmex/meme/rhizome/network.py +++ b/src/codomyrmex/meme/rhizome/network.py @@ -34,7 +34,7 @@ def build_graph(num_nodes: int, topology: NetworkTopology) -> Graph: # Barabási–Albert preferential attachment m = 2 # New edges per node # Initial core - initial_count = max(m + 1, 5) + initial_count = min(max(m + 1, 5), num_nodes) for i in range(initial_count): for j in range(i + 1, initial_count): src, tgt = node_ids[i], node_ids[j] @@ -43,25 +43,32 @@ def build_graph(num_nodes: int, topology: NetworkTopology) -> Graph: g.nodes[src].connections.add(tgt) g.nodes[tgt].connections.add(src) + # Fast Preferential Attachment via node repetition list + # This replaces the O(N^2) sorting loop with O(m) weighted random choice + repeated_nodes = [] + for i in range(initial_count): + repeated_nodes.extend([node_ids[i]] * max(0, initial_count - 1)) + # Add remaining nodes for i in range(initial_count, num_nodes): targets = set() - # Probability proportional to degree - # 
Simplified: just pick from existing list weighted by degree - existing = node_ids[:i] - # Since strict PA is expensive O(N^2), use random sample approximation - # or just pick m nodes if small - candidates = random.sample(existing, min(len(existing), m * 2)) - # Sort by degree - candidates.sort(key=lambda nid: len(g.nodes[nid].connections), reverse=True) - targets = set(candidates[:m]) + # If not enough nodes to pick from, pick what we can + available_distinct = i + target_count = min(m, available_distinct) + + # Pick distinct targets + if target_count > 0 and len(repeated_nodes) > 0: + while len(targets) < target_count: + target = random.choice(repeated_nodes) + targets.add(target) + src = node_ids[i] for t in targets: - src, tgt = node_ids[i], t - edge = Edge(source=src, target=tgt) + edge = Edge(source=src, target=t) g.edges.append(edge) - g.nodes[src].connections.add(tgt) - g.nodes[tgt].connections.add(src) + g.nodes[src].connections.add(t) + g.nodes[t].connections.add(src) + repeated_nodes.extend([src, t]) return g