# Source: bella/kernel.py — Recursive-Emergence/bella (branch: main)
"""BELLA Kernel — the ribosome.

Boots Bella from her gene. Routes claims to fields. Each field
has its own gene. The proven e103_proof firmware is the RNA.

Architecture:
  Gene (DNA):       B-nodes (constitution) + P-nodes per field
  Firmware (RNA):   proven prompt template from e103_proof
  Kernel (ribosome): routes, transcribes, parses, applies. Stable.
  LLM (substrate):   interprets transcript, produces actions.
"""

import asyncio, hashlib, json, os, re, sys
from dotenv import load_dotenv
load_dotenv()

sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'packages', 'here'))
from here.services.llm_gateway import LLMGateway, ModelTier
from here.fabric.grow_match import _get_embeddings, cosine_similarity


# ---------------------------------------------------------------------------
# Gene — one per field
# ---------------------------------------------------------------------------

class Gene:
    """Numbered proposition tree for a single field.

    Attributes:
        name:   label of the field this gene belongs to.
        nodes:  proposition number -> record with keys ``desc``, ``parent``,
                ``rel``, ``children``, ``voices`` and ``embedding``.
        roots:  proposition numbers that were added without a known parent.
        next_n: one past the highest proposition number added so far.
    """

    def __init__(self, name=''):
        self.name = name
        self.nodes = {}
        self.roots = []
        self.next_n = 1

    def add(self, n, desc, parent=None, rel='→', voice='', emb=None):
        """Store proposition ``n`` and link it under ``parent`` when known.

        ``desc`` is clipped to 60 characters. If ``parent`` is falsy or is
        not a stored proposition, ``n`` is recorded as a root instead.
        """
        self.nodes[n] = {
            'desc': desc[:60],
            'parent': parent,
            'rel': rel,
            'children': [],
            'voices': {voice} if voice else set(),
            'embedding': emb,
        }
        if n >= self.next_n:
            self.next_n = n + 1
        # The record is stored before the parent lookup, exactly as the
        # lookup order matters when parent == n.
        if not parent or parent not in self.nodes:
            self.roots.append(n)
        else:
            self.nodes[parent]['children'].append(n)

    def confirm(self, n, voice=''):
        """Record ``voice`` as a supporter of proposition ``n``, if both exist."""
        if n not in self.nodes or not voice:
            return
        self.nodes[n]['voices'].add(voice)

    def amend(self, n, detail='', voice=''):
        """Add a supporter and/or append ``detail`` to the description of ``n``.

        The combined description is clipped to 60 characters again.
        """
        if n not in self.nodes:
            return
        node = self.nodes[n]
        if voice:
            node['voices'].add(voice)
        if detail:
            node['desc'] = f"{node['desc']}; {detail}"[:60]

    def _walk(self, n, depth):
        """Yield the rendered line for ``n`` followed by its subtree, depth-first."""
        node = self.nodes[n]
        marker = '⊥ ' if node['rel'] == '⊥' else ''
        supporters = len(node['voices'])
        tally = f' [{supporters}v]' if supporters > 1 else ''
        yield f"{'  ' * depth}{marker}P{n}: \"{node['desc']}\"{tally}"
        for child in node['children']:
            yield from self._walk(child, depth + 1)

    def render(self):
        """Return the tree as indented text, one proposition per line."""
        return '\n'.join(
            line for root in self.roots for line in self._walk(root, 0)
        )

    def root_embedding(self):
        """Average embedding of roots — the field's signature.

        Returns None when no root carries an embedding.
        """
        vectors = []
        for root in self.roots:
            if root in self.nodes and self.nodes[root].get('embedding'):
                vectors.append(self.nodes[root]['embedding'])
        if not vectors:
            return None
        count = len(vectors)
        width = len(vectors[0])
        return [sum(vec[i] for vec in vectors) / count for i in range(width)]


# ---------------------------------------------------------------------------
# Bella — the organism
# ---------------------------------------------------------------------------

class Bella:
    """The organism: a constitution gene plus one gene per discovered field.

    Attributes:
        fields:     field name -> Gene holding that field's propositions.
        self_model: Gene named 'bella' holding the constitution (B-nodes).
    """

    def __init__(self):
        self.fields = {}       # name → Gene
        self.self_model = Gene('bella')  # B-nodes
        self._boot_constitution()

    def _boot_constitution(self):
        """Seed the self-model: one root principle and four rules beneath it."""
        model = self.self_model
        model.add(1, "I construct beliefs from evidence")
        rules = (
            "operations: ⊨ ⊨∧δ ⊢→ ⊢⊥ ⊢",
            "⊥ = explicit denial only",
            "one action per claim",
            "descriptions are 3-5 word facts",
        )
        for number, rule in enumerate(rules, start=2):
            model.add(number, rule, 1)

    def find_field(self, claim_emb, threshold=0.45):
        """Route claim to existing field by best node match.

        Every node with an embedding in every field is compared to
        ``claim_emb``. Returns ``(field_name, similarity)`` for the best
        match when it reaches ``threshold``, otherwise ``(None, 0)``.
        """
        top_score, top_field = 0, None
        for field, gene in self.fields.items():
            for node in gene.nodes.values():
                if not node.get('embedding'):
                    continue
                score = cosine_similarity(claim_emb, node['embedding'])
                if score > top_score:
                    top_score, top_field = score, field
        return (top_field, top_score) if top_score >= threshold else (None, 0)

    def create_field(self, name, n, desc, voice='', emb=None):
        """Birth a new field from the first claim and return its gene."""
        newborn = Gene(name)
        newborn.add(n, desc, voice=voice, emb=emb)
        self.fields[name] = newborn
        return newborn

    def stats(self):
        """Return a one-line summary: field count and total proposition count."""
        propositions = 0
        for gene in self.fields.values():
            propositions += len(gene.nodes)
        return f"{len(self.fields)} fields, {propositions} propositions"


# ---------------------------------------------------------------------------
# Firmware (RNA) — the proven e103_proof prompt template
# ---------------------------------------------------------------------------

# Prompt template filled in by transcribe() via str.format. It has three
# fields: {kb} (the rendered gene), {claim} (the claim text) and {next_n}
# (the number offered for a new proposition). Because str.format is used,
# any literal brace added to this text would have to be doubled.
# The action grammar listed in the prompt is what parse() expects back.
FIRMWARE = '''KB:
{kb}

c: "{claim}"

ONE action only:
⊨ P<n>                     -- confirms existing (same fact)
⊨ P<n> ∧ δ"2-3 words"      -- confirms + adds detail
⊢ P{next_n} →P<n> "3-5 words" -- new supporting fact
⊢ P{next_n} ⊥P<n> "3-5 words" -- explicitly denies/rejects P<n>
⊢ P{next_n} "3-5 words"       -- new unrelated root

⊥ means EXPLICIT denial only. → means new supporting detail.
One line.'''


def transcribe(gene, claim_text):
    """Gene → RNA: build the LLM prompt for one claim.

    The gene is rendered to text and substituted into FIRMWARE together
    with the first 200 characters of ``claim_text`` and the gene's next
    free proposition number. Returns None when the rendering is blank.
    """
    rendered = gene.render()
    if rendered.strip():
        snippet = claim_text[:200]
        return FIRMWARE.format(kb=rendered, claim=snippet, next_n=gene.next_n)
    return None


# ---------------------------------------------------------------------------
# Embedding
# ---------------------------------------------------------------------------

# In-process memo: text -> embedding, or None when no embedding came back.
EMB_CACHE = {}


def get_emb(text):
    """Return the embedding for ``text``, consulting EMB_CACHE first.

    On a miss the text is embedded as a single-item batch; an empty batch
    or empty first vector yields None. The outcome (None included) is
    cached, so each distinct text is embedded at most once per process.
    """
    try:
        return EMB_CACHE[text]
    except KeyError:
        pass
    batch = _get_embeddings([text])
    vector = None
    if batch and batch[0]:
        vector = batch[0]
    EMB_CACHE[text] = vector
    return vector


# ---------------------------------------------------------------------------
# LLM
# ---------------------------------------------------------------------------

# Response cache used by call_llm: md5 hex digest of the prompt -> stripped
# response text. Replaced wholesale by load_cache(), persisted by save_cache().
CACHE = {}
# JSON file backing CACHE; lives in the same directory as this module.
CACHE_FILE = os.path.join(os.path.dirname(__file__), 'kernel_cache.json')
# Running sum of the gateway's estimated_cost_usd over non-cached calls.
COST = 0.0
# Number of calls that actually reached the gateway (cache hits excluded).
CALLS = 0


def load_cache():
    """Replace the in-memory CACHE with the contents of CACHE_FILE, if present.

    A missing file leaves CACHE untouched.
    """
    global CACHE
    if not os.path.exists(CACHE_FILE):
        return
    with open(CACHE_FILE) as handle:
        CACHE = json.load(handle)


def save_cache():
    """Write the whole in-memory CACHE to CACHE_FILE as indented JSON."""
    with open(CACHE_FILE, 'w') as handle:
        json.dump(CACHE, fp=handle, indent=2)


async def call_llm(prompt, gw):
    """Return the model's reply to ``prompt``, served from CACHE when possible.

    Replies are keyed by the md5 hex digest of the prompt. On a miss the
    gateway is called once (POWERFUL tier, temperature 0, at most 40
    tokens), COST and CALLS are updated, and the stripped reply is cached
    and written to disk straight away.
    """
    global COST, CALLS
    digest = hashlib.md5(prompt.encode()).hexdigest()
    try:
        return CACHE[digest]
    except KeyError:
        pass

    reply = await gw.complete(
        tier=ModelTier.POWERFUL,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.0,
        max_tokens=40,
        purpose="bella",
    )
    # Accounting happens before the reply text is touched.
    COST += getattr(reply, 'estimated_cost_usd', 0.0)
    CALLS += 1
    answer = reply.content.strip()
    CACHE[digest] = answer
    save_cache()
    return answer


# ---------------------------------------------------------------------------
# Parse + Apply (same as before, proven)
# ---------------------------------------------------------------------------

def parse(raw):
    """Decode the first line of an LLM reply into an action tuple.

    Returns ``(kind, target, new_n, text)``:
      CONFIRM  -> (target, None, None)
      AMEND    -> (target, None, detail)
      CHILD    -> (parent, new number, description)
      COUNTER  -> (denied node, new number, description)
      ROOT     -> (None, new number, description)
    Anything unrecognised becomes a ROOT with no number whose text is the
    first 40 characters of the raw reply. ASCII stand-ins (=, &, d, |, -,
    >, x) are accepted alongside the symbolic operators.
    """
    head = raw.strip().split('\n')[0].strip()

    hit = re.match(r'[⊨=]\s*P(\d+)\s*$', head)
    if hit:
        return 'CONFIRM', int(hit.group(1)), None, None

    hit = re.match(r'[⊨=]\s*P(\d+)\s*[∧&]\s*[δd]?"?([^"]*)"?', head)
    if hit:
        node, detail = hit.groups()
        return 'AMEND', int(node), None, detail.strip()

    # Both linked forms share one shape: new number, operator, anchor, text.
    linked_forms = (
        ('CHILD', r'[⊢|-]\s*P(\d+)\s*[→>]\s*P(\d+)\s*"?([^"]*)"?'),
        ('COUNTER', r'[⊢|-]\s*P(\d+)\s*[⊥x]\s*P(\d+)\s*"?([^"]*)"?'),
    )
    for kind, pattern in linked_forms:
        hit = re.match(pattern, head)
        if hit:
            fresh, anchor, text = hit.groups()
            return kind, int(anchor), int(fresh), text.strip()

    hit = re.match(r'[⊢|-]\s*P(\d+)\s+"?([^"]+)"?', head)
    mentions_link = '→' in head or '⊥' in head
    if hit and not mentions_link:
        fresh, text = hit.groups()
        return 'ROOT', None, int(fresh), text.strip()

    return 'ROOT', None, None, raw[:40]


def _fresh_id(gene, proposed):
    """Return ``proposed`` if it is a usable unused number, else ``gene.next_n``."""
    if proposed and proposed not in gene.nodes:
        return proposed
    return gene.next_n


def apply(gene, action, voice='', emb=None):
    """Apply one parsed action to ``gene`` and return a one-line summary.

    Args:
        gene:   the gene to mutate.
        action: ``(kind, target, new_n, desc)`` as produced by ``parse``.
        voice:  source of the claim, recorded on the touched node.
        emb:    embedding stored on a newly created node.

    Returns:
        The action rendered in operator notation, or ``'? <kind>'`` when
        the action cannot be applied (unknown kind, or a target that is
        not in the gene).

    New nodes use the number proposed in the action only when it is not
    already taken. The model occasionally echoes an existing number, and
    handing that to ``gene.add`` would overwrite the existing proposition
    and register the number a second time among roots/children. In that
    case the gene's own ``next_n`` is used instead.
    """
    act, target, new_n, desc = action
    if act == 'CONFIRM' and target in gene.nodes:
        gene.confirm(target, voice)
        return f'⊨ P{target}'
    if act == 'AMEND' and target in gene.nodes:
        gene.amend(target, desc, voice)
        return f'⊨ P{target} ∧ δ"{desc}"'
    if act in ('CHILD', 'COUNTER') and target in gene.nodes:
        n = _fresh_id(gene, new_n)
        rel = '⊥' if act == 'COUNTER' else '→'
        gene.add(n, desc or '?', target, rel, voice, emb)
        return f'⊢ P{n} {rel}P{target} "{desc}"'
    if act == 'ROOT':
        n = _fresh_id(gene, new_n)
        gene.add(n, desc or '?', voice=voice, emb=emb)
        return f'⊢ P{n} "{desc}"'
    return f'? {act}'


# ---------------------------------------------------------------------------
# Process — the full cycle
# ---------------------------------------------------------------------------

# Similarity at or above which a claim confirms a node without an LLM call.
AUTO_CONFIRM = 0.88
# Minimum best-node similarity for routing a claim into an existing field.
FIELD_THRESHOLD = 0.45


def _seed_field_name(text, taken):
    """Derive a field name from the first four words of ``text``, unique in ``taken``.

    Punctuation is stripped before the alphabetic check so that a word
    like "Epstein," still contributes. A numeric suffix is appended when
    the name is already in use, so a new field never replaces an old one.
    """
    tokens = (w.strip('.,;:').lower() for w in text.split()[:4])
    base = '_'.join(t for t in tokens if t.isalpha())[:30]
    if not base:
        base = f'field_{len(taken)}'
    name, suffix = base, 2
    while name in taken:
        name = f'{base}_{suffix}'
        suffix += 1
    return name


async def process(claim, bella, gw):
    """One claim through the living system.

    Args:
        claim: dict with a ``'text'`` entry and an optional ``'voice'``.
        bella: the organism whose fields are searched and updated.
        gw:    LLM gateway handed to ``call_llm``.

    Returns:
        ``(field_name, summary)`` describing where the claim landed and
        which action was taken.

    Steps: embed the claim; route it to the best-matching field; inside
    that field either auto-confirm a near-identical node or ask the LLM
    for an action and apply it. A claim that matches no field starts a
    new one.
    """
    text = claim['text']
    voice = claim.get('voice', '')
    emb = get_emb(text)

    # get_emb may return None; without a vector there is nothing to compare,
    # so the claim goes straight to the new-field path.
    field_name = None
    if emb is not None:
        field_name, _ = bella.find_field(emb, FIELD_THRESHOLD)

    if field_name:
        gene = bella.fields[field_name]

        # Auto-confirm: very high similarity to existing node
        best_sim, best_n = 0, None
        for n, nd in gene.nodes.items():
            if nd.get('embedding'):
                s = cosine_similarity(emb, nd['embedding'])
                if s > best_sim:
                    best_sim, best_n = s, n
        if best_sim >= AUTO_CONFIRM and best_n:
            gene.confirm(best_n, voice)
            return field_name, f'⊨ P{best_n} (auto)'

        # Deliberate: transcribe and call LLM
        transcript = transcribe(gene, text)
        if transcript:
            raw = await call_llm(transcript, gw)
            action = parse(raw)
            result = apply(gene, action, voice, emb)
            return field_name, result

    # No field matches — birth new field under a name not yet in use.
    name = _seed_field_name(text, bella.fields)
    desc = text[:60]
    bella.create_field(name, 1, desc, voice, emb)
    return name, f'⊢ P1 "{desc[:40]}" (new field: {name})'


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

async def main():
    """Boot Bella, feed her two interleaved claim sets, and print the result.

    Claims are read from ``../experiments/data`` relative to this module.
    The environment variable STOP_AFTER (default 40) caps how many
    interleaved claims are processed.
    """
    bella = Bella()

    print("BELLA booted.\n")
    print(f"Constitution: {len(bella.self_model.nodes)} B-nodes")
    print(bella.self_model.render())
    print()

    gw = LLMGateway()
    load_cache()

    # Load claims
    data = os.path.join(os.path.dirname(__file__), '..', 'experiments', 'data')

    def read_claims(filename):
        # Context manager closes the handle; JSON text is read as UTF-8
        # instead of whatever the platform default encoding happens to be.
        with open(os.path.join(data, filename), encoding='utf-8') as fh:
            return json.load(fh)

    f35 = read_claims('f35_claims.json')
    eps = read_claims('epstein_death_claims.json')

    # Interleave: alternate between the two sets until both are exhausted.
    claims = []
    for i in range(max(len(f35), len(eps))):
        if i < len(f35):
            claims.append(('F35', f35[i]))
        if i < len(eps):
            claims.append(('EPS', eps[i]))

    stop = int(os.environ.get('STOP_AFTER', 40))
    print(f"Processing {min(stop, len(claims))} interleaved claims...\n")

    for tag, c in claims[:stop]:
        field_name, result = await process(c, bella, gw)
        print(f"  [{tag}] → {field_name}: {result}")

    # Final state
    print(f"\n{'='*60}")
    print(f"Bella: {bella.stats()}\n")
    for name, gene in bella.fields.items():
        nv = sum(len(nd['voices']) for nd in gene.nodes.values())
        print(f"  {name}: {len(gene.nodes)} props, {len(gene.roots)} roots, {nv} total voices")
        print(f"  {gene.render()}")
        print()

    print(f"{CALLS} LLM calls, ${COST:.3f}")


if __name__ == '__main__':
    asyncio.run(main())