port docs and tests to new version
amakelov committed Jul 2, 2024
1 parent 1fb3a40 commit 3d152d1
Showing 25 changed files with 1,803 additions and 1,774 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -1,6 +1,6 @@
<div align="center">
<br>
<img src="../../assets/logo-no-background.png" height=128 alt="logo" align="center">
<img src="assets/logo-no-background.png" height=128 alt="logo" align="center">
<br>
<a href="#install">Install</a> |
<a href="#quickstart">Quickstart</a> |
11 changes: 8 additions & 3 deletions docs/docs/01_storage_and_ops.md
@@ -12,11 +12,16 @@ it:


```python
from mandala._next.imports import Storage
from mandala.imports import Storage
import os

DB_PATH = 'my_persistent_storage.db'
if os.path.exists(DB_PATH):
os.remove(DB_PATH)

storage = Storage(
# omit for an in-memory storage
db_path='my_persistent_storage.db',
db_path=DB_PATH,
# omit to disable automatic dependency tracking & versioning
# use "__main__" to only track functions defined in the current session
deps_path='__main__',
@@ -28,7 +33,7 @@ storage = Storage(


```python
from mandala._next.imports import op
from mandala.imports import op

@op
def sum_args(a, *args, b=1, **kwargs):
24 changes: 12 additions & 12 deletions docs/docs/02_retracing.md
@@ -36,7 +36,7 @@ Here's a small example of a machine learning pipeline:


```python
from mandala._next.imports import *
from mandala.imports import *
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
@@ -69,7 +69,7 @@ with storage:
Loading data
Training model
Getting accuracy
AtomRef(0.99, hid='d16...', cid='12a...')
AtomRef(1.0, hid='d16...', cid='b67...')


## Retracing your steps with memoization
@@ -89,8 +89,8 @@ with storage:
```

AtomRef(hid='d0f...', cid='908...', in_memory=False) AtomRef(hid='f1a...', cid='69f...', in_memory=False)
AtomRef(hid='caf...', cid='f35...', in_memory=False)
AtomRef(hid='d16...', cid='12a...', in_memory=False)
AtomRef(hid='caf...', cid='c37...', in_memory=False)
AtomRef(hid='d16...', cid='b67...', in_memory=False)


This puts all the `Ref`s along the way in your local variables (as if you've
@@ -105,7 +105,7 @@ storage.unwrap(acc)



0.99
1.0



@@ -127,17 +127,17 @@ with storage:
print(acc)
```

AtomRef(hid='d16...', cid='12a...', in_memory=False)
AtomRef(hid='d16...', cid='b67...', in_memory=False)
Training model
Getting accuracy
AtomRef(0.99, hid='6fd...', cid='12a...')
AtomRef(1.0, hid='6fd...', cid='b67...')
Loading data
Training model
Getting accuracy
AtomRef(0.82, hid='158...', cid='238...')
AtomRef(0.81, hid='158...', cid='5a4...')
Training model
Getting accuracy
AtomRef(0.9, hid='214...', cid='24c...')
AtomRef(0.84, hid='214...', cid='6c4...')
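Notice in the diff above that each `cid` changes exactly when the underlying value changes (e.g. `'12a...'` becomes `'b67...'` when the accuracy moves from 0.99 to 1.0), while the `hid` (history id) stays put. Conceptually, and only as a sketch of the idea rather than mandala's real hashing scheme, a content id is a deterministic hash of the value:

```python
import hashlib

def content_id(value) -> str:
    """Hypothetical content id: hash of the value's repr.

    A real system hashes a canonical serialization of the value;
    repr() is used here purely for illustration.
    """
    return hashlib.sha256(repr(value).encode()).hexdigest()[:3] + '...'

# equal values share a content id; different values diverge
assert content_id(1.0) == content_id(1.0)
assert content_id(0.99) != content_id(1.0)
print(content_id(1.0))
```

This is why re-running the docs' notebooks against a retrained model rewrites the `cid`s throughout the expected outputs, as this commit does.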


Note that the first value of `acc` from the nested loop is with
@@ -165,8 +165,8 @@ with storage:
print(n_class, n_estimators, storage.unwrap(acc))
```

2 5 0.99
2 10 0.99
2 5 1.0
2 10 1.0


## Memoized code as storage interface
@@ -185,5 +185,5 @@ with storage:
print(storage.unwrap(acc), storage.unwrap(model))
```

0.82 RandomForestClassifier(max_depth=2, n_estimators=5)
0.81 RandomForestClassifier(max_depth=2, n_estimators=5)
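The "memoized code as storage interface" idea above can be sketched in plain Python (an illustration only; mandala's `Storage` persists content-addressed `Ref`s rather than an in-process dict): because every call hits the cache, retracing the same parameter grid retrieves stored results without recomputation.

```python
calls = 0  # counts actual executions of the function body

def memoize(f):
    cache = {}
    def wrapper(*args):
        if args not in cache:
            cache[args] = f(*args)
        return cache[args]
    return wrapper

@memoize
def train_and_eval(n_class, n_estimators):
    global calls
    calls += 1
    # stand-in for actual training; returns a fake accuracy
    return round(1.0 - 0.01 * n_class / n_estimators, 4)

# first pass: computes and stores
results = {(c, n): train_and_eval(c, n) for c in (2, 5) for n in (5, 10)}
# second pass ("retracing"): pure retrieval, no new computation
for (c, n), acc in results.items():
    assert train_and_eval(c, n) == acc
print(calls)  # 4: the retracing pass hit the cache every time
```

The retracing pass doubles as a query interface: the code that produced the results is also the code that looks them up.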

36 changes: 18 additions & 18 deletions docs/docs/03_cf.md
@@ -50,7 +50,7 @@ from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

from mandala._next.imports import *
from mandala.imports import *
try:
import rich
from rich import print as pprint
@@ -162,8 +162,8 @@ cf.draw(verbose=True)
Computational graph:
X_train@output_0, X_test@output_1, y_train@output_2, y_test@output_3 =
<span style="color: #800080; text-decoration-color: #800080; font-weight: bold">generate_dataset</span><span style="font-weight: bold">(</span><span style="color: #808000; text-decoration-color: #808000">random_seed</span>=<span style="color: #800080; text-decoration-color: #800080">random_seed</span><span style="font-weight: bold">)</span>
model@output_0 = <span style="color: #800080; text-decoration-color: #800080; font-weight: bold">train_model</span><span style="font-weight: bold">(</span><span style="color: #808000; text-decoration-color: #808000">X_train</span>=<span style="color: #800080; text-decoration-color: #800080">X_train</span>, <span style="color: #808000; text-decoration-color: #808000">n_estimators</span>=<span style="color: #800080; text-decoration-color: #800080">n_estimators</span>, <span style="color: #808000; text-decoration-color: #808000">y_train</span>=<span style="color: #800080; text-decoration-color: #800080">y_train</span><span style="font-weight: bold">)</span>
v@output_0 = <span style="color: #800080; text-decoration-color: #800080; font-weight: bold">eval_model</span><span style="font-weight: bold">(</span><span style="color: #808000; text-decoration-color: #808000">y_test</span>=<span style="color: #800080; text-decoration-color: #800080">y_test</span>, <span style="color: #808000; text-decoration-color: #808000">X_test</span>=<span style="color: #800080; text-decoration-color: #800080">X_test</span>, <span style="color: #808000; text-decoration-color: #808000">model</span>=<span style="color: #800080; text-decoration-color: #800080">model</span><span style="font-weight: bold">)</span>
model@output_0 = <span style="color: #800080; text-decoration-color: #800080; font-weight: bold">train_model</span><span style="font-weight: bold">(</span><span style="color: #808000; text-decoration-color: #808000">X_train</span>=<span style="color: #800080; text-decoration-color: #800080">X_train</span>, <span style="color: #808000; text-decoration-color: #808000">y_train</span>=<span style="color: #800080; text-decoration-color: #800080">y_train</span>, <span style="color: #808000; text-decoration-color: #808000">n_estimators</span>=<span style="color: #800080; text-decoration-color: #800080">n_estimators</span><span style="font-weight: bold">)</span>
v@output_0 = <span style="color: #800080; text-decoration-color: #800080; font-weight: bold">eval_model</span><span style="font-weight: bold">(</span><span style="color: #808000; text-decoration-color: #808000">X_test</span>=<span style="color: #800080; text-decoration-color: #800080">X_test</span>, <span style="color: #808000; text-decoration-color: #808000">y_test</span>=<span style="color: #800080; text-decoration-color: #800080">y_test</span>, <span style="color: #808000; text-decoration-color: #808000">model</span>=<span style="color: #800080; text-decoration-color: #800080">model</span><span style="font-weight: bold">)</span>
</pre>


@@ -225,7 +225,7 @@ cf.draw(verbose=True)
<span style="color: #008080; text-decoration-color: #008080; font-weight: bold">5</span> <span style="color: #800080; text-decoration-color: #800080; font-weight: bold">variable</span><span style="font-weight: bold">(</span>s<span style="font-weight: bold">)</span> <span style="font-weight: bold">(</span><span style="color: #008080; text-decoration-color: #008080; font-weight: bold">14</span> unique refs<span style="font-weight: bold">)</span>
<span style="color: #008080; text-decoration-color: #008080; font-weight: bold">1</span> <span style="color: #800080; text-decoration-color: #800080; font-weight: bold">operation</span><span style="font-weight: bold">(</span>s<span style="font-weight: bold">)</span> <span style="font-weight: bold">(</span><span style="color: #008080; text-decoration-color: #008080; font-weight: bold">4</span> unique calls<span style="font-weight: bold">)</span>
Computational graph:
output_0@output_0, output_1@output_1 = <span style="color: #800080; text-decoration-color: #800080; font-weight: bold">train_model</span><span style="font-weight: bold">(</span><span style="color: #808000; text-decoration-color: #808000">X_train</span>=<span style="color: #800080; text-decoration-color: #800080">X_train</span>, <span style="color: #808000; text-decoration-color: #808000">n_estimators</span>=<span style="color: #800080; text-decoration-color: #800080">n_estimators</span>, <span style="color: #808000; text-decoration-color: #808000">y_train</span>=<span style="color: #800080; text-decoration-color: #800080">y_train</span><span style="font-weight: bold">)</span>
output_0@output_0, output_1@output_1 = <span style="color: #800080; text-decoration-color: #800080; font-weight: bold">train_model</span><span style="font-weight: bold">(</span><span style="color: #808000; text-decoration-color: #808000">X_train</span>=<span style="color: #800080; text-decoration-color: #800080">X_train</span>, <span style="color: #808000; text-decoration-color: #808000">y_train</span>=<span style="color: #800080; text-decoration-color: #800080">y_train</span>, <span style="color: #808000; text-decoration-color: #808000">n_estimators</span>=<span style="color: #800080; text-decoration-color: #800080">n_estimators</span><span style="font-weight: bold">)</span>
</pre>


@@ -246,13 +246,13 @@ print(cf.df(values='refs').to_markdown())
```

Extracting tuples from the computation graph:
output_0@output_0, output_1@output_1 = train_model(n_estimators=n_estimators, y_train=y_train, X_train=X_train)
| | y_train | X_train | n_estimators | train_model | output_1 | output_0 |
output_0@output_0, output_1@output_1 = train_model(y_train=y_train, X_train=X_train, n_estimators=n_estimators)
| | X_train | n_estimators | y_train | train_model | output_0 | output_1 |
|---:|:-----------------------------------------------------|:-----------------------------------------------------|:-----------------------------------------------------|:----------------------------------------------|:-----------------------------------------------------|:-----------------------------------------------------|
| 0 | AtomRef(hid='faf...', cid='83f...', in_memory=False) | AtomRef(hid='efa...', cid='a6d...', in_memory=False) | AtomRef(hid='9fd...', cid='4ac...', in_memory=False) | Call(train_model, cid='5af...', hid='514...') | AtomRef(hid='784...', cid='238...', in_memory=False) | AtomRef(hid='331...', cid='e64...', in_memory=False) |
| 1 | AtomRef(hid='faf...', cid='83f...', in_memory=False) | AtomRef(hid='efa...', cid='a6d...', in_memory=False) | AtomRef(hid='235...', cid='c04...', in_memory=False) | Call(train_model, cid='204...', hid='c55...') | AtomRef(hid='5b7...', cid='f0a...', in_memory=False) | AtomRef(hid='208...', cid='c75...', in_memory=False) |
| 2 | AtomRef(hid='faf...', cid='83f...', in_memory=False) | AtomRef(hid='efa...', cid='a6d...', in_memory=False) | AtomRef(hid='120...', cid='9bc...', in_memory=False) | Call(train_model, cid='3be...', hid='e60...') | AtomRef(hid='646...', cid='acb...', in_memory=False) | AtomRef(hid='522...', cid='d5a...', in_memory=False) |
| 3 | AtomRef(hid='faf...', cid='83f...', in_memory=False) | AtomRef(hid='efa...', cid='a6d...', in_memory=False) | AtomRef(hid='98c...', cid='29d...', in_memory=False) | Call(train_model, cid='c4f...', hid='5f7...') | AtomRef(hid='760...', cid='46b...', in_memory=False) | AtomRef(hid='b25...', cid='462...', in_memory=False) |
| 0 | AtomRef(hid='efa...', cid='a6d...', in_memory=False) | AtomRef(hid='120...', cid='9bc...', in_memory=False) | AtomRef(hid='faf...', cid='83f...', in_memory=False) | Call(train_model, cid='3be...', hid='e60...') | AtomRef(hid='522...', cid='d5a...', in_memory=False) | AtomRef(hid='646...', cid='acb...', in_memory=False) |
| 1 | AtomRef(hid='efa...', cid='a6d...', in_memory=False) | AtomRef(hid='9fd...', cid='4ac...', in_memory=False) | AtomRef(hid='faf...', cid='83f...', in_memory=False) | Call(train_model, cid='5af...', hid='514...') | AtomRef(hid='331...', cid='e64...', in_memory=False) | AtomRef(hid='784...', cid='238...', in_memory=False) |
| 2 | AtomRef(hid='efa...', cid='a6d...', in_memory=False) | AtomRef(hid='235...', cid='c04...', in_memory=False) | AtomRef(hid='faf...', cid='83f...', in_memory=False) | Call(train_model, cid='204...', hid='c55...') | AtomRef(hid='208...', cid='c75...', in_memory=False) | AtomRef(hid='5b7...', cid='f0a...', in_memory=False) |
| 3 | AtomRef(hid='efa...', cid='a6d...', in_memory=False) | AtomRef(hid='98c...', cid='29d...', in_memory=False) | AtomRef(hid='faf...', cid='83f...', in_memory=False) | Call(train_model, cid='c4f...', hid='5f7...') | AtomRef(hid='b25...', cid='462...', in_memory=False) | AtomRef(hid='760...', cid='46b...', in_memory=False) |


##
@@ -496,14 +496,14 @@ print(cf.df().drop(columns=['X_train', 'y_train']).to_markdown())

Extracting tuples from the computation graph:
X_train@output_0, y_train@output_2 = generate_dataset(random_seed=random_seed)
output_0@output_0, output_1@output_1 = train_model(n_estimators=n_estimators, y_train=y_train, X_train=X_train)
output_0@output_0, output_1@output_1 = train_model(y_train=y_train, X_train=X_train, n_estimators=n_estimators)
output_0_0@output_0 = eval_model(model=output_0)
| | random_seed | generate_dataset | n_estimators | train_model | output_1 | output_0 | eval_model | output_0_0 |
|---:|--------------:|:---------------------------------------------------|---------------:|:----------------------------------------------|-----------:|:-----------------------------------------------------|:---------------------------------------------|-------------:|
| 0 | 42 | Call(generate_dataset, cid='19a...', hid='c3f...') | 80 | Call(train_model, cid='3be...', hid='e60...') | 0.83 | RandomForestClassifier(max_depth=2, n_estimators=80) | Call(eval_model, cid='137...', hid='d32...') | 0.82 |
| 1 | 42 | Call(generate_dataset, cid='19a...', hid='c3f...') | 20 | Call(train_model, cid='204...', hid='c55...') | 0.8 | RandomForestClassifier(max_depth=2, n_estimators=20) | | nan |
| 2 | 42 | Call(generate_dataset, cid='19a...', hid='c3f...') | 40 | Call(train_model, cid='5af...', hid='514...') | 0.82 | RandomForestClassifier(max_depth=2, n_estimators=40) | Call(eval_model, cid='38f...', hid='5d3...') | 0.81 |
| 3 | 42 | Call(generate_dataset, cid='19a...', hid='c3f...') | 10 | Call(train_model, cid='c4f...', hid='5f7...') | 0.74 | RandomForestClassifier(max_depth=2, n_estimators=10) | | nan |
| | n_estimators | random_seed | generate_dataset | train_model | output_0 | eval_model | output_0_0 | output_1 |
|---:|---------------:|--------------:|:---------------------------------------------------|:----------------------------------------------|:-----------------------------------------------------|:---------------------------------------------|-------------:|-----------:|
| 0 | 10 | 42 | Call(generate_dataset, cid='19a...', hid='c3f...') | Call(train_model, cid='c4f...', hid='5f7...') | RandomForestClassifier(max_depth=2, n_estimators=10) | | nan | 0.74 |
| 1 | 40 | 42 | Call(generate_dataset, cid='19a...', hid='c3f...') | Call(train_model, cid='5af...', hid='514...') | RandomForestClassifier(max_depth=2, n_estimators=40) | Call(eval_model, cid='38f...', hid='5d3...') | 0.81 | 0.82 |
| 2 | 20 | 42 | Call(generate_dataset, cid='19a...', hid='c3f...') | Call(train_model, cid='204...', hid='c55...') | RandomForestClassifier(max_depth=2, n_estimators=20) | | nan | 0.8 |
| 3 | 80 | 42 | Call(generate_dataset, cid='19a...', hid='c3f...') | Call(train_model, cid='3be...', hid='e60...') | RandomForestClassifier(max_depth=2, n_estimators=80) | Call(eval_model, cid='137...', hid='d32...') | 0.82 | 0.83 |


Importantly, we see that some computations only partially follow the full
@@ -554,7 +554,7 @@ cf.print_graph()
```

X_train@output_0, y_train@output_2 = generate_dataset(random_seed=random_seed)
output_0@output_0, output_1@output_1 = train_model(X_train=X_train, n_estimators=n_estimators, y_train=y_train)
output_0@output_0, output_1@output_1 = train_model(X_train=X_train, y_train=y_train, n_estimators=n_estimators)
output_0_0@output_0 = eval_model(model=output_0)
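The `print_graph` output above is a topological listing of variables and the ops that connect them. A minimal sketch of producing such a printout from `(op, inputs, outputs)` triples (the node names mirror the example; the data structure is an illustration, not mandala's internal representation):

```python
# Each entry: (op name, input variables, output variables),
# listed in dependency order as a ComputationFrame would print them.
ops = [
    ("generate_dataset", ["random_seed"], ["X_train", "y_train"]),
    ("train_model", ["X_train", "y_train", "n_estimators"], ["model"]),
    ("eval_model", ["model"], ["test_acc"]),
]

lines = []
for name, inputs, outputs in ops:
    lines.append(f"{', '.join(outputs)} = {name}({', '.join(inputs)})")
print("\n".join(lines))
```

Rendering the graph as assignment-style lines keeps the printout readable as pseudo-code: each line shows which variables an op consumes and produces.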


