Skip to content

Commit 16abb21

Browse files
author
Feras A Saad
committed
Merge branch '20171121-fsaad-bayeslite-refactoring-fixes'
2 parents dff8a59 + 3275cf8 commit 16abb21

File tree

4 files changed

+6
-121
lines changed

4 files changed

+6
-121
lines changed

src/crosscat/state.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,7 @@ def logpdf(self, rowid, targets, constraints=None, inputs=None,
406406

407407
def simulate(self, rowid, targets, constraints=None, inputs=None,
408408
N=None, accuracy=None):
409-
assert isinstance(targets, list)
409+
assert isinstance(targets, (list, tuple))
410410
assert inputs is None or isinstance(inputs, dict)
411411
self._validate_cgpm_query(rowid, targets, constraints)
412412
if not self._composite:
@@ -448,7 +448,7 @@ def _validate_cgpm_query(self, rowid, targets, constraints):
448448
# Is the rowid fresh?
449449
fresh = self.hypothetical(rowid)
450450
# Is the query simulate or logpdf?
451-
simulate = isinstance(targets, list)
451+
simulate = isinstance(targets, (list, tuple))
452452
# Disallow duplicated target cols.
453453
if simulate and len(set(targets)) != len(targets):
454454
raise ValueError('Columns in targets must be unique.')

src/mixtures/view.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,7 @@ def _bulk_incorporate(self, dim):
486486

487487
def _validate_cgpm_query(self, rowid, targets, constraints):
488488
# Is the query simulate or logpdf?
489-
simulate = isinstance(targets, list)
489+
simulate = isinstance(targets, (list, tuple))
490490
# Disallow duplicated target cols.
491491
if simulate and len(set(targets)) != len(targets):
492492
raise ValueError('Columns in targets must be unique.')

src/utils/validation.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,10 @@ def validate_crp_constrained_input(N, Cd, Ci, Rd, Ri):
106106
def partition_query_evidence(Z, query, evidence):
107107
"""Returns queries[k], evidences[k] are queries, evidences for cluster k."""
108108
evidences = partition_dict(Z, evidence) if evidence is not None else dict()
109-
if isinstance(query, list):
110-
queries = partition_list(Z, query)
111-
else:
109+
if isinstance(query, dict):
112110
queries = partition_dict(Z, query)
111+
else:
112+
queries = partition_list(Z, query)
113113
return queries, evidences
114114

115115
def partition_list(Z, L):

tests/test_lovecat.py

-115
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,6 @@
3131

3232
import numpy as np
3333

34-
import bayeslite
35-
36-
from bayeslite.read_csv import bayesdb_read_csv
37-
38-
from crosscat.LocalEngine import LocalEngine
39-
4034
from cgpm.crosscat import lovecat
4135
from cgpm.crosscat.engine import Engine
4236
from cgpm.crosscat.state import State
@@ -45,15 +39,6 @@
4539
from cgpm.utils import test as tu
4640

4741

48-
def nullify(bdb, table, null):
49-
from bayeslite import bql_quote_name
50-
qt = bql_quote_name(table)
51-
for v in (r[1] for r in bdb.sql_execute('PRAGMA table_info(%s)' % (qt,))):
52-
qv = bql_quote_name(v)
53-
bdb.sql_execute(
54-
'UPDATE %s SET %s = NULL WHERE %s = ?' % (qt, qv, qv),
55-
(null,))
56-
5742
# -- Global variables shared by all module functions.
5843
rng = gu.gen_rng(2)
5944

@@ -100,43 +85,6 @@ def generate_dataset_2():
10085
return D
10186

10287

103-
# -------- Create a bdb instance with crosscat -------- #
104-
@contextlib.contextmanager
105-
def generate_bdb(T):
106-
with bayeslite.bayesdb_open(':memory:') as bdb:
107-
# Convert data into csv format and load it.
108-
T_header = str.join(',', ['c%d' % (i,) for i in range(T.shape[1])])
109-
T_data = str.join('\n', [str.join(',', map(str, row)) for row in T])
110-
f = StringIO.StringIO('%s\n%s' % (T_header, T_data))
111-
bayesdb_read_csv(bdb, 'data', f, header=True, create=True)
112-
nullify(bdb, 'data', 'nan')
113-
114-
# Create a population, ignoring column 1.
115-
bdb.execute('''
116-
CREATE POPULATION data_p FOR data WITH SCHEMA(
117-
IGNORE c1;
118-
MODEL c0, c2, c4, c6, c7 AS NUMERICAL;
119-
MODEL c3, c5 AS CATEGORICAL);
120-
''')
121-
122-
# Create a CrossCat metamodel.
123-
bdb.execute('''
124-
CREATE METAMODEL data_m FOR data_p USING crosscat(
125-
c0 NUMERICAL,
126-
c2 NUMERICAL,
127-
c4 NUMERICAL,
128-
c6 NUMERICAL,
129-
c7 NUMERICAL,
130-
131-
c3 CATEGORICAL,
132-
c5 CATEGORICAL);
133-
''')
134-
135-
bdb.execute('INITIALIZE 1 MODEL FOR data_m;')
136-
bdb.execute('ANALYZE data_m FOR 2 ITERATION WAIT;')
137-
yield bdb
138-
139-
14088
# -------- Create a cgpm.state crosscat instance -------- #
14189
def generate_state(T):
14290
# Remember that c1 is ignored.
@@ -156,69 +104,6 @@ def generate_state(T):
156104
return state
157105

158106

159-
def test_cgpm_lovecat_integration():
160-
"""A mix of unit and integration testing for lovecat analysis."""
161-
162-
T = generate_dataset()
163-
164-
with generate_bdb(T) as bdb:
165-
166-
# Retrieve the CrossCat metamodel instance.
167-
metamodel = bdb.metamodels['crosscat']
168-
169-
# Retrieve the cgpm.state
170-
state = generate_state(T)
171-
172-
# Assert that M_c_prime agrees with CrossCat M_c.
173-
M_c_prime = lovecat._crosscat_M_c(state)
174-
M_c = metamodel._crosscat_metadata(bdb, 1)
175-
176-
assert M_c['name_to_idx'] == M_c_prime['name_to_idx']
177-
assert M_c['idx_to_name'] == M_c_prime['idx_to_name']
178-
assert M_c['column_metadata'] == M_c_prime['column_metadata']
179-
180-
# Check that the converted datasets match.
181-
bdb_data = metamodel._crosscat_data(bdb, 1, M_c)
182-
cgpm_data = lovecat._crosscat_T(state, M_c_prime)
183-
assert np.all(np.isclose(bdb_data, cgpm_data, atol=1e-2, equal_nan=True))
184-
185-
# X_L and X_D from the CrossCat state. Not sure what tests to write
186-
# that access theta['X_L'] and theta['X_D'] directly.
187-
theta = metamodel._crosscat_theta(bdb, 1, 0)
188-
189-
# Retrieve X_D and X_L from the cgpm.state, and check they can be used
190-
# as arguments to LocalEngine.analyze.
191-
X_D = lovecat._crosscat_X_D(state, M_c_prime)
192-
X_L = lovecat._crosscat_X_L(state, M_c_prime, X_D)
193-
194-
LE = LocalEngine(seed=4)
195-
start = time.time()
196-
X_L_new, X_D_new = LE.analyze(
197-
M_c_prime, lovecat._crosscat_T(state, M_c_prime),
198-
X_L, X_D, 1, max_time=20, n_steps=100000000,
199-
progress=lovecat._progress)
200-
assert np.allclose(time.time() - start, 20, atol=2)
201-
202-
# This function call updates the cgpm.state internals to
203-
# match X_L_new, X_D_new. Check it does not destory the cgpm.state and
204-
# we can still run transitions.
205-
lovecat._update_state(state, M_c, X_L_new, X_D_new)
206-
state.transition(S=5)
207-
208-
# Invoke a lovecat transition directly through the cgpm.state,
209-
# for 10000 iters with a 5 second timeout.
210-
start = time.time()
211-
state.transition_lovecat(S=7, N=100000)
212-
# Give an extra second for function call overhead.
213-
assert 7. <= time.time() - start <= 8.
214-
215-
# Now invoke by iterations only.
216-
state.transition_lovecat(N=7, progress=False)
217-
218-
# Make sure we can now run regular cgpm.state transitions again.
219-
state.transition(S=5)
220-
221-
222107
def test_lovecat_transition_columns():
223108
"""Test transition_lovecat targeting specific rows and columns."""
224109
D = generate_dataset_2()

0 commit comments

Comments
 (0)