Skip to content

Commit 4914e5b

Browse files
committed
added timepoint decoding for precomputed ec correlations
1 parent ceaa4e2 commit 4914e5b

File tree

1 file changed

+257
-3
lines changed

1 file changed

+257
-3
lines changed

timecorr/helpers.py

Lines changed: 257 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -472,6 +472,219 @@ def timepoint_decoder(data, mu=None, nfolds=2, level=0, cfun=isfc, weights_fun=l
472472

473473
return results_pd
474474

475+
def weighted_timepoint_decoder_ec(data, nfolds=2, level=0, optimize_levels=None, cfun=isfc, weights_fun=laplace_weights,
                                  weights_params=laplace_params, combine=mean_combine, rfun=None, opt_init=None):
    """
    Cross-validated timepoint decoding for precomputed electrode/connectivity
    ("ec") correlations, with per-level weights optimized on a held-out subfold.

    :param data: a list of number-of-observations by number-of-features matrices
    :param nfolds: number of cross-validation folds (train using out-of-fold data;
                   test using in-fold data)
    :param level: integer or list of integers for levels to be evaluated (default:0)
    :param optimize_levels: iterable of levels to include when optimizing the
                            per-level mixing weights (default: all levels up to
                            the highest evaluated level)
    :param cfun: function for transforming the group data (default: isfc)
    :param weights_fun: used to compute per-timepoint weights for cfun; default: laplace_weights
    :param weights_params: parameters passed to weights_fun; default: laplace_params
    :params combine: function for combining data within each group, or a list of such functions (default: mean_combine)
    :param rfun: function for reducing output (default: None)
    :param opt_init: initial guess passed through to optimize_weights (default: None)
    :return: results dictionary with the following keys:
       'rank': mean percentile rank (across all timepoints and folds) in the
             decoding distribution of the true timepoint
       'accuracy': mean percent accuracy (across all timepoints and folds)
       'error': mean estimation error (across all timepoints and folds) between
             the decoded and actual window numbers, expressed as a percentage
             of the total number of windows
    """

    if nfolds == 1:
        sub_nfolds = 1
        nfolds = 2
        warnings.warn('When nfolds is set to one, the analysis will be circular.')
    else:
        # sub-folds are collapsed to 1; see the `sub_corrs = corrs` shortcut below
        sub_nfolds = 1

    group_assignments = get_xval_assignments(data.shape[1], nfolds)

    # normalize `level` into a contiguous np.ndarray [0..max]
    if type(level) is int:
        level = np.arange(level + 1)

    level = np.ravel(level)

    assert type(level) is np.ndarray, 'level needs be an integer, list, or np.ndarray'
    assert not np.any(level < 0), 'level cannot contain negative numbers'

    # levels must be contiguous from 0 because each level feeds the next
    if not np.all(np.arange(level.max() + 1) == level):
        level = np.arange(level.max() + 1)

    # broadcast a single combine / cfun callable across all levels
    if callable(combine):
        combine = [combine] * np.shape(level)[0]

    combine = np.ravel(combine)

    assert type(combine) is np.ndarray and type(combine[0]) is not np.str_, 'combine needs to be a function, list of ' \
                                                                            'functions, or np.ndarray of functions'
    assert len(level) == len(combine), 'combine length need to be the same as level if input is type np.ndarray or list'

    if callable(cfun):
        cfun = [cfun] * np.shape(level)[0]

    cfun = np.ravel(cfun)

    assert type(cfun) is np.ndarray and type(cfun[0]) is not np.str_, 'combine needs be a function, list of functions, ' \
                                                                      'or np.ndarray of functions'
    assert len(level) == len(cfun), 'cfun length need to be the same as level if input is type np.ndarray or list'

    if type(rfun) not in [list, np.ndarray]:
        rfun = [rfun] * np.shape(level)[0]

    # level-0 passes use no reduction
    p_rfun = [None] * np.shape(level)[0]

    assert len(level) == len(rfun), 'parameter lengths need to be the same as level if input is ' \
                                    'type np.ndarray or list'

    results_pd = pd.DataFrame()

    for i in range(0, nfolds):

        sub_corrs = []
        corrs = []

        subgroup_assignments = get_xval_assignments(len(data[0][group_assignments == i]), nfolds)

        in_data = [x for x in data[0][group_assignments == i]]
        out_data = [x for x in data[0][group_assignments != i]]

        for v in level:

            if v == 0:

                # level 0: smooth the raw timeseries (no correlation transform)
                in_smooth, out_smooth, in_raw, out_raw = folding_levels_ec(in_data, out_data, level=v, cfun=None,
                                                                           rfun=p_rfun, combine=combine,
                                                                           weights_fun=weights_fun,
                                                                           weights_params=weights_params)

                next_corrs = (1 - sd.cdist(mean_combine(in_smooth), mean_combine(out_smooth),
                                           'correlation'))
                corrs.append(next_corrs)

                for s in range(0, 1):

                    sub_in_data = [x for x in data[0][group_assignments == i][subgroup_assignments == s]]
                    sub_out_data = [x for x in data[0][group_assignments == i][subgroup_assignments != s]]

                    sub_in_smooth, sub_out_smooth, sub_in_raw, sub_out_raw = folding_levels_ec(sub_in_data, sub_out_data,
                                                                                               level=v, cfun=None,
                                                                                               rfun=p_rfun,
                                                                                               combine=combine,
                                                                                               weights_fun=weights_fun,
                                                                                               weights_params=weights_params)
                    next_subcorrs = (1 - sd.cdist(mean_combine(sub_in_smooth),
                                                  mean_combine(sub_out_smooth), 'correlation'))
                    sub_corrs.append(next_subcorrs)

            elif v == 1:

                # level 1: apply cfun to the level-0 raw output carried over
                # from the previous loop iteration
                in_smooth, out_smooth, in_raw, out_raw = folding_levels_ec(in_raw, out_raw, level=v, cfun=cfun,
                                                                           rfun=rfun, combine=combine,
                                                                           weights_fun=weights_fun,
                                                                           weights_params=weights_params)

                next_corrs = (1 - sd.cdist(mean_combine(in_smooth), mean_combine(out_smooth),
                                           'correlation'))
                corrs.append(next_corrs)

                for s in range(0, 1):

                    sub_in_smooth, sub_out_smooth, sub_in_raw, sub_out_raw = folding_levels_ec(sub_in_raw,
                                                                                               sub_out_raw,
                                                                                               level=v,
                                                                                               cfun=cfun,
                                                                                               rfun=rfun,
                                                                                               combine=combine,
                                                                                               weights_fun=weights_fun,
                                                                                               weights_params=weights_params)
                    next_subcorrs = (1 - sd.cdist(mean_combine(sub_in_smooth),
                                                  mean_combine(sub_out_smooth), 'correlation'))
                    sub_corrs.append(next_subcorrs)

            else:

                # levels >= 2: the precomputed correlations for this level are
                # read directly from data[v-1]
                in_raw = [x for x in data[v - 1][group_assignments == i]]
                out_raw = [x for x in data[v - 1][group_assignments != i]]

                in_smooth, out_smooth, in_raw, out_raw = folding_levels_ec(in_raw, out_raw, level=v, cfun=cfun,
                                                                           rfun=rfun, combine=combine,
                                                                           weights_fun=weights_fun,
                                                                           weights_params=weights_params)

                next_corrs = (1 - sd.cdist(in_smooth, out_smooth, 'correlation'))
                corrs.append(next_corrs)

                for s in range(0, 1):

                    sub_in_raw = [x for x in data[v - 1][group_assignments == i][subgroup_assignments == s]]
                    sub_out_raw = [x for x in data[v - 1][group_assignments == i][subgroup_assignments != s]]

                    sub_in_smooth, sub_out_smooth, sub_in_raw, sub_out_raw = folding_levels_ec(sub_in_raw,
                                                                                               sub_out_raw,
                                                                                               level=v,
                                                                                               cfun=cfun,
                                                                                               rfun=rfun,
                                                                                               combine=combine,
                                                                                               weights_fun=weights_fun,
                                                                                               weights_params=weights_params)
                    next_subcorrs = (1 - sd.cdist(sub_in_smooth, sub_out_smooth, 'correlation'))
                    sub_corrs.append(next_subcorrs)

        sub_corrs = np.array(sub_corrs)
        corrs = np.array(corrs)

        if sub_nfolds == 1:
            sub_corrs = corrs

        if not optimize_levels:
            optimize_levels = range(v + 1)

        opt_over = []

        for lev in optimize_levels:

            opt_over.append(lev)

            sub_out_corrs = sub_corrs[opt_over, :, :]
            out_corrs = corrs[opt_over, :, :]

            # learn per-level mixing weights on the subfold correlations, then
            # apply them to the full out-of-fold correlations
            mu = optimize_weights(sub_out_corrs, opt_init)

            w_corrs = weight_corrs(out_corrs, mu)

            next_results_pd = decoder(w_corrs)
            next_results_pd['level'] = lev
            next_results_pd['folds'] = i

            mu_pd = pd.DataFrame()

            for c in opt_over:
                mu_pd['level_' + str(c)] = [0]

            mu_pd += mu

            # NOTE: pd.concat's join_axes keyword was removed in pandas 1.0;
            # reindex to next_results_pd.index achieves the same alignment.
            next_results_pd = pd.concat([next_results_pd, mu_pd], axis=1).reindex(next_results_pd.index)

            results_pd = pd.concat([results_pd, next_results_pd])

    return results_pd
687+
475688

476689
def weighted_timepoint_decoder(data, nfolds=2, level=0, optimize_levels=None, cfun=isfc, weights_fun=laplace_weights,
477690
weights_params=laplace_params, combine=mean_combine, rfun=None, opt_init=None):
@@ -575,7 +788,10 @@ def weighted_timepoint_decoder(data, nfolds=2, level=0, optimize_levels=None, cf
575788
combine=combine, weights_fun=weights_fun,
576789
weights_params=weights_params)
577790

578-
next_corrs = (1 - sd.cdist(mean_combine([x for x in in_raw]), mean_combine([x for x in out_raw]),
791+
# next_corrs = (1 - sd.cdist(mean_combine([x for x in in_raw]), mean_combine([x for x in out_raw]),
792+
# 'correlation'))
793+
794+
next_corrs = (1 - sd.cdist(mean_combine(in_smooth), mean_combine(out_smooth),
579795
'correlation'))
580796
corrs.append(next_corrs)
581797

@@ -588,8 +804,10 @@ def weighted_timepoint_decoder(data, nfolds=2, level=0, optimize_levels=None, cf
588804
level=v, cfun=None, rfun=p_rfun,
589805
combine=combine, weights_fun=weights_fun,
590806
weights_params=weights_params)
591-
next_subcorrs = (1 - sd.cdist(mean_combine([x for x in sub_in_raw]),
592-
mean_combine([x for x in sub_out_raw]), 'correlation'))
807+
# next_subcorrs = (1 - sd.cdist(mean_combine([x for x in sub_in_raw]),
808+
# mean_combine([x for x in sub_out_raw]), 'correlation'))
809+
next_subcorrs = (1 - sd.cdist(mean_combine(sub_in_smooth),
810+
mean_combine(sub_out_smooth), 'correlation'))
593811
sub_corrs.append(next_subcorrs)
594812

595813

@@ -663,6 +881,42 @@ def weighted_timepoint_decoder(data, nfolds=2, level=0, optimize_levels=None, cf
663881
return results_pd
664882

665883

884+
def folding_levels_ec(infold_data, outfold_data, level=0, cfun=None, weights_fun=None, weights_params=None, combine=None,
                      rfun=None):
    """
    Smooth (level 0) or correlate-and-reduce (level >= 1) the in-fold and
    out-of-fold data for one level of the decoding hierarchy.

    :param infold_data: iterable of in-fold timeseries/correlation matrices
    :param outfold_data: iterable of out-of-fold timeseries/correlation matrices
    :param level: integer level being processed (default: 0)
    :param cfun: per-level list of correlation functions; only cfun[level] is
                 used, and only when level >= 1
    :param weights_fun: per-timepoint weights function passed to timecorr
    :param weights_params: parameters for weights_fun
    :param combine: per-level list of combine functions; combine[level] is used
    :param rfun: per-level list of reduce functions; rfun[level] is used
                 (default: None, meaning no reduction at any level)
    :return: (in_fold_smooth, out_fold_smooth, in_fold_raw, out_fold_raw) where
             the *_raw outputs are the inputs passed through unchanged
    """

    from .timecorr import timecorr

    if rfun is None:
        # one (empty) reduction slot per level up to and including this one;
        # `level` is an int here, so size it directly rather than via np.shape
        rfun = [None] * (int(level) + 1)

    if level == 0:
        # level 0: smooth only — no correlation transform (cfun=None)
        in_fold_smooth = np.asarray(timecorr([x for x in infold_data], cfun=None,
                                             rfun=rfun[level], combine=combine[level], weights_function=weights_fun,
                                             weights_params=weights_params))
        out_fold_smooth = np.asarray(timecorr([x for x in outfold_data], cfun=None,
                                              rfun=rfun[level], combine=combine[level], weights_function=weights_fun,
                                              weights_params=weights_params))
    else:
        in_fold_smooth = np.asarray(timecorr(list(infold_data), cfun=cfun[level], rfun=rfun[level], combine=combine[level],
                                             weights_function=weights_fun, weights_params=weights_params))
        out_fold_smooth = np.asarray(timecorr(list(outfold_data), cfun=cfun[level], rfun=rfun[level], combine=combine[level],
                                              weights_function=weights_fun, weights_params=weights_params))

    # raw data is returned untouched so callers can feed it to the next level
    in_fold_raw = infold_data
    out_fold_raw = outfold_data

    return in_fold_smooth, out_fold_smooth, in_fold_raw, out_fold_raw
917+
918+
919+
666920
def pca_decoder(data, nfolds=2, dims=10, cfun=isfc, weights_fun=laplace_weights,
667921
weights_params=laplace_params, combine=mean_combine, rfun=None):
668922
"""

0 commit comments

Comments
 (0)