From e90f70bd8e5eb81ad7990817965b0f2f90f2aca8 Mon Sep 17 00:00:00 2001 From: KevinWang905 <46271360+KevinWang905@users.noreply.github.com> Date: Thu, 18 Nov 2021 11:40:41 -0500 Subject: [PATCH 01/16] Add files via upload --- proglearn/forest.py | 234 ++++++++++++++++++++++++ proglearn/progressive_learner.py | 296 ++++++++++++++++++++++++++++++- proglearn/transformers.py | 8 + 3 files changed, 537 insertions(+), 1 deletion(-) diff --git a/proglearn/forest.py b/proglearn/forest.py index 1d642ffbca..4601dd41a3 100644 --- a/proglearn/forest.py +++ b/proglearn/forest.py @@ -328,6 +328,171 @@ def predict(self, X, task_id): """ return super().predict(check_array(X), task_id) +class LifelongClassificationForestStream(ClassificationProgressiveLearner): + + def __init__( + self, + default_n_estimators=100, + default_tree_construction_proportion=0.67, + default_kappa=np.inf, + default_max_depth=30, + ): + super().__init__( + default_transformer_class=TreeClassificationTransformer, + default_transformer_kwargs={}, + default_voter_class=TreeClassificationVoter, + default_voter_kwargs={"kappa": default_kappa}, + default_decider_class=SimpleArgmaxAverage, + default_decider_kwargs={}, + ) + + self.default_n_estimators = default_n_estimators + self.default_tree_construction_proportion = default_tree_construction_proportion + self.default_kappa = default_kappa + self.default_max_depth = default_max_depth + + def update_task( + self, + X, + y, + task_id=None, + n_estimators="default", + tree_construction_proportion="default", + kappa="default", + max_depth="default", + classes = None + ): + """ + adds a task with id task_id, max tree depth max_depth, given input data matrix X + and output data matrix y, to the Lifelong Classification Forest. Also splits + data for training and voting based on tree_construction_proportion and uses the + value of kappa to determine whether the learner will have + finite sample correction. + + Parameters + ---------- + X : ndarray + The input data matrix. + + y : ndarray + The output (response) data matrix. + + task_id : obj, default=None + The id corresponding to the task being added. + + n_estimators : int or str, default='default' + The number of trees used for the given task. + + tree_construction_proportion : int or str, default='default' + The proportions of the input data set aside to train each decision + tree. The remainder of the data is used to fill in voting posteriors. + The default is used if 'default' is provided. + + kappa : float or str, default='default' + The coefficient for finite sample correction. + The default is used if 'default' is provided. + + max_depth : int or str, default='default' + The maximum depth of a tree in the Lifelong Classification Forest. + The default is used if 'default' is provided. + + Returns + ------- + self : LifelongClassificationForest + The object itself. + """ + if n_estimators == "default": + n_estimators = self.default_n_estimators + if tree_construction_proportion == "default": + tree_construction_proportion = self.default_tree_construction_proportion + if kappa == "default": + kappa = self.default_kappa + if max_depth == "default": + max_depth = self.default_max_depth + + X, y = check_X_y(X, y) + return super().update_task( + X, + y, + classes = classes, + task_id=task_id, + transformer_voter_decider_split=[ + tree_construction_proportion, + 1 - tree_construction_proportion, + 0, + ], + num_transformers=n_estimators, + transformer_kwargs={"kwargs": {"max_depth": max_depth}}, + voter_kwargs={ + "classes": np.unique(y), + "kappa": kappa, + }, + decider_kwargs={"classes": np.unique(y)}, + ) + + def update_transformer( + self, + X, + y, + classes = None, + transformer_id=None, + n_estimators="default", + max_depth="default", + ): + + if n_estimators == "default": + n_estimators = self.default_n_estimators + if max_depth == "default": + max_depth = self.default_max_depth + + X, y = check_X_y(X, y) + return super().update_transformer( + X, + y, + classes = classes, + transformer_kwargs={"kwargs": {"max_depth": max_depth}}, + transformer_id=transformer_id, + num_transformers=n_estimators, + ) + + def predict_proba(self, X, task_id): + """ + estimates class posteriors under task_id for each example in input data X. + + Parameters + ---------- + X : ndarray + The input data matrix. + + task_id: + The id corresponding to the task being mapped to. + + Returns + ------- + y_proba_hat : ndarray of shape [n_samples, n_classes] + posteriors per example + """ + return super().predict_proba(check_array(X), task_id) + + def predict(self, X, task_id): + """ + predicts class labels under task_id for each example in input data X. + + Parameters + ---------- + X : ndarray + The input data matrix. + + task_id : obj + The id corresponding to the task being mapped to. + + Returns + ------- + y_hat : ndarray of shape [n_samples] + predicted class label per example + """ + return super().predict(check_array(X), task_id) + class UncertaintyForest(LifelongClassificationForest): """ @@ -450,3 +615,72 @@ def predict(self, X): predicted class label per example """ return super().predict(X, 0) + +class UncertaintyForestStream(LifelongClassificationForest): + + + def __init__( + self, + n_estimators=100, + kappa=np.inf, + max_depth=30, + tree_construction_proportion=0.67, + ): + super().__init__( + default_n_estimators=n_estimators, + default_tree_construction_proportion=tree_construction_proportion, + default_kappa=kappa, + default_max_depth=max_depth, + ) + + def fit(self, X, y, classes): + """ + fits forest to data X with labels y + + Parameters + ---------- + X : array of shape [n_samples, n_features] + The data that will be trained on + + y : array of shape [n_samples] + The label for cluster membership of the given data + + Returns + ------- + self : UncertaintyForest + The object itself. + """ + + return super().add_task(X, y, task_id=0, classes = classes) + + def predict_proba(self, X): + """ + estimates class posteriors for each example in input data X. + + Parameters + ---------- + X : array of shape [n_samples, n_features] + The data whose posteriors we are estimating. + + Returns + ------- + y_proba_hat : ndarray of shape [n_samples, n_classes] + posteriors per example + """ + return super().predict_proba(X, 0) + + def predict(self, X): + """ + predicts class labels for each example in input data X. + + Parameters + ---------- + X : array of shape [n_samples, n_features] + The data on which we are performing inference. + + Returns + ------- + y_hat : ndarray of shape [n_samples] + predicted class label per example + """ + return super().predict(X, 0) diff --git a/proglearn/progressive_learner.py b/proglearn/progressive_learner.py index e4d5cd4aab..7183721642 100755 --- a/proglearn/progressive_learner.py +++ b/proglearn/progressive_learner.py @@ -173,6 +173,9 @@ def _append_transformer(self, transformer_id, transformer): else: self.transformer_id_to_transformers[transformer_id] = [transformer] + def _replace_transformer(self, transformer_id, transformer): + self.transformer_id_to_transformers[transformer_id] = [transformer] + def _append_voter(self, transformer_id, task_id, voter): if task_id in list(self.task_id_to_transformer_id_to_voters.keys()): if transformer_id in list( @@ -218,10 +221,12 @@ def _bifurcate_decider_idxs(self, ra, transformer_voter_decider_split): ) return first_idx, second_idx - def _add_transformer( + + def _update_transformer( self, X, y, + classes = None, transformer_data_proportion, transformer_voter_data_idx, transformer_id, @@ -242,6 +247,142 @@ def _add_transformer( else transformer_voter_data_idx ) + if transformer_id not in list(self.task_id_to_X.keys()): + self.transformer_id_to_X[transformer_id] = X + if transformer_id not in list(self.task_id_to_y.keys()): + self.transformer_id_to_y[transformer_id] = y + + # train new transformers + for transformer_num in range(num_transformers): + if X is not None: + n = len(X) + elif y is not None: + n = len(y) + else: + n = None + if n is not None: + transformer_data_idx = np.random.choice( + transformer_voter_data_idx, + int(transformer_data_proportion * n), + replace=False, + ) + else: + transformer_data_idx = None + self.set_updated_transformer( + transformer_id=transformer_id, + transformer_data_idx=transformer_data_idx, + transformer_class=transformer_class, + transformer_kwargs=transformer_kwargs, + classes = None + ) + voter_data_idx = np.delete(transformer_voter_data_idx, transformer_data_idx) + self._append_voter_data_idx( + task_id=transformer_id, + bag_id=transformer_num, + voter_data_idx=voter_data_idx, + ) + + # train voters and deciders from new transformer to previous tasks + for existing_task_id in np.intersect1d(backward_task_ids, self.get_task_ids()): + self.set_voter(transformer_id=transformer_id, task_id=existing_task_id) + self.set_decider( + task_id=existing_task_id, + transformer_ids=list( + self.task_id_to_transformer_id_to_voters[existing_task_id].keys() + ), + ) + + return self + + def set_updated_transformer( + self, + transformer_id=None, + transformer=None, + transformer_data_idx=None, + transformer_class=None, + transformer_kwargs=None, + classes = None + ): + + if transformer_id is None: + transformer_id = len(self.get_transformer_ids()) + + X = ( + self.transformer_id_to_X[transformer_id] + if transformer_id in list(self.transformer_id_to_X.keys()) + else self.task_id_to_X[transformer_id] + ) + y = ( + self.transformer_id_to_y[transformer_id] + if transformer_id in list(self.transformer_id_to_y.keys()) + else self.task_id_to_y[transformer_id] + ) + if transformer_data_idx is not None: + X, y = X[transformer_data_idx], y[transformer_data_idx] + + if X is None and y is None: + if transformer.is_fitted(): + self._append_transformer(transformer_id, transformer) + else: + raise ValueError( + "transformer_class is not fitted and X is None and y is None." + ) + return + + # Type check X + + if transformer_class is None: + if self.default_transformer_class is None: + raise ValueError( + "transformer_class is None and 'default_transformer_class' is None." + ) + else: + transformer_class = self.default_transformer_class + + if transformer_kwargs is None: + if self.default_transformer_kwargs is None: + raise ValueError( + """transformer_kwargs is None and + 'default_transformer_kwargs' is None.""" + ) + else: + transformer_kwargs = self.default_transformer_kwargs + + # Fit transformer and new voter + if y is None: + self._replace_transformer( + transformer_id, transformer_class(**transformer_kwargs).partial_fit(X, classes) + ) + else: + # Type check y + self._append_transformer( + transformer_id, transformer_class(**transformer_kwargs).partial_fit(X, y, classes) + ) + + def _add_transformer( + self, + X, + y, + transformer_data_proportion, + transformer_voter_data_idx, + transformer_id, + num_transformers, + transformer_class, + transformer_kwargs, + backward_task_ids + ): + if transformer_id is None: + transformer_id = len(self.get_transformer_ids()) + + backward_task_ids = ( + backward_task_ids if backward_task_ids is not None else self.get_task_ids() + ) + transformer_voter_data_idx = ( + range(len(X)) + if transformer_voter_data_idx is None + else transformer_voter_data_idx + ) + if transformer_id not in list(self.task_id_to_X.keys()): self.transformer_id_to_X[transformer_id] = X if transformer_id not in list(self.task_id_to_y.keys()): @@ -472,6 +613,31 @@ def set_decider( self.task_id_to_decider_class[task_id] = decider_class self.task_id_to_decider_kwargs[task_id] = decider_kwargs + def update_transformer( + self, + X, + y, + classese = None, + transformer_data_proportion=1.0, + transformer_voter_data_idx=None, + transformer_id=None, + num_transformers=1, + transformer_class=None, + transformer_kwargs=None, + backward_task_ids=None, + ): + return self._update_transformer( + X, + y, + classes = classes, + transformer_data_proportion=transformer_data_proportion, + transformer_voter_data_idx=transformer_voter_data_idx, + transformer_id=transformer_id, + num_transformers=num_transformers, + transformer_class=transformer_class, + transformer_kwargs=transformer_kwargs, + backward_task_ids=backward_task_ids, + ) def add_transformer( self, X, @@ -699,6 +865,134 @@ def add_task( return self + def update_task( + self, + X, + y, + classes = None, + task_id=None, + transformer_voter_decider_split=[0.67, 0.33, 0], + num_transformers=1, + transformer_class=None, + transformer_kwargs=None, + voter_class=None, + voter_kwargs=None, + decider_class=None, + decider_kwargs=None, + backward_task_ids=None, + forward_transformer_ids=None, + ): + """ + Adds a task to the progressive learner. Optionally trains one or more + transformer from the input data (if num_transformers > 0), adds voters + and deciders from this/these new transformer(s) to the tasks specified + in backward_task_ids, and adds voters and deciders from the transformers + specified in forward_transformer_ids (and from the newly added transformer(s) + corresponding to the input task_id if num_transformers > 0) to the + new task_id. + + Parameters + ---------- + X : ndarray + Input data matrix. + + y : ndarray + Output (response) data matrix. + + task_id : obj, default=None + The id corresponding to the task being added. + + transformer_voter_decider_split : ndarray, default=[0.67, 0.33, 0] + A 1d array of length 3. The 0th index indicates the proportions of the input + data used to train the (optional) newly added transformer(s) corresponding to + the task_id provided in this function call. The 1st index indicates the proportion of + the data set aside to train the voter(s) from these (optional) newly added + transformer(s) to the task_id provided in this function call. For all other tasks, + the aggregate transformer and voter data pairs from those tasks are used to train + the voter(s) from these (optional) newly added transformer(s) to those tasks; + for all other transformers, the aggregate transformer and voter data provided in + this function call is used to train the voter(s) from those transformers to + the task_id provided in this function call. The 2nd index indicates the + proportion of the data set aside to train the decider - these indices are saved + internally and will be used to train all further deciders corresponding to this + task for all function calls. + + num_transformers : int, default=1 + The number of transformers to add corresponding to the given inputs. + + transformer_class : BaseTransformer, default=None + The class of the transformer(s) being added. + + transformer_kwargs : dict, default=None + A dictionary with keys of type string and values of type obj corresponding + to the given string kwarg. This determines the kwargs of the transformer(s) + being added. + + voter_class : BaseVoter, default=None + The class of the voter(s) being added. + + voter_kwargs : dict, default=None + A dictionary with keys of type string and values of type obj corresponding + to the given string kwarg. This determines the kwargs of the voter(s) + being added. + + decider_class : BaseDecider, default=None + The class of the decider(s) being added. + + decider_kwargs : dict, default=None + A dictionary with keys of type string and values of type obj corresponding + to the given string kwarg. This determines the kwargs of the decider(s) + being added. + + backward_task_ids : ndarray, default=None + A 1d array of type obj used to specify to which existing task voters and deciders + will be trained from the transformer(s) being added. + + foward_transformer_ids : ndarray, default=None + A 1d array of type obj used to specify from which existing transformer(s) voters and + deciders will be trained to the new task. If num_transformers > 0, the input task_id + corresponding to the task being added is automatically appended to this 1d array. + + Returns + ------- + self : ProgressiveLearner + The object itself. + """ + if task_id is None: + task_id = max( + len(self.get_transformer_ids()), len(self.get_task_ids()) + ) # come up with something that has fewer collisions + + self.task_id_to_X[task_id] = X + self.task_id_to_y[task_id] = y + + # split into transformer/voter and decider data + transformer_voter_data_idx, decider_idx = self._bifurcate_decider_idxs( + range(len(X)), transformer_voter_decider_split + ) + self._append_decider_idx(task_id, decider_idx) + + # add new transformer and train voters and decider + # from new transformer to previous tasks + if num_transformers > 0: + self._update_transformer( + X, + y, + classes = classes, + transformer_data_proportion=transformer_voter_decider_split[0] + if transformer_voter_decider_split + else 1, + transformer_voter_data_idx=transformer_voter_data_idx, + transformer_id=task_id, + num_transformers=num_transformers, + transformer_class=transformer_class, + transformer_kwargs=transformer_kwargs, + backward_task_ids=backward_task_ids, + ) + + return self + + def predict(self, X, task_id, transformer_ids=None): """ predicts labels under task_id for each example in input data X diff --git a/proglearn/transformers.py b/proglearn/transformers.py index f737486a3c..d10c64fcbd 100644 --- a/proglearn/transformers.py +++ b/proglearn/transformers.py @@ -105,6 +105,8 @@ def fit(self, X, y): self.fitted_ = True return self + + def transform(self, X): """ @@ -148,6 +150,12 @@ class TreeClassificationTransformer(BaseTransformer): def __init__(self, kwargs={}): self.kwargs = kwargs + def partial_fit(self, X, y, classes) + X, y = check_X_y(X, y) + + self.transformer_ = DecisionTreeClassifier(**self.kwargs).partial_fit(X, y, classes) + return self + def fit(self, X, y): """ Fits the transformer to data X with labels y. From 2a6ac4b7698ae616908fee11aa0af2fc21a89484 Mon Sep 17 00:00:00 2001 From: KevinWang905 <46271360+KevinWang905@users.noreply.github.com> Date: Mon, 22 Nov 2021 09:54:03 -0500 Subject: [PATCH 02/16] Add files via upload --- proglearn/deciders.py | 7 ++++ proglearn/forest.py | 4 ++- proglearn/progressive_learner.py | 57 ++++++++++++++++++++++++++++---- proglearn/transformers.py | 5 ++- 4 files changed, 63 insertions(+), 10 deletions(-) diff --git a/proglearn/deciders.py b/proglearn/deciders.py index 3d5c3412bb..0945256236 100755 --- a/proglearn/deciders.py +++ b/proglearn/deciders.py @@ -37,6 +37,8 @@ class SimpleArgmaxAverage(BaseClassificationDecider): def __init__(self, classes=[]): self.classes = classes + print("initialized decider") + print(self.classes) def fit( self, @@ -78,6 +80,9 @@ def fit( ValueError When the labels have not been provided and the classes are empty. """ + print("deciders.fit") + print(y) + print(self.classes) if not isinstance(self.classes, (list, np.ndarray)): if len(y) == 0: raise ValueError( @@ -170,4 +175,6 @@ def predict(self, X, transformer_ids=None): When the model is not fitted. """ vote_overall = self.predict_proba(X, transformer_ids=transformer_ids) + print(vote_overall) + print(self.classes) return self.classes[np.argmax(vote_overall, axis=1)] diff --git a/proglearn/forest.py b/proglearn/forest.py index 4601dd41a3..1c6e95f15a 100644 --- a/proglearn/forest.py +++ b/proglearn/forest.py @@ -650,8 +650,10 @@ def fit(self, X, y, classes): self : UncertaintyForest The object itself. """ + print("classes in forest.UncertaintyForestStream") + print(classes) - return super().add_task(X, y, task_id=0, classes = classes) + return super().update_task(X, y, classes = classes) def predict_proba(self, X): """ diff --git a/proglearn/progressive_learner.py b/proglearn/progressive_learner.py index 7183721642..3b994a3ba4 100755 --- a/proglearn/progressive_learner.py +++ b/proglearn/progressive_learner.py @@ -226,7 +226,6 @@ def _update_transformer( self, X, y, - classes = None, transformer_data_proportion, transformer_voter_data_idx, transformer_id, @@ -234,7 +233,9 @@ def _update_transformer( transformer_class, transformer_kwargs, backward_task_ids, + classes = None ): + if transformer_id is None: transformer_id = len(self.get_transformer_ids()) @@ -273,7 +274,7 @@ def _update_transformer( transformer_data_idx=transformer_data_idx, transformer_class=transformer_class, transformer_kwargs=transformer_kwargs, - classes = None + classes = classes ) voter_data_idx = np.delete(transformer_voter_data_idx, transformer_data_idx) self._append_voter_data_idx( @@ -307,6 +308,7 @@ def set_updated_transformer( if transformer_id is None: transformer_id = len(self.get_transformer_ids()) + X = ( self.transformer_id_to_X[transformer_id] if transformer_id in list(self.transformer_id_to_X.keys()) @@ -351,12 +353,13 @@ def set_updated_transformer( # Fit transformer and new voter if y is None: self._replace_transformer( - transformer_id, transformer_class(**transformer_kwargs).partial_fit(X, classes) + transformer_id, transformer_class(**transformer_kwargs)._partial_fit(X, classes = classes) ) else: # Type check y self._append_transformer( - transformer_id, transformer_class(**transformer_kwargs).partial_fit(X, y, classes) + transformer_id, transformer_class(**transformer_kwargs)._partial_fit(X, y, classes = classes) + #transformer_id, transformer_class(**transformer_kwargs).fit(X, y) ) def _add_transformer( @@ -569,6 +572,8 @@ def set_voter( def set_decider( self, task_id, transformer_ids, decider_class=None, decider_kwargs=None ): + + print("decider kwargs at start of set_decider: "+str(decider_kwargs)) if decider_class is None: if task_id in list(self.task_id_to_decider_class.keys()): decider_class = self.task_id_to_decider_class[task_id] @@ -600,9 +605,15 @@ def set_decider( } X, y = self.task_id_to_X[task_id], self.task_id_to_y[task_id] + print("decider kwargs at middle of set_decider: "+str(decider_kwargs)) + print("decider_class(decider kwargs) at middle of set_decider: "+str(decider_class(**decider_kwargs))) self.task_id_to_decider[task_id] = decider_class(**decider_kwargs) decider_idx = self.task_id_to_decider_idx[task_id] + + print("task_to_id_to_decider[task_id] at middle of set_decider: "+str(self.task_id_to_decider[task_id])) + print() + self.task_id_to_decider[task_id].fit( X[decider_idx], y[decider_idx], @@ -613,11 +624,11 @@ def set_decider( self.task_id_to_decider_class[task_id] = decider_class self.task_id_to_decider_kwargs[task_id] = decider_kwargs - def update_transformer( + def update_transformer( self, X, y, - classese = None, + classes = None, transformer_data_proportion=1.0, transformer_voter_data_idx=None, transformer_id=None, @@ -626,6 +637,7 @@ def update_transformer( transformer_kwargs=None, backward_task_ids=None, ): + return self._update_transformer( X, y, @@ -728,6 +740,7 @@ def add_task( backward_task_ids=None, forward_transformer_ids=None, ): + print("decider kwards at start of add_task: "+str(decider_kwargs)) """ Adds a task to the progressive learner. Optionally trains one or more transformer from the input data (if num_transformers > 0), adds voters @@ -882,6 +895,7 @@ def update_task( backward_task_ids=None, forward_transformer_ids=None, ): + print("decider kwargs at start of update_task: "+str(decider_kwargs)) """ Adds a task to the progressive learner. Optionally trains one or more transformer from the input data (if num_transformers > 0), adds voters @@ -958,6 +972,8 @@ def update_task( self : ProgressiveLearner The object itself. """ + + if task_id is None: task_id = max( len(self.get_transformer_ids()), len(self.get_task_ids()) @@ -989,6 +1005,34 @@ def update_task( transformer_kwargs=transformer_kwargs, backward_task_ids=backward_task_ids, ) + + # train voters and decider from previous (and current) transformers to new task + for transformer_id in ( + forward_transformer_ids + if forward_transformer_ids + else self.get_transformer_ids() + ): + self.set_voter( + transformer_id=transformer_id, + task_id=task_id, + voter_class=voter_class, + voter_kwargs=voter_kwargs, + ) + + # train decider of new task + if forward_transformer_ids: + if num_transformers == 0: + transformer_ids = forward_transformer_ids + else: + transformer_ids = np.concatenate([forward_transformer_ids, task_id]) + else: + transformer_ids = self.get_transformer_ids() + self.set_decider( + task_id=task_id, + transformer_ids=transformer_ids, + decider_class=decider_class, + decider_kwargs=decider_kwargs, + ) return self @@ -1016,6 +1060,7 @@ def predict(self, X, task_id, transformer_ids=None): y_hat : ndarray of shape [n_samples] predicted class label per example """ + if self.task_id_to_decider == {}: raise NotFittedError diff --git a/proglearn/transformers.py b/proglearn/transformers.py index d10c64fcbd..70c08b063a 100644 --- a/proglearn/transformers.py +++ b/proglearn/transformers.py @@ -150,10 +150,9 @@ class TreeClassificationTransformer(BaseTransformer): def __init__(self, kwargs={}): self.kwargs = kwargs - def partial_fit(self, X, y, classes) + def _partial_fit(self, X, y, classes=None): X, y = check_X_y(X, y) - - self.transformer_ = DecisionTreeClassifier(**self.kwargs).partial_fit(X, y, classes) + self.transformer_ = DecisionTreeClassifier(**self.kwargs).partial_fit(X, y, classes = classes) return self def fit(self, X, y): From 94958b731c68fca0314a632425d7bcac6b140ea1 Mon Sep 17 00:00:00 2001 From: KevinWang905 <46271360+KevinWang905@users.noreply.github.com> Date: Mon, 29 Nov 2021 11:44:49 -0500 Subject: [PATCH 03/16] Separated add task and update task --- proglearn/progressive_learner.py | 84 +++++++++++++++++--------------- 1 file changed, 46 insertions(+), 38 deletions(-) diff --git a/proglearn/progressive_learner.py b/proglearn/progressive_learner.py index 3b994a3ba4..61b48ac91d 100755 --- a/proglearn/progressive_learner.py +++ b/proglearn/progressive_learner.py @@ -233,7 +233,7 @@ def _update_transformer( transformer_class, transformer_kwargs, backward_task_ids, - classes = None + inputclasses = None ): if transformer_id is None: @@ -274,7 +274,7 @@ def _update_transformer( transformer_data_idx=transformer_data_idx, transformer_class=transformer_class, transformer_kwargs=transformer_kwargs, - classes = classes + inputclasses = inputclasses ) voter_data_idx = np.delete(transformer_voter_data_idx, transformer_data_idx) self._append_voter_data_idx( @@ -302,7 +302,7 @@ def set_updated_transformer( transformer_data_idx=None, transformer_class=None, transformer_kwargs=None, - classes = None + inputclasses = None ): if transformer_id is None: @@ -353,12 +353,12 @@ def set_updated_transformer( # Fit transformer and new voter if y is None: self._replace_transformer( - transformer_id, transformer_class(**transformer_kwargs)._partial_fit(X, classes = classes) + transformer_id, transformer_class(**transformer_kwargs)._partial_fit(X, inputclasses = inputclasses) ) else: # Type check y self._append_transformer( - transformer_id, transformer_class(**transformer_kwargs)._partial_fit(X, y, classes = classes) + transformer_id, transformer_class(**transformer_kwargs)._partial_fit(X, y, inputclasses = inputclasses) #transformer_id, transformer_class(**transformer_kwargs).fit(X, y) ) @@ -628,7 +628,7 @@ def update_transformer( self, X, y, - classes = None, + inputclasses = None, transformer_data_proportion=1.0, transformer_voter_data_idx=None, transformer_id=None, @@ -641,7 +641,7 @@ def update_transformer( return self._update_transformer( X, y, - classes = classes, + inputclasses = inputclasses, transformer_data_proportion=transformer_data_proportion, transformer_voter_data_idx=transformer_voter_data_idx, transformer_id=transformer_id, @@ -882,7 +882,7 @@ def update_task( self, X, y, - classes = None, + inputclasses = None, task_id=None, transformer_voter_decider_split=[0.67, 0.33, 0], num_transformers=1, @@ -975,8 +975,8 @@ def update_task( if task_id is None: - task_id = max( - len(self.get_transformer_ids()), len(self.get_task_ids()) + print("Error: No Task ID inputted") + return self ) # come up with something that has fewer collisions self.task_id_to_X[task_id] = X @@ -990,11 +990,15 @@ def update_task( # add new transformer and train voters and decider # from new transformer to previous tasks + print("length task ids: "+str(len(self.get_task_ids()))) + print("task_id: "+str(task_id)) + + print("updating transformer") if num_transformers > 0: self._update_transformer( X, y, - classes = classes, + inputclasses = inputclasses, transformer_data_proportion=transformer_voter_decider_split[0] if transformer_voter_decider_split else 1, @@ -1006,33 +1010,37 @@ def update_task( backward_task_ids=backward_task_ids, ) - # train voters and decider from previous (and current) transformers to new task - for transformer_id in ( - forward_transformer_ids - if forward_transformer_ids - else self.get_transformer_ids() - ): - self.set_voter( - transformer_id=transformer_id, - task_id=task_id, - voter_class=voter_class, - voter_kwargs=voter_kwargs, - ) - - # train decider of new task - if forward_transformer_ids: - if num_transformers == 0: - transformer_ids = forward_transformer_ids - else: - transformer_ids = np.concatenate([forward_transformer_ids, task_id]) - else: - transformer_ids = self.get_transformer_ids() - self.set_decider( - task_id=task_id, - transformer_ids=transformer_ids, - decider_class=decider_class, - decider_kwargs=decider_kwargs, - ) + # The following lines are commented out, needs to be tested if updating deciders and voters after updating task + # improves accuracy + + + # # train voters and decider from previous (and current) transformers to new task + # for transformer_id in ( + # forward_transformer_ids + # if forward_transformer_ids + # else self.get_transformer_ids() + # ): + # self.set_voter( + # transformer_id=transformer_id, + # task_id=task_id, + # voter_class=voter_class, + # voter_kwargs=voter_kwargs, + # ) + + # # train decider of new task + # if forward_transformer_ids: + # if num_transformers == 0: + # transformer_ids = forward_transformer_ids + # else: + # transformer_ids = np.concatenate([forward_transformer_ids, task_id]) + # else: + # transformer_ids = self.get_transformer_ids() + # self.set_decider( + # task_id=task_id, + # transformer_ids=transformer_ids, + # decider_class=decider_class, + # decider_kwargs=decider_kwargs, + # ) return self From 0f76927f1a034e60b518dfb7b1a0b2aad3a76386 Mon Sep 17 00:00:00 2001 From: KevinWang905 <46271360+KevinWang905@users.noreply.github.com> Date: Fri, 3 Dec 2021 10:42:56 -0500 Subject: [PATCH 04/16] Added update_task to LifelongClassificationForest, removed LifelongClassificationForestStream --- proglearn/forest.py | 383 ++++++++++++++++++++++++++++---------------- 1 file changed, 246 insertions(+), 137 deletions(-) diff --git a/proglearn/forest.py b/proglearn/forest.py index 1c6e95f15a..db2e5f9ef9 100644 --- a/proglearn/forest.py +++ b/proglearn/forest.py @@ -289,68 +289,6 @@ def add_transformer( transformer_id=transformer_id, num_transformers=n_estimators, ) - - def predict_proba(self, X, task_id): - """ - estimates class posteriors under task_id for each example in input data X. - - Parameters - ---------- - X : ndarray - The input data matrix. - - task_id: - The id corresponding to the task being mapped to. - - Returns - ------- - y_proba_hat : ndarray of shape [n_samples, n_classes] - posteriors per example - """ - return super().predict_proba(check_array(X), task_id) - - def predict(self, X, task_id): - """ - predicts class labels under task_id for each example in input data X. - - Parameters - ---------- - X : ndarray - The input data matrix. - - task_id : obj - The id corresponding to the task being mapped to. - - Returns - ------- - y_hat : ndarray of shape [n_samples] - predicted class label per example - """ - return super().predict(check_array(X), task_id) - -class LifelongClassificationForestStream(ClassificationProgressiveLearner): - - def __init__( - self, - default_n_estimators=100, - default_tree_construction_proportion=0.67, - default_kappa=np.inf, - default_max_depth=30, - ): - super().__init__( - default_transformer_class=TreeClassificationTransformer, - default_transformer_kwargs={}, - default_voter_class=TreeClassificationVoter, - default_voter_kwargs={"kappa": default_kappa}, - default_decider_class=SimpleArgmaxAverage, - default_decider_kwargs={}, - ) - - self.default_n_estimators = default_n_estimators - self.default_tree_construction_proportion = default_tree_construction_proportion - self.default_kappa = default_kappa - self.default_max_depth = default_max_depth - def update_task( self, X, @@ -360,7 +298,7 @@ def update_task( tree_construction_proportion="default", kappa="default", max_depth="default", - classes = None + inputclasses = None ): """ adds a task with id task_id, max tree depth max_depth, given input data matrix X @@ -411,10 +349,13 @@ def update_task( max_depth = self.default_max_depth X, y = check_X_y(X, y) + + print("unique y values in update_task: "+str(np.unique(y))) + return super().update_task( X, y, - classes = classes, + inputclasses = inputclasses, task_id=task_id, transformer_voter_decider_split=[ tree_construction_proportion, @@ -434,7 +375,7 @@ def update_transformer( self, X, y, - classes = None, + inputclasses = None, transformer_id=None, n_estimators="default", max_depth="default", @@ -449,12 +390,11 @@ def update_transformer( return super().update_transformer( X, y, - classes = classes, + inputclasses = inputclasses, transformer_kwargs={"kwargs": {"max_depth": max_depth}}, transformer_id=transformer_id, num_transformers=n_estimators, ) - def predict_proba(self, X, task_id): """ estimates class posteriors under task_id for each example in input data X. @@ -493,6 +433,174 @@ def predict(self, X, task_id): """ return super().predict(check_array(X), task_id) +# class LifelongClassificationForestStream(ClassificationProgressiveLearner): + +# def __init__( +# self, +# default_n_estimators=100, +# default_tree_construction_proportion=0.67, +# default_kappa=np.inf, +# default_max_depth=30, +# ): +# super().__init__( +# default_transformer_class=TreeClassificationTransformer, +# default_transformer_kwargs={}, +# default_voter_class=TreeClassificationVoter, +# default_voter_kwargs={"kappa": default_kappa}, +# default_decider_class=SimpleArgmaxAverage, +# default_decider_kwargs={}, +# ) + +# self.default_n_estimators = default_n_estimators +# self.default_tree_construction_proportion = default_tree_construction_proportion +# self.default_kappa = default_kappa +# self.default_max_depth = default_max_depth + +# def update_task( +# self, +# X, +# y, +# task_id=None, +# n_estimators="default", +# tree_construction_proportion="default", +# kappa="default", +# max_depth="default", +# inputclasses = None +# ): +# """ +# adds a task with id task_id, max tree depth max_depth, given input data matrix X +# and output data matrix y, to the Lifelong Classification Forest. Also splits +# data for training and voting based on tree_construction_proportion and uses the +# value of kappa to determine whether the learner will have +# finite sample correction. + +# Parameters +# ---------- +# X : ndarray +# The input data matrix. + +# y : ndarray +# The output (response) data matrix. + +# task_id : obj, default=None +# The id corresponding to the task being added. + +# n_estimators : int or str, default='default' +# The number of trees used for the given task. + +# tree_construction_proportion : int or str, default='default' +# The proportions of the input data set aside to train each decision +# tree. The remainder of the data is used to fill in voting posteriors. +# The default is used if 'default' is provided. + +# kappa : float or str, default='default' +# The coefficient for finite sample correction. +# The default is used if 'default' is provided. + +# max_depth : int or str, default='default' +# The maximum depth of a tree in the Lifelong Classification Forest. +# The default is used if 'default' is provided. + +# Returns +# ------- +# self : LifelongClassificationForest +# The object itself. +# """ +# if n_estimators == "default": +# n_estimators = self.default_n_estimators +# if tree_construction_proportion == "default": +# tree_construction_proportion = self.default_tree_construction_proportion +# if kappa == "default": +# kappa = self.default_kappa +# if max_depth == "default": +# max_depth = self.default_max_depth + +# X, y = check_X_y(X, y) + +# print("unique y values in update_task: "+str(np.unique(y))) + +# return super().update_task( +# X, +# y, +# inputclasses = inputclasses, +# task_id=task_id, +# transformer_voter_decider_split=[ +# tree_construction_proportion, +# 1 - tree_construction_proportion, +# 0, +# ], +# num_transformers=n_estimators, +# transformer_kwargs={"kwargs": {"max_depth": max_depth}}, +# voter_kwargs={ +# "classes": np.unique(y), +# "kappa": kappa, +# }, +# decider_kwargs={"classes": np.unique(y)}, +# ) + +# def update_transformer( +# self, +# X, +# y, +# inputclasses = None, +# transformer_id=None, +# n_estimators="default", +# max_depth="default", +# ): + +# if n_estimators == "default": +# n_estimators = self.default_n_estimators +# if max_depth == "default": +# max_depth = self.default_max_depth + +# X, y = check_X_y(X, y) +# return super().update_transformer( +# X, +# y, +# inputclasses = inputclasses, +# transformer_kwargs={"kwargs": {"max_depth": max_depth}}, +# transformer_id=transformer_id, +# num_transformers=n_estimators, +# ) + +# def predict_proba(self, X, task_id): +# """ +# estimates class posteriors under task_id for each example in input data X. + +# Parameters +# ---------- +# X : ndarray +# The input data matrix. + +# task_id: +# The id corresponding to the task being mapped to. + +# Returns +# ------- +# y_proba_hat : ndarray of shape [n_samples, n_classes] +# posteriors per example +# """ +# return super().predict_proba(check_array(X), task_id) + +# def predict(self, X, task_id): +# """ +# predicts class labels under task_id for each example in input data X. + +# Parameters +# ---------- +# X : ndarray +# The input data matrix. + +# task_id : obj +# The id corresponding to the task being mapped to. + +# Returns +# ------- +# y_hat : ndarray of shape [n_samples] +# predicted class label per example +# """ +# return super().predict(check_array(X), task_id) + class UncertaintyForest(LifelongClassificationForest): """ @@ -616,73 +724,74 @@ def predict(self, X): """ return super().predict(X, 0) -class UncertaintyForestStream(LifelongClassificationForest): - - - def __init__( - self, - n_estimators=100, - kappa=np.inf, - max_depth=30, - tree_construction_proportion=0.67, - ): - super().__init__( - default_n_estimators=n_estimators, - default_tree_construction_proportion=tree_construction_proportion, - default_kappa=kappa, - default_max_depth=max_depth, - ) - - def fit(self, X, y, classes): - """ - fits forest to data X with labels y - - Parameters - ---------- - X : array of shape [n_samples, n_features] - The data that will be trained on - - y : array of shape [n_samples] - The label for cluster membership of the given data - - Returns - ------- - self : UncertaintyForest - The object itself. - """ - print("classes in forest.UncertaintyForestStream") - print(classes) - - return super().update_task(X, y, classes = classes) - - def predict_proba(self, X): - """ - estimates class posteriors for each example in input data X. - - Parameters - ---------- - X : array of shape [n_samples, n_features] - The data whose posteriors we are estimating. - - Returns - ------- - y_proba_hat : ndarray of shape [n_samples, n_classes] - posteriors per example - """ - return super().predict_proba(X, 0) - - def predict(self, X): - """ - predicts class labels for each example in input data X. - - Parameters - ---------- - X : array of shape [n_samples, n_features] - The data on which we are performing inference. - - Returns - ------- - y_hat : ndarray of shape [n_samples] - predicted class label per example - """ - return super().predict(X, 0) +# class UncertaintyForestStream(LifelongClassificationForestStream): + + +# def __init__( +# self, +# n_estimators=100, +# kappa=np.inf, +# max_depth=30, +# tree_construction_proportion=0.67, +# ): +# super().__init__( +# default_n_estimators=n_estimators, +# default_tree_construction_proportion=tree_construction_proportion, +# default_kappa=kappa, +# default_max_depth=max_depth, +# ) + +# def fit(self, X, y, inputclasses): +# """ +# fits forest to data X with labels y + +# Parameters +# ---------- +# X : array of shape [n_samples, n_features] +# The data that will be trained on + +# y : array of shape [n_samples] +# The label for cluster membership of the given data + +# Returns +# ------- +# self : UncertaintyForest +# The object itself. +# """ +# print("classes in forest.UncertaintyForestStream") +# print(inputclasses) +# print("unique y values in UF.fit: "+str(np.unique(y))) + +# return super().update_task(X, y, inputclasses = inputclasses) + +# def predict_proba(self, X): +# """ +# estimates class posteriors for each example in input data X. + +# Parameters +# ---------- +# X : array of shape [n_samples, n_features] +# The data whose posteriors we are estimating. + +# Returns +# ------- +# y_proba_hat : ndarray of shape [n_samples, n_classes] +# posteriors per example +# """ +# return super().predict_proba(X, 0) + +# def predict(self, X): +# """ +# predicts class labels for each example in input data X. + +# Parameters +# ---------- +# X : array of shape [n_samples, n_features] +# The data on which we are performing inference. + +# Returns +# ------- +# y_hat : ndarray of shape [n_samples] +# predicted class label per example +# """ +# return super().predict(X, 0) From b2e67fd1742b429b9fa9df00b87a479a4264ae66 Mon Sep 17 00:00:00 2001 From: KevinWang905 <46271360+KevinWang905@users.noreply.github.com> Date: Sun, 5 Dec 2021 19:46:12 -0500 Subject: [PATCH 05/16] Various Fixes --- proglearn/forest.py | 2 + proglearn/progressive_learner.py | 112 ++++++++++++++++++++++++------- proglearn/transformers.py | 20 +++++- 3 files changed, 106 insertions(+), 28 deletions(-) diff --git a/proglearn/forest.py b/proglearn/forest.py index db2e5f9ef9..e496bb6a69 100644 --- a/proglearn/forest.py +++ b/proglearn/forest.py @@ -381,6 +381,8 @@ def update_transformer( max_depth="default", ): + print("update transformer in forest.py is being called!") + if n_estimators == "default": n_estimators = self.default_n_estimators if max_depth == "default": diff --git a/proglearn/progressive_learner.py b/proglearn/progressive_learner.py index 61b48ac91d..d2cf027d3a 100755 --- a/proglearn/progressive_learner.py +++ b/proglearn/progressive_learner.py @@ -168,12 +168,15 @@ def get_task_ids(self): return np.array(list(self.task_id_to_decider.keys())) def _append_transformer(self, transformer_id, transformer): + #print([transformer]) + if transformer_id in self.get_transformer_ids(): self.transformer_id_to_transformers[transformer_id].append(transformer) else: self.transformer_id_to_transformers[transformer_id] = [transformer] def _replace_transformer(self, transformer_id, transformer): + #print([transformer]) self.transformer_id_to_transformers[transformer_id] = [transformer] def _append_voter(self, transformer_id, task_id, voter): @@ -235,26 +238,20 @@ def _update_transformer( backward_task_ids, inputclasses = None ): - - if transformer_id is None: - transformer_id = len(self.get_transformer_ids()) - - backward_task_ids = ( - backward_task_ids if backward_task_ids is not None else self.get_task_ids() - ) - transformer_voter_data_idx = ( - range(len(X)) - if transformer_voter_data_idx is None - else transformer_voter_data_idx - ) + print("transformer id at _update_transformer = " + str(transformer_id)) + print("testtesttest") + counter = 0 if transformer_id not in list(self.task_id_to_X.keys()): self.transformer_id_to_X[transformer_id] = X if transformer_id not in list(self.task_id_to_y.keys()): self.transformer_id_to_y[transformer_id] = y - # train new transformers - for transformer_num in range(num_transformers): + for transformer in self.transformer_id_to_transformers[transformer_id]: + # print(transformer.transformer_) + # print(inputclasses) + # print(transformer) + if X is not None: n = len(X) elif y is not None: @@ -269,21 +266,79 @@ def _update_transformer( ) else: transformer_data_idx = None - self.set_updated_transformer( - transformer_id=transformer_id, - transformer_data_idx=transformer_data_idx, - transformer_class=transformer_class, - transformer_kwargs=transformer_kwargs, - inputclasses = inputclasses + + X2 = ( + self.transformer_id_to_X[transformer_id] + if transformer_id in list(self.transformer_id_to_X.keys()) + else self.task_id_to_X[transformer_id] + ) + y2 = ( + self.transformer_id_to_y[transformer_id] + if transformer_id in list(self.transformer_id_to_y.keys()) + else self.task_id_to_y[transformer_id] ) + if transformer_data_idx is not None: + X2, y2 = X2[transformer_data_idx], y2[transformer_data_idx] + + transformer.transformer_ = transformer._partial_fit(transformer.transformer_, X2,y2, inputclasses) + voter_data_idx = np.delete(transformer_voter_data_idx, transformer_data_idx) self._append_voter_data_idx( task_id=transformer_id, - bag_id=transformer_num, + bag_id=counter, voter_data_idx=voter_data_idx, ) + counter = counter + 1 + + print("testtesttest") + # if transformer_id is None: + # transformer_id = len(self.get_transformer_ids()) - # train voters and deciders from new transformer to previous tasks + backward_task_ids = ( + backward_task_ids if backward_task_ids is not None else self.get_task_ids() + ) + transformer_voter_data_idx = ( + range(len(X)) + if transformer_voter_data_idx is None + else transformer_voter_data_idx + ) + + # if transformer_id not in list(self.task_id_to_X.keys()): + # self.transformer_id_to_X[transformer_id] = X + # if transformer_id not in list(self.task_id_to_y.keys()): + # self.transformer_id_to_y[transformer_id] = y + + # # train new transformers + # for transformer_num in range(num_transformers): + # if X is not None: + # n = len(X) + # elif y is not None: + # n = len(y) + # else: + # n = None + # if n is not None: + # transformer_data_idx = np.random.choice( + # transformer_voter_data_idx, + # int(transformer_data_proportion * n), + # replace=False, + # ) + # else: + # transformer_data_idx = None + # self.set_updated_transformer( + # transformer_id=transformer_id, + # transformer_data_idx=transformer_data_idx, + # transformer_class=transformer_class, + # transformer_kwargs=transformer_kwargs, + # inputclasses = inputclasses + # ) + # voter_data_idx = np.delete(transformer_voter_data_idx, transformer_data_idx) + # self._append_voter_data_idx( + # task_id=transformer_id, + # bag_id=transformer_num, + # voter_data_idx=voter_data_idx, + # ) + + #train voters and deciders from new transformer to previous tasks for existing_task_id in np.intersect1d(backward_task_ids, self.get_task_ids()): self.set_voter(transformer_id=transformer_id, task_id=existing_task_id) self.set_decider( @@ -357,7 +412,8 @@ def set_updated_transformer( ) else: # Type check y - self._append_transformer( + print(transformer_id) + self._replace_transformer( transformer_id, transformer_class(**transformer_kwargs)._partial_fit(X, y, inputclasses = inputclasses) #transformer_id, transformer_class(**transformer_kwargs).fit(X, y) ) @@ -549,7 +605,10 @@ def set_voter( transformers = self.transformer_id_to_transformers[transformer_id] else: transformers = [self.transformer_id_to_transformers[transformer_id][bag_id]] + + print("trasnformsers length = " + str(len(transformers))) for transformer_num, transformer in enumerate(transformers): + #print(transformer_num) if transformer_id == task_id: voter_data_idx = self.task_id_to_bag_id_to_voter_data_idx[task_id][ transformer_num @@ -741,6 +800,7 @@ def add_task( forward_transformer_ids=None, ): print("decider kwards at start of add_task: "+str(decider_kwargs)) + print("num_transformers at add_task = "+str(num_transformers)) """ Adds a task to the progressive learner. Optionally trains one or more transformer from the input data (if num_transformers > 0), adds voters @@ -834,6 +894,7 @@ def add_task( # add new transformer and train voters and decider # from new transformer to previous tasks if num_transformers > 0: + print("num_transformers = "+str(num_transformers)) self._add_transformer( X, y, @@ -973,11 +1034,10 @@ def update_task( The object itself. """ - if task_id is None: print("Error: No Task ID inputted") return self - ) # come up with something that has fewer collisions + # come up with something that has fewer collisions self.task_id_to_X[task_id] = X self.task_id_to_y[task_id] = y @@ -994,6 +1054,7 @@ def update_task( print("task_id: "+str(task_id)) print("updating transformer") + print("num_transformers = "+str(num_transformers)) if num_transformers > 0: self._update_transformer( X, @@ -1073,6 +1134,7 @@ def predict(self, X, task_id, transformer_ids=None): raise NotFittedError decider = self.task_id_to_decider[task_id] + return decider.predict(X, transformer_ids=transformer_ids) diff --git a/proglearn/transformers.py b/proglearn/transformers.py index 70c08b063a..e9e223b6e6 100644 --- a/proglearn/transformers.py +++ b/proglearn/transformers.py @@ -150,10 +150,23 @@ class TreeClassificationTransformer(BaseTransformer): def __init__(self, kwargs={}): self.kwargs = kwargs - def _partial_fit(self, X, y, classes=None): + + def _partial_fit(transformer,test, X, y, inputclasses): + # print("in partial fit") + # print(transformer) + # print(test) + X, y = check_X_y(X, y) - self.transformer_ = DecisionTreeClassifier(**self.kwargs).partial_fit(X, y, classes = classes) - return self + test.partial_fit(X, y, classes = [0,1]) # set to [0,1] for testing + #print(self) + #print("leaving partial fit") + return test + + # def _partial_fit(up_transformer, X, y, inputclasses=None): + # X, y = check_X_y(X, y) + # #self.transformer_ = DecisionTreeClassifier(**self.kwargs).partial_fit(X, y, classes = inputclasses) + # up_transformer.partial_fit(X, y, classes = inputclasses) + # return up_transformer def fit(self, X, y): """ @@ -173,6 +186,7 @@ def fit(self, X, y): """ X, y = check_X_y(X, y) self.transformer_ = DecisionTreeClassifier(**self.kwargs).fit(X, y) + print(self.transformer_) return self def transform(self, X): From 50ba01244fbe6124dcafa78c85cadc41ebfe0a4d Mon Sep 17 00:00:00 2001 From: KevinWang905 <46271360+KevinWang905@users.noreply.github.com> Date: Thu, 9 Dec 2021 14:59:39 -0500 Subject: [PATCH 06/16] Added Documentation --- proglearn/forest.py | 242 +------------------------------ proglearn/progressive_learner.py | 52 ++----- proglearn/transformers.py | 23 +++ 3 files changed, 35 insertions(+), 282 deletions(-) diff --git a/proglearn/forest.py b/proglearn/forest.py index e496bb6a69..00eea3fd5e 100644 --- a/proglearn/forest.py +++ b/proglearn/forest.py @@ -301,7 +301,7 @@ def update_task( inputclasses = None ): """ - adds a task with id task_id, max tree depth max_depth, given input data matrix X + updates a task with id task_id, max tree depth max_depth, given input data matrix X and output data matrix y, to the Lifelong Classification Forest. Also splits data for training and voting based on tree_construction_proportion and uses the value of kappa to determine whether the learner will have @@ -435,174 +435,6 @@ def predict(self, X, task_id): """ return super().predict(check_array(X), task_id) -# class LifelongClassificationForestStream(ClassificationProgressiveLearner): - -# def __init__( -# self, -# default_n_estimators=100, -# default_tree_construction_proportion=0.67, -# default_kappa=np.inf, -# default_max_depth=30, -# ): -# super().__init__( -# default_transformer_class=TreeClassificationTransformer, -# default_transformer_kwargs={}, -# default_voter_class=TreeClassificationVoter, -# default_voter_kwargs={"kappa": default_kappa}, -# default_decider_class=SimpleArgmaxAverage, -# default_decider_kwargs={}, -# ) - -# self.default_n_estimators = default_n_estimators -# self.default_tree_construction_proportion = default_tree_construction_proportion -# self.default_kappa = default_kappa -# self.default_max_depth = default_max_depth - -# def update_task( -# self, -# X, -# y, -# task_id=None, -# n_estimators="default", -# tree_construction_proportion="default", -# kappa="default", -# max_depth="default", -# inputclasses = None -# ): -# """ -# adds a task with id task_id, max tree depth max_depth, given input data matrix X -# and output data matrix y, to the Lifelong Classification Forest. Also splits -# data for training and voting based on tree_construction_proportion and uses the -# value of kappa to determine whether the learner will have -# finite sample correction. - -# Parameters -# ---------- -# X : ndarray -# The input data matrix. - -# y : ndarray -# The output (response) data matrix. - -# task_id : obj, default=None -# The id corresponding to the task being added. - -# n_estimators : int or str, default='default' -# The number of trees used for the given task. - -# tree_construction_proportion : int or str, default='default' -# The proportions of the input data set aside to train each decision -# tree. The remainder of the data is used to fill in voting posteriors. -# The default is used if 'default' is provided. - -# kappa : float or str, default='default' -# The coefficient for finite sample correction. -# The default is used if 'default' is provided. - -# max_depth : int or str, default='default' -# The maximum depth of a tree in the Lifelong Classification Forest. -# The default is used if 'default' is provided. - -# Returns -# ------- -# self : LifelongClassificationForest -# The object itself. -# """ -# if n_estimators == "default": -# n_estimators = self.default_n_estimators -# if tree_construction_proportion == "default": -# tree_construction_proportion = self.default_tree_construction_proportion -# if kappa == "default": -# kappa = self.default_kappa -# if max_depth == "default": -# max_depth = self.default_max_depth - -# X, y = check_X_y(X, y) - -# print("unique y values in update_task: "+str(np.unique(y))) - -# return super().update_task( -# X, -# y, -# inputclasses = inputclasses, -# task_id=task_id, -# transformer_voter_decider_split=[ -# tree_construction_proportion, -# 1 - tree_construction_proportion, -# 0, -# ], -# num_transformers=n_estimators, -# transformer_kwargs={"kwargs": {"max_depth": max_depth}}, -# voter_kwargs={ -# "classes": np.unique(y), -# "kappa": kappa, -# }, -# decider_kwargs={"classes": np.unique(y)}, -# ) - -# def update_transformer( -# self, -# X, -# y, -# inputclasses = None, -# transformer_id=None, -# n_estimators="default", -# max_depth="default", -# ): - -# if n_estimators == "default": -# n_estimators = self.default_n_estimators -# if max_depth == "default": -# max_depth = self.default_max_depth - -# X, y = check_X_y(X, y) -# return super().update_transformer( -# X, -# y, -# inputclasses = inputclasses, -# transformer_kwargs={"kwargs": {"max_depth": max_depth}}, -# transformer_id=transformer_id, -# num_transformers=n_estimators, -# ) - -# def predict_proba(self, X, task_id): -# """ -# estimates class posteriors under task_id for each example in input data X. - -# Parameters -# ---------- -# X : ndarray -# The input data matrix. - -# task_id: -# The id corresponding to the task being mapped to. - -# Returns -# ------- -# y_proba_hat : ndarray of shape [n_samples, n_classes] -# posteriors per example -# """ -# return super().predict_proba(check_array(X), task_id) - -# def predict(self, X, task_id): -# """ -# predicts class labels under task_id for each example in input data X. - -# Parameters -# ---------- -# X : ndarray -# The input data matrix. - -# task_id : obj -# The id corresponding to the task being mapped to. - -# Returns -# ------- -# y_hat : ndarray of shape [n_samples] -# predicted class label per example -# """ -# return super().predict(check_array(X), task_id) - class UncertaintyForest(LifelongClassificationForest): """ @@ -725,75 +557,3 @@ def predict(self, X): predicted class label per example """ return super().predict(X, 0) - -# class UncertaintyForestStream(LifelongClassificationForestStream): - - -# def __init__( -# self, -# n_estimators=100, -# kappa=np.inf, -# max_depth=30, -# tree_construction_proportion=0.67, -# ): -# super().__init__( -# default_n_estimators=n_estimators, -# default_tree_construction_proportion=tree_construction_proportion, -# default_kappa=kappa, -# default_max_depth=max_depth, -# ) - -# def fit(self, X, y, inputclasses): -# """ -# fits forest to data X with labels y - -# Parameters -# ---------- -# X : array of shape [n_samples, n_features] -# The data that will be trained on - -# y : array of shape [n_samples] -# The label for cluster membership of the given data - -# Returns -# ------- -# self : UncertaintyForest -# The object itself. -# """ -# print("classes in forest.UncertaintyForestStream") -# print(inputclasses) -# print("unique y values in UF.fit: "+str(np.unique(y))) - -# return super().update_task(X, y, inputclasses = inputclasses) - -# def predict_proba(self, X): -# """ -# estimates class posteriors for each example in input data X. - -# Parameters -# ---------- -# X : array of shape [n_samples, n_features] -# The data whose posteriors we are estimating. - -# Returns -# ------- -# y_proba_hat : ndarray of shape [n_samples, n_classes] -# posteriors per example -# """ -# return super().predict_proba(X, 0) - -# def predict(self, X): -# """ -# predicts class labels for each example in input data X. - -# Parameters -# ---------- -# X : array of shape [n_samples, n_features] -# The data on which we are performing inference. - -# Returns -# ------- -# y_hat : ndarray of shape [n_samples] -# predicted class label per example -# """ -# return super().predict(X, 0) diff --git a/proglearn/progressive_learner.py b/proglearn/progressive_learner.py index d2cf027d3a..36634bb2de 100755 --- a/proglearn/progressive_learner.py +++ b/proglearn/progressive_learner.py @@ -247,10 +247,12 @@ def _update_transformer( if transformer_id not in list(self.task_id_to_y.keys()): self.transformer_id_to_y[transformer_id] = y + # for all transformers referring to specified task + for transformer in self.transformer_id_to_transformers[transformer_id]: - # print(transformer.transformer_) - # print(inputclasses) - # print(transformer) + + + # Check data and assign data for training if X is not None: n = len(X) @@ -280,8 +282,12 @@ def _update_transformer( if transformer_data_idx is not None: X2, y2 = X2[transformer_data_idx], y2[transformer_data_idx] + # replace transformer with updated tranformer + transformer.transformer_ = transformer._partial_fit(transformer.transformer_, X2,y2, inputclasses) + # update voter to new transformer + voter_data_idx = np.delete(transformer_voter_data_idx, transformer_data_idx) self._append_voter_data_idx( task_id=transformer_id, @@ -291,9 +297,8 @@ def _update_transformer( counter = counter + 1 print("testtesttest") - # if transformer_id is None: - # transformer_id = len(self.get_transformer_ids()) + # Update backwards backward_task_ids = ( backward_task_ids if backward_task_ids is not None else self.get_task_ids() ) @@ -303,41 +308,6 @@ def _update_transformer( else transformer_voter_data_idx ) - # if transformer_id not in list(self.task_id_to_X.keys()): - # self.transformer_id_to_X[transformer_id] = X - # if transformer_id not in list(self.task_id_to_y.keys()): - # self.transformer_id_to_y[transformer_id] = y - - # # train new transformers - # for transformer_num in range(num_transformers): - # if X is not None: - # n = len(X) - # elif y is not None: - # n = len(y) - # else: - # n = None - # if n is not None: - # transformer_data_idx = np.random.choice( - # transformer_voter_data_idx, - # int(transformer_data_proportion * n), - # replace=False, - # ) - # else: - # transformer_data_idx = None - # self.set_updated_transformer( - # transformer_id=transformer_id, - # transformer_data_idx=transformer_data_idx, - # transformer_class=transformer_class, - # transformer_kwargs=transformer_kwargs, - # inputclasses = inputclasses - # ) - # voter_data_idx = np.delete(transformer_voter_data_idx, transformer_data_idx) - # self._append_voter_data_idx( - # task_id=transformer_id, - # bag_id=transformer_num, - # voter_data_idx=voter_data_idx, - # ) - #train voters and deciders from new transformer to previous tasks for existing_task_id in np.intersect1d(backward_task_ids, self.get_task_ids()): self.set_voter(transformer_id=transformer_id, task_id=existing_task_id) @@ -405,7 +375,7 @@ def set_updated_transformer( else: transformer_kwargs = self.default_transformer_kwargs - # Fit transformer and new voter + # Update transformer if y is None: self._replace_transformer( transformer_id, transformer_class(**transformer_kwargs)._partial_fit(X, inputclasses = inputclasses) diff --git a/proglearn/transformers.py b/proglearn/transformers.py index e9e223b6e6..a7148d21f8 100644 --- a/proglearn/transformers.py +++ b/proglearn/transformers.py @@ -152,6 +152,29 @@ def __init__(self, kwargs={}): def _partial_fit(transformer,test, X, y, inputclasses): + + """ + Updates the transformer to data X with labels y using partial fit. + + Parameters + ---------- + transformer : TreeClassificationTransformer + TreeClassificationTransformer + test : DecisionClassifierTree + DecisionClassifierTree + X : ndarray + Input data matrix. + y : ndarray + Output (i.e. response data matrix). + inputclasses : ndarray + Classes in X + + Returns + ------- + test : DecisionClassifierTree + The object itself. + """ + # print("in partial fit") # print(transformer) # print(test) From 33c1463ddf08caa7f5a18bc92cb0505e9c8ea14b Mon Sep 17 00:00:00 2001 From: KevinWang905 <46271360+KevinWang905@users.noreply.github.com> Date: Fri, 10 Dec 2021 14:23:33 -0500 Subject: [PATCH 07/16] Updated transformer call --- proglearn/progressive_learner.py | 131 ++++++++++++++++++------------- 1 file changed, 75 insertions(+), 56 deletions(-) diff --git a/proglearn/progressive_learner.py b/proglearn/progressive_learner.py index 36634bb2de..f3a62c3639 100755 --- a/proglearn/progressive_learner.py +++ b/proglearn/progressive_learner.py @@ -240,15 +240,15 @@ def _update_transformer( ): print("transformer id at _update_transformer = " + str(transformer_id)) print("testtesttest") - counter = 0 - + print('backward_task_ids: '+str(backward_task_ids)) + if transformer_id not in list(self.task_id_to_X.keys()): self.transformer_id_to_X[transformer_id] = X if transformer_id not in list(self.task_id_to_y.keys()): self.transformer_id_to_y[transformer_id] = y # for all transformers referring to specified task - + counter = 0 for transformer in self.transformer_id_to_transformers[transformer_id]: @@ -279,44 +279,50 @@ def _update_transformer( if transformer_id in list(self.transformer_id_to_y.keys()) else self.task_id_to_y[transformer_id] ) + + + if transformer_data_idx is not None: X2, y2 = X2[transformer_data_idx], y2[transformer_data_idx] # replace transformer with updated tranformer - - transformer.transformer_ = transformer._partial_fit(transformer.transformer_, X2,y2, inputclasses) + transformer.transformer_.partial_fit(X2,y2, inputclasses) # update voter to new transformer - voter_data_idx = np.delete(transformer_voter_data_idx, transformer_data_idx) - self._append_voter_data_idx( - task_id=transformer_id, - bag_id=counter, - voter_data_idx=voter_data_idx, - ) + # voter_data_idx = np.delete(transformer_voter_data_idx, transformer_data_idx) + # self._append_voter_data_idx( + # task_id=transformer_id, + # bag_id=counter, + # voter_data_idx=voter_data_idx, + #) counter = counter + 1 - print("testtesttest") - # Update backwards - backward_task_ids = ( - backward_task_ids if backward_task_ids is not None else self.get_task_ids() - ) - transformer_voter_data_idx = ( - range(len(X)) - if transformer_voter_data_idx is None - else transformer_voter_data_idx - ) + # # Update backwards + # backward_task_ids = ( + # backward_task_ids if backward_task_ids is not None else self.get_task_ids() + # ) + # print('backward_task_ids: '+str(backward_task_ids)) + # transformer_voter_data_idx = ( + # range(len(X)) + # if transformer_voter_data_idx is None + # else transformer_voter_data_idx + # ) - #train voters and deciders from new transformer to previous tasks - for existing_task_id in np.intersect1d(backward_task_ids, self.get_task_ids()): - self.set_voter(transformer_id=transformer_id, task_id=existing_task_id) - self.set_decider( - task_id=existing_task_id, - transformer_ids=list( - self.task_id_to_transformer_id_to_voters[existing_task_id].keys() - ), - ) + # #train voters and deciders from new transformer to previous tasks + # for existing_task_id in np.intersect1d(backward_task_ids, self.get_task_ids()): + # self.set_voter(transformer_id=transformer_id, task_id=existing_task_id) + # print("backpropagation") + # print(list( + # self.task_id_to_transformer_id_to_voters[existing_task_id].keys())) + + # self.set_decider( + # task_id=existing_task_id, + # transformer_ids=list( + # self.task_id_to_transformer_id_to_voters[existing_task_id].keys() + # ), + # ) return self @@ -400,12 +406,18 @@ def _add_transformer( transformer_kwargs, backward_task_ids ): + + print('in _add_transformer \n') + if transformer_id is None: transformer_id = len(self.get_transformer_ids()) backward_task_ids = ( backward_task_ids if backward_task_ids is not None else self.get_task_ids() ) + + print('backward_task_ids: '+str(backward_task_ids)) + transformer_voter_data_idx = ( range(len(X)) if transformer_voter_data_idx is None @@ -447,7 +459,14 @@ def _add_transformer( ) # train voters and deciders from new transformer to previous tasks + + print(np.intersect1d(backward_task_ids, self.get_task_ids())) for existing_task_id in np.intersect1d(backward_task_ids, self.get_task_ids()): + + print("backpropagation") + print(list( + self.task_id_to_transformer_id_to_voters[existing_task_id].keys())) + self.set_voter(transformer_id=transformer_id, task_id=existing_task_id) self.set_decider( task_id=existing_task_id, @@ -1045,33 +1064,33 @@ def update_task( # improves accuracy - # # train voters and decider from previous (and current) transformers to new task - # for transformer_id in ( - # forward_transformer_ids - # if forward_transformer_ids - # else self.get_transformer_ids() - # ): - # self.set_voter( - # transformer_id=transformer_id, - # task_id=task_id, - # voter_class=voter_class, - # voter_kwargs=voter_kwargs, - # ) + # train voters and decider from previous (and current) transformers to new task + for transformer_id in ( + forward_transformer_ids + if forward_transformer_ids + else self.get_transformer_ids() + ): + self.set_voter( + transformer_id=transformer_id, + task_id=task_id, + voter_class=voter_class, + voter_kwargs=voter_kwargs, + ) - # # train decider of new task - # if forward_transformer_ids: - # if num_transformers == 0: - # transformer_ids = forward_transformer_ids - # else: - # transformer_ids = np.concatenate([forward_transformer_ids, task_id]) - # else: - # transformer_ids = self.get_transformer_ids() - # self.set_decider( - # task_id=task_id, - # transformer_ids=transformer_ids, - # decider_class=decider_class, - # decider_kwargs=decider_kwargs, - # ) + # train decider of new task + if forward_transformer_ids: + if num_transformers == 0: + transformer_ids = forward_transformer_ids + else: + transformer_ids = np.concatenate([forward_transformer_ids, task_id]) + else: + transformer_ids = self.get_transformer_ids() + self.set_decider( + task_id=task_id, + transformer_ids=transformer_ids, + decider_class=decider_class, + decider_kwargs=decider_kwargs, + ) return self From ebfdbac302bd22ca887d5e7d3e9e832c90f467ed Mon Sep 17 00:00:00 2001 From: KevinWang905 <46271360+KevinWang905@users.noreply.github.com> Date: Fri, 10 Dec 2021 14:28:39 -0500 Subject: [PATCH 08/16] deleted _partial_fit --- proglearn/transformers.py | 41 +-------------------------------------- 1 file changed, 1 insertion(+), 40 deletions(-) diff --git a/proglearn/transformers.py b/proglearn/transformers.py index a7148d21f8..2b86ac8261 100644 --- a/proglearn/transformers.py +++ b/proglearn/transformers.py @@ -151,45 +151,6 @@ def __init__(self, kwargs={}): self.kwargs = kwargs - def _partial_fit(transformer,test, X, y, inputclasses): - - """ - Updates the transformer to data X with labels y using partial fit. - - Parameters - ---------- - transformer : TreeClassificationTransformer - TreeClassificationTransformer - test : DecisionClassifierTree - DecisionClassifierTree - X : ndarray - Input data matrix. - y : ndarray - Output (i.e. response data matrix). - inputclasses : ndarray - Classes in X - - Returns - ------- - test : DecisionClassifierTree - The object itself. - """ - - # print("in partial fit") - # print(transformer) - # print(test) - - X, y = check_X_y(X, y) - test.partial_fit(X, y, classes = [0,1]) # set to [0,1] for testing - #print(self) - #print("leaving partial fit") - return test - - # def _partial_fit(up_transformer, X, y, inputclasses=None): - # X, y = check_X_y(X, y) - # #self.transformer_ = DecisionTreeClassifier(**self.kwargs).partial_fit(X, y, classes = inputclasses) - # up_transformer.partial_fit(X, y, classes = inputclasses) - # return up_transformer def fit(self, X, y): """ @@ -209,7 +170,7 @@ def fit(self, X, y): """ X, y = check_X_y(X, y) self.transformer_ = DecisionTreeClassifier(**self.kwargs).fit(X, y) - print(self.transformer_) + #print(self.transformer_) return self def transform(self, X): From bd668a930069ea35c84cea86bb9623ae45c8a27c Mon Sep 17 00:00:00 2001 From: KevinWang905 <46271360+KevinWang905@users.noreply.github.com> Date: Fri, 10 Dec 2021 14:42:52 -0500 Subject: [PATCH 09/16] removed updating for deciders/voters --- proglearn/progressive_learner.py | 52 ++++++++++++++++---------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/proglearn/progressive_learner.py b/proglearn/progressive_learner.py index f3a62c3639..f58b28a255 100755 --- a/proglearn/progressive_learner.py +++ b/proglearn/progressive_learner.py @@ -340,7 +340,7 @@ def set_updated_transformer( transformer_id = len(self.get_transformer_ids()) - X = ( + X = ( self.transformer_id_to_X[transformer_id] if transformer_id in list(self.transformer_id_to_X.keys()) else self.task_id_to_X[transformer_id] @@ -1065,32 +1065,32 @@ def update_task( # train voters and decider from previous (and current) transformers to new task - for transformer_id in ( - forward_transformer_ids - if forward_transformer_ids - else self.get_transformer_ids() - ): - self.set_voter( - transformer_id=transformer_id, - task_id=task_id, - voter_class=voter_class, - voter_kwargs=voter_kwargs, - ) + # for transformer_id in ( + # forward_transformer_ids + # if forward_transformer_ids + # else self.get_transformer_ids() + # ): + # self.set_voter( + # transformer_id=transformer_id, + # task_id=task_id, + # voter_class=voter_class, + # voter_kwargs=voter_kwargs, + # ) - # train decider of new task - if forward_transformer_ids: - if num_transformers == 0: - transformer_ids = forward_transformer_ids - else: - transformer_ids = np.concatenate([forward_transformer_ids, task_id]) - else: - transformer_ids = self.get_transformer_ids() - self.set_decider( - task_id=task_id, - transformer_ids=transformer_ids, - decider_class=decider_class, - decider_kwargs=decider_kwargs, - ) + # # train decider of new task + # if forward_transformer_ids: + # if num_transformers == 0: + # transformer_ids = forward_transformer_ids + # else: + # transformer_ids = np.concatenate([forward_transformer_ids, task_id]) + # else: + # transformer_ids = self.get_transformer_ids() + # self.set_decider( + # task_id=task_id, + # transformer_ids=transformer_ids, + # decider_class=decider_class, + # decider_kwargs=decider_kwargs, + # ) return self From 5c28da4fba552bf9adeb74301b77391b206af9e8 Mon Sep 17 00:00:00 2001 From: KevinWang905 <46271360+KevinWang905@users.noreply.github.com> Date: Mon, 13 Dec 2021 15:08:15 -0500 Subject: [PATCH 10/16] Proper voter updating implemented --- deciders.py | 173 +++++++ progressive_learner.py | 1023 ++++++++++++++++++++++++++++++++++++++++ transformers.py | 196 ++++++++ voters.py | 293 ++++++++++++ 4 files changed, 1685 insertions(+) create mode 100644 deciders.py create mode 100644 progressive_learner.py create mode 100644 transformers.py create mode 100644 voters.py diff --git a/deciders.py b/deciders.py new file mode 100644 index 0000000000..3d5c3412bb --- /dev/null +++ b/deciders.py @@ -0,0 +1,173 @@ +""" +Main Author: Will LeVine +Corresponding Email: levinewill@icloud.com +""" +import numpy as np + +from .base import BaseClassificationDecider + +from sklearn.utils.validation import ( + check_X_y, + check_array, + check_is_fitted, +) + + +class SimpleArgmaxAverage(BaseClassificationDecider): + """ + A class for a decider that uses the average vote for classification. + + Parameters + ---------- + classes : list, default=[] + List of final output classification labels of type obj. + + Attributes + ---------- + transformer_id_to_transformers_ : dict + A dictionary with keys of type obj corresponding to transformer ids + and values of type obj corresponding to a transformer. This dictionary + maps transformers to a particular transformer id. + + transformer_id_to_voters_ : dict + A dictionary with keys of type obj corresponding to transformer ids + and values of type obj corresponding to a voter class. This dictionary + maps voter classes to a particular transformer id. + """ + + def __init__(self, classes=[]): + self.classes = classes + + def fit( + self, + X, + y, + transformer_id_to_transformers, + transformer_id_to_voters, + ): + """ + Function for fitting. + Stores attributes (classes, transformer_id_to_transformers, + and transformer_id_to_voters) of a ClassificationDecider. + + Parameters: + ----------- + X : ndarray + Input data matrix. + + y : ndarray + Output (i.e. response) data matrix. + + transformer_id_to_transformers : dict + A dictionary with keys of type obj corresponding to transformer ids + and values of type obj corresponding to a transformer. This dictionary + maps transformers to a particular transformer id. + + transformer_id_to_voters : dict + A dictionary with keys of type obj corresponding to transformer ids + and values of type obj corresponding to a voter class. This dictionary thus + maps voter classes to a particular transformer id. + + Returns + ------- + self : SimpleArgmaxAverage + The object itself. + + Raises + ------- + ValueError + When the labels have not been provided and the classes are empty. + """ + if not isinstance(self.classes, (list, np.ndarray)): + if len(y) == 0: + raise ValueError( + "Classification Decider classes undefined with no class labels fed to fit" + ) + else: + self.classes = np.unique(y) + else: + self.classes = np.array(self.classes) + self.transformer_id_to_transformers_ = transformer_id_to_transformers + self.transformer_id_to_voters_ = transformer_id_to_voters + return self + + def predict_proba(self, X, transformer_ids=None): + """ + Predicts posterior probabilities per input example. + + Loops through each transformer and bag of transformers. + Performs a transformation of the input data with the transformer. + Gets a voter to map the transformed input data into a posterior distribution. + Gets the mean vote per bagging component and append it to a vote per transformer id. + Returns the aggregate average vote. + + Parameters + ---------- + X : ndarray + Input data matrix. + + transformer_ids : list, default=None + A list with specific transformer ids that will be used for inference. Defaults + to using all transformers if no transformer ids are given. + + Returns + ------- + y_proba_hat : ndarray of shape [n_samples, n_classes] + posteriors per example + + + Raises + ------ + NotFittedError + When the model is not fitted. + """ + check_is_fitted(self) + vote_per_transformer_id = [] + for transformer_id in ( + transformer_ids + if transformer_ids is not None + else self.transformer_id_to_voters_.keys() + ): + check_is_fitted(self) + vote_per_bag_id = [] + for bag_id in range( + len(self.transformer_id_to_transformers_[transformer_id]) + ): + transformer = self.transformer_id_to_transformers_[transformer_id][ + bag_id + ] + X_transformed = transformer.transform(X) + voter = self.transformer_id_to_voters_[transformer_id][bag_id] + vote = voter.predict_proba(X_transformed) + vote_per_bag_id.append(vote) + vote_per_transformer_id.append(np.mean(vote_per_bag_id, axis=0)) + return np.mean(vote_per_transformer_id, axis=0) + + def predict(self, X, transformer_ids=None): + """ + Predicts the most likely class per input example. + + Uses the predict_proba method to get the mean vote per id. + Returns the class with the highest vote. + + Parameters + ---------- + X : ndarray + Input data matrix. + + transformer_ids : list, default=None + A list with all transformer ids. Defaults to None if no transformer ids + are given. + + Returns + ------- + y_hat : ndarray of shape [n_samples] + predicted class label per example + + Raises + ------ + NotFittedError + When the model is not fitted. + """ + vote_overall = self.predict_proba(X, transformer_ids=transformer_ids) + return self.classes[np.argmax(vote_overall, axis=1)] diff --git a/progressive_learner.py b/progressive_learner.py new file mode 100644 index 0000000000..d093c8ac89 --- /dev/null +++ b/progressive_learner.py @@ -0,0 +1,1023 @@ +""" +Main Author: Will LeVine +Corresponding Email: levinewill@icloud.com +""" +import numpy as np +from sklearn.exceptions import NotFittedError + +from .base import BaseClassificationProgressiveLearner, BaseProgressiveLearner + + +class ProgressiveLearner(BaseProgressiveLearner): + """ + A (mostly) internal class for progressive learning. Most users who desire to + utilize ProgLearn should use the classes defined in {network, forest}.py instead + of this class. + + Parameters + ---------- + default_transformer_class : BaseTransformer, default=None + The class of transformer to which the progressive learner defaults + if None is provided in any of the functions which add or set + transformers. + + default_transformer_kwargs : dict, default=None + A dictionary with keys of type string and values of type obj corresponding + to the given string kwarg. This determines to which type of transformer the + progressive learner defaults if None is provided in any of the functions + which add or set transformers. + + default_voter_class : BaseVoter, default=None + The class of voter to which the progressive learner defaults + if None is provided in any of the functions which add or set + voters. + + default_voter_kwargs : dict, default=None + A dictionary with keys of type string and values of type obj corresponding + to the given string kwarg. This determines to which type of voter the + progressive learner defaults if None is provided in any of the functions + which add or set voters. + + default_decider_class : BaseDecider, default=None + The class of decider to which the progressive learner defaults + if None is provided in any of the functions which add or set + deciders. + + default_decider_kwargs : dict, default=None + A dictionary with keys of type string and values of type obj corresponding + to the given string kwarg. This determines to which type of decider the + progressive learner defaults if None is provided in any of the functions + which add or set deciders. + + Attributes + ---------- + task_id_to_X : dict + A dictionary with keys of type obj corresponding to task ids + and values of type ndarray corresponding to the input data matrix X. + This dictionary thus maps input data matrix to the task where posteriors + are to be estimated. + + task_id_to_y : dict + A dictionary with keys of type obj corresponding to task ids + and values of type ndarray corresponding to output data matrix y. + This dictionary thus maps output data matrix to the task where posteriors + are to be estimated. + + transformer_id_to_X : dict + A dictionary with keys of type obj corresponding to transformer ids + and values of type ndarray corresponding to the output data matrix X. + This dictionary thus maps input data matrix to a particular transformer. + + transformer_id_to_y : dict + A dictionary with keys of type obj corresponding to transformer ids + and values of type ndarray corresponding to the output data matrix y. + This dictionary thus maps output data matrix to a particular transformer. + + transformer_id_to_transformers : dict + A dictionary with keys of type obj corresponding to transformer ids + and values of type obj corresponding to a transformer. This dictionary thus + maps transformer ids to the corresponding transformers. + + task_id_to_transformer_id_to_voters : dict + A nested dictionary with outer key of type obj, corresponding to task ids + inner key of type obj, corresponding to transformer ids, + and values of type obj, corresponding to a voter. This dictionary thus maps + voters to a corresponding transformer assigned to a particular task. + + task_id_to_decider : dict + A dictionary with keys of type obj, corresponding to task ids, + and values of type obj corresponding to a decider. This dictionary thus + maps deciders to a particular task. + + task_id_to_decider_class : dict + A dictionary with keys of type obj corresponding to task ids + and values of type obj corresponding to a decider class. This dictionary + thus maps decider classes to a particular task id. + + task_id_to_voter_class : dict + A dictionary with keys of type obj corresponding to task ids + and values of type obj corresponding to a voter class. This dictionary thus + maps voter classes to a particular task id. + + task_id_to_voter_kwargs : dict + A dictionary with keys of type obj corresponding to task ids + and values of type obj corresponding to a voter kwargs. This dictionary thus + maps voter kwargs to a particular task id. + + task_id_to_decider_kwargs : dict + A dictionary with keys of type obj corresponding to task ids + and values of type obj corresponding to a decider kwargs. This dictionary + thus maps decider kwargs to a particular task id. + + task_id_to_bag_id_to_voter_data_idx : dict + A nested dictionary with outer keys of type obj corresponding to task ids + inner keys of type obj corresponding to bag ids + and values of type obj corresponding to voter data indices. + This dictionary thus maps voter data indices to particular bags + for particular tasks. + + task_id_to_decider_idx : dict + A dictionary with keys of type obj corresponding to task ids + and values of type obj corresponding to decider indices. This dictionary + thus maps decider indices to particular tasks. + """ + + def __init__( + self, + default_transformer_class=None, + default_transformer_kwargs=None, + default_voter_class=None, + default_voter_kwargs=None, + default_decider_class=None, + default_decider_kwargs=None, + ): + + ( + self.task_id_to_X, + self.task_id_to_y, + self.transformer_id_to_X, + self.transformer_id_to_y, + ) = ({}, {}, {}, {}) + + self.transformer_id_to_transformers = {} + self.task_id_to_transformer_id_to_voters = {} + self.task_id_to_decider = {} + + self.task_id_to_decider_class = {} + self.task_id_to_decider_kwargs = {} + + self.task_id_to_voter_class = {} + self.task_id_to_voter_kwargs = {} + + self.task_id_to_bag_id_to_voter_data_idx = {} + self.task_id_to_decider_idx = {} + + self.default_transformer_class = default_transformer_class + self.default_transformer_kwargs = default_transformer_kwargs + + self.default_voter_class = default_voter_class + self.default_voter_kwargs = default_voter_kwargs + + self.default_decider_class = default_decider_class + self.default_decider_kwargs = default_decider_kwargs + + def get_transformer_ids(self): + return np.array(list(self.transformer_id_to_transformers.keys())) + + def get_task_ids(self): + return np.array(list(self.task_id_to_decider.keys())) + + def _append_transformer(self, transformer_id, transformer): + + + if transformer_id in self.get_transformer_ids(): + self.transformer_id_to_transformers[transformer_id].append(transformer) + else: + self.transformer_id_to_transformers[transformer_id] = [transformer] + + def _replace_transformer(self, transformer_id, transformer): + + self.transformer_id_to_transformers[transformer_id] = [transformer] + + def _append_voter(self, transformer_id, task_id, voter): + if task_id in list(self.task_id_to_transformer_id_to_voters.keys()): + if transformer_id in list( + self.task_id_to_transformer_id_to_voters[task_id].keys() + ): + self.task_id_to_transformer_id_to_voters[task_id][ + transformer_id + ].append(voter) + else: + self.task_id_to_transformer_id_to_voters[task_id][transformer_id] = [ + voter + ] + else: + self.task_id_to_transformer_id_to_voters[task_id] = { + transformer_id: [voter] + } + + def _append_voter_data_idx(self, task_id, bag_id, voter_data_idx): + + if task_id in list(self.task_id_to_bag_id_to_voter_data_idx.keys()): + + self.task_id_to_bag_id_to_voter_data_idx[task_id][bag_id] = voter_data_idx + else: + self.task_id_to_bag_id_to_voter_data_idx[task_id] = {bag_id: voter_data_idx} + + def _update_voter_data_idx(self, task_id, bag_id, voter_data_idx): + + if task_id in list(self.task_id_to_bag_id_to_voter_data_idx.keys()): + prev = self.task_id_to_bag_id_to_voter_data_idx[task_id][bag_id] + new = voter_data_idx + self.task_id_to_bag_id_to_voter_data_idx[task_id][bag_id] = np.append(prev, new) + else: + self.task_id_to_bag_id_to_voter_data_idx[task_id] = {bag_id: voter_data_idx} + + def _append_decider_idx(self, task_id, decider_idx): + self.task_id_to_decider_idx[task_id] = decider_idx + + def _bifurcate_decider_idxs(self, ra, transformer_voter_decider_split): + if transformer_voter_decider_split is None: + return ra, ra + else: + split = [ + np.sum(np.array(transformer_voter_decider_split)[:2]), + transformer_voter_decider_split[2], + ] + if np.sum(split) > 1: + return [ + np.random.choice(ra, int(len(ra) * p), replace=False) for p in split + ] + else: + first_idx = np.random.choice(ra, int(len(ra) * split[0]), replace=False) + second_idx = np.random.choice( + np.delete(ra, first_idx), int(len(ra) * split[1]), replace=False + ) + return first_idx, second_idx + + + def _update_transformer( + self, + X, + y, + transformer_data_proportion, + transformer_voter_data_idx, + transformer_id, + num_transformers, + transformer_class, + transformer_kwargs, + backward_task_ids, + inputclasses = None, + decider_kwargs = None + ): + + + if transformer_id not in list(self.task_id_to_X.keys()): + self.transformer_id_to_X[transformer_id] = X + if transformer_id not in list(self.task_id_to_y.keys()): + self.transformer_id_to_y[transformer_id] = y + + backward_task_ids = ( + backward_task_ids if backward_task_ids is not None else self.get_task_ids() + ) + + # for all transformers referring to specified task + counter = 0 + for transformer in self.transformer_id_to_transformers[transformer_id]: + + + # Check data and assign data for training + + if X is not None: + n = len(X) + elif y is not None: + n = len(y) + else: + n = None + if n is not None: + transformer_data_idx = np.random.choice( + transformer_voter_data_idx, + int(transformer_data_proportion * n), + replace=False, + ) + else: + transformer_data_idx = None + + X2 = ( + self.transformer_id_to_X[transformer_id] + if transformer_id in list(self.transformer_id_to_X.keys()) + else self.task_id_to_X[transformer_id] + ) + y2 = ( + self.transformer_id_to_y[transformer_id] + if transformer_id in list(self.transformer_id_to_y.keys()) + else self.task_id_to_y[transformer_id] + ) + + + + if transformer_data_idx is not None: + X2, y2 = X2[transformer_data_idx], y2[transformer_data_idx] + + transformer.transformer_.partial_fit(X2,y2, inputclasses) + + voter_data_idx = np.delete(transformer_voter_data_idx, transformer_data_idx) + + self._update_voter_data_idx( + task_id=transformer_id, + bag_id=counter, + voter_data_idx=voter_data_idx, + ) + counter = counter + 1 + + for existing_task_id in np.intersect1d(backward_task_ids, self.get_task_ids()): + self.set_voter(transformer_id=transformer_id, task_id=existing_task_id) + self.set_decider( + task_id=existing_task_id, + transformer_ids=list( + self.task_id_to_transformer_id_to_voters[existing_task_id].keys() + ), + ) + + return self + + + def _add_transformer( + self, + X, + y, + transformer_data_proportion, + transformer_voter_data_idx, + transformer_id, + num_transformers, + transformer_class, + transformer_kwargs, + backward_task_ids + ): + + + if transformer_id is None: + transformer_id = len(self.get_transformer_ids()) + + backward_task_ids = ( + backward_task_ids if backward_task_ids is not None else self.get_task_ids() + ) + + + transformer_voter_data_idx = ( + range(len(X)) + if transformer_voter_data_idx is None + else transformer_voter_data_idx + ) + + if transformer_id not in list(self.task_id_to_X.keys()): + self.transformer_id_to_X[transformer_id] = X + if transformer_id not in list(self.task_id_to_y.keys()): + self.transformer_id_to_y[transformer_id] = y + + # train new transformers + for transformer_num in range(num_transformers): + if X is not None: + n = len(X) + elif y is not None: + n = len(y) + else: + n = None + if n is not None: + transformer_data_idx = np.random.choice( + transformer_voter_data_idx, + int(transformer_data_proportion * n), + replace=False, + ) + else: + transformer_data_idx = None + self.set_transformer( + transformer_id=transformer_id, + transformer_data_idx=transformer_data_idx, + transformer_class=transformer_class, + transformer_kwargs=transformer_kwargs, + ) + voter_data_idx = np.delete(transformer_voter_data_idx, transformer_data_idx) + self._append_voter_data_idx( + task_id=transformer_id, + bag_id=transformer_num, + voter_data_idx=voter_data_idx, + ) + + # train voters and deciders from new transformer to previous tasks + for existing_task_id in np.intersect1d(backward_task_ids, self.get_task_ids()): + self.set_voter(transformer_id=transformer_id, task_id=existing_task_id) + self.set_decider( + task_id=existing_task_id, + transformer_ids=list( + self.task_id_to_transformer_id_to_voters[existing_task_id].keys() + ), + ) + + return self + + # make sure the below ganular functions work without add_{transformer, task} + def set_transformer( + self, + transformer_id=None, + transformer=None, + transformer_data_idx=None, + transformer_class=None, + transformer_kwargs=None, + ): + if transformer_id is None: + transformer_id = len(self.get_transformer_ids()) + + X = ( + self.transformer_id_to_X[transformer_id] + if transformer_id in list(self.transformer_id_to_X.keys()) + else self.task_id_to_X[transformer_id] + ) + y = ( + self.transformer_id_to_y[transformer_id] + if transformer_id in list(self.transformer_id_to_y.keys()) + else self.task_id_to_y[transformer_id] + ) + if transformer_data_idx is not None: + X, y = X[transformer_data_idx], y[transformer_data_idx] + + if X is None and y is None: + if transformer.is_fitted(): + self._append_transformer(transformer_id, transformer) + else: + raise ValueError( + "transformer_class is not fitted and X is None and y is None." + ) + return + + # Type check X + + if transformer_class is None: + if self.default_transformer_class is None: + raise ValueError( + "transformer_class is None and 'default_transformer_class' is None." + ) + else: + transformer_class = self.default_transformer_class + + if transformer_kwargs is None: + if self.default_transformer_kwargs is None: + raise ValueError( + """transformer_kwargs is None and + 'default_transformer_kwargs' is None.""" + ) + else: + transformer_kwargs = self.default_transformer_kwargs + + # Fit transformer and new voter + if y is None: + self._append_transformer( + transformer_id, transformer_class(**transformer_kwargs).fit(X) + ) + else: + # Type check y + self._append_transformer( + transformer_id, transformer_class(**transformer_kwargs).fit(X, y) + ) + + def set_voter( + self, + transformer_id, + task_id=None, + voter_class=None, + voter_kwargs=None, + bag_id=None, + ): + + + # Type check X + + # Type check y + + if task_id is None: + task_id = len(self.get_task_ids()) + + if voter_class is None: + if ( + task_id in list(self.task_id_to_voter_class.keys()) + and self.task_id_to_voter_class[task_id] is not None + ): + voter_class = self.task_id_to_voter_class[task_id] + elif self.default_voter_class is not None: + voter_class = self.default_voter_class + else: + raise ValueError( + """voter_class is None, the default voter class for the overall + learner is None, and the default voter class + for this transformer is None.""" + ) + + if voter_kwargs is None: + if ( + task_id in list(self.task_id_to_voter_kwargs.keys()) + and self.task_id_to_voter_kwargs[task_id] is not None + ): + voter_kwargs = self.task_id_to_voter_kwargs[task_id] + elif self.default_voter_kwargs is not None: + voter_kwargs = self.default_voter_kwargs + else: + raise ValueError( + """voter_kwargs is None, the default voter kwargs for the overall + learner is None, and the default voter kwargs + for this transformer is None.""" + ) + + X = self.task_id_to_X[task_id] + y = self.task_id_to_y[task_id] + if bag_id is None: + transformers = self.transformer_id_to_transformers[transformer_id] + else: + transformers = [self.transformer_id_to_transformers[transformer_id][bag_id]] + for transformer_num, transformer in enumerate(transformers): + if transformer_id == task_id: + voter_data_idx = self.task_id_to_bag_id_to_voter_data_idx[task_id][ + transformer_num + ] + else: + voter_data_idx = np.delete( + range(len(X)), self.task_id_to_decider_idx[task_id] + ) + self._append_voter( + transformer_id, + task_id, + voter_class(**voter_kwargs).fit( + transformer.transform(X[voter_data_idx]), y[voter_data_idx] + ), + ) + + self.task_id_to_voter_class[task_id] = voter_class + self.task_id_to_voter_kwargs[task_id] = voter_kwargs + + def set_decider( + self, task_id, transformer_ids, decider_class=None, decider_kwargs=None + ): + if decider_class is None: + if task_id in list(self.task_id_to_decider_class.keys()): + decider_class = self.task_id_to_decider_class[task_id] + elif self.default_decider_class is not None: + decider_class = self.default_decider_class + else: + raise ValueError( + "decider_class is None and 'default_decider_class' is None." + ) + if decider_kwargs is None: + if task_id in list(self.task_id_to_decider_kwargs.keys()): + decider_kwargs = self.task_id_to_decider_kwargs[task_id] + elif self.default_decider_kwargs is not None: + decider_kwargs = self.default_decider_kwargs + else: + raise ValueError( + "decider_kwargs is None and 'default_decider_kwargs' is None." + ) + + transformer_id_to_transformers = { + transformer_id: self.transformer_id_to_transformers[transformer_id] + for transformer_id in transformer_ids + } + transformer_id_to_voters = { + transformer_id: self.task_id_to_transformer_id_to_voters[task_id][ + transformer_id + ] + for transformer_id in transformer_ids + } + + X, y = self.task_id_to_X[task_id], self.task_id_to_y[task_id] + + self.task_id_to_decider[task_id] = decider_class(**decider_kwargs) + decider_idx = self.task_id_to_decider_idx[task_id] + + self.task_id_to_decider[task_id].fit( + X[decider_idx], + y[decider_idx], + transformer_id_to_transformers, + transformer_id_to_voters, + ) + + self.task_id_to_decider_class[task_id] = decider_class + self.task_id_to_decider_kwargs[task_id] = decider_kwargs + + def add_transformer( + self, + X, + y, + transformer_data_proportion=1.0, + transformer_voter_data_idx=None, + transformer_id=None, + num_transformers=1, + transformer_class=None, + transformer_kwargs=None, + backward_task_ids=None, + ): + """ + Adds a transformer to the progressive learner and trains the voters and + deciders from this new transformer to the specified backward_task_ids. + + Parameters + ---------- + X : ndarray + Input data matrix. + + y : ndarray + Output (response) data matrix. + + transformer_data_proportion : float, default=1.0 + The proportion of the data set aside to train the transformer. The + remainder of the data is used to train voters. This is used in the + case that you are using a bagging algorithm and want the various + components in that bagging ensemble to train on disjoint subsets of + the data. This parameter is mostly for internal use. + + transformer_voter_data_idx : ndarray, default=None + A 1d array of type int used to specify the aggregate indices of the input + data used to train the transformers and voters. This is used in the + case that X and/or y contain data that you do not want to use to train + transformers or voters (e.g. X and/or y contains decider training data + disjoint from the transformer/voter data). This parameter is mostly + for internal use. + + transformer_id : obj, default=None + The id corresponding to the transformer being added. + + num_transformers : int, default=1 + The number of transformers to add corresponding to the given inputs. + + transformer_class : BaseTransformer, default=None + The class of the transformer(s) being added. + + transformer_kwargs : dict, default=None + A dictionary with keys of type string and values of type obj corresponding + to the given string kwarg. This determines the kwargs of the transformer(s) + being added. + + backward_task_ids : ndarray, default=None + A 1d array of type obj used to specify to which existing task voters and deciders + will be trained from the transformer(s) being added. + + Returns + ------- + self : ProgressiveLearner + The object itself. + """ + return self._add_transformer( + X, + y, + transformer_data_proportion=transformer_data_proportion, + transformer_voter_data_idx=transformer_voter_data_idx, + transformer_id=transformer_id, + num_transformers=num_transformers, + transformer_class=transformer_class, + transformer_kwargs=transformer_kwargs, + backward_task_ids=backward_task_ids, + ) + + def add_task( + self, + X, + y, + task_id=None, + transformer_voter_decider_split=[0.67, 0.33, 0], + num_transformers=1, + transformer_class=None, + transformer_kwargs=None, + voter_class=None, + voter_kwargs=None, + decider_class=None, + decider_kwargs=None, + backward_task_ids=None, + forward_transformer_ids=None, + ): + + """ + Adds a task to the progressive learner. Optionally trains one or more + transformer from the input data (if num_transformers > 0), adds voters + and deciders from this/these new transformer(s) to the tasks specified + in backward_task_ids, and adds voters and deciders from the transformers + specified in forward_transformer_ids (and from the newly added transformer(s) + corresponding to the input task_id if num_transformers > 0) to the + new task_id. + + Parameters + ---------- + X : ndarray + Input data matrix. + + y : ndarray + Output (response) data matrix. + + task_id : obj, default=None + The id corresponding to the task being added. + + transformer_voter_decider_split : ndarray, default=[0.67, 0.33, 0] + A 1d array of length 3. The 0th index indicates the proportions of the input + data used to train the (optional) newly added transformer(s) corresponding to + the task_id provided in this function call. The 1st index indicates the proportion of + the data set aside to train the voter(s) from these (optional) newly added + transformer(s) to the task_id provided in this function call. For all other tasks, + the aggregate transformer and voter data pairs from those tasks are used to train + the voter(s) from these (optional) newly added transformer(s) to those tasks; + for all other transformers, the aggregate transformer and voter data provided in + this function call is used to train the voter(s) from those transformers to + the task_id provided in this function call. The 2nd index indicates the + proportion of the data set aside to train the decider - these indices are saved + internally and will be used to train all further deciders corresponding to this + task for all function calls. + + num_transformers : int, default=1 + The number of transformers to add corresponding to the given inputs. + + transformer_class : BaseTransformer, default=None + The class of the transformer(s) being added. + + transformer_kwargs : dict, default=None + A dictionary with keys of type string and values of type obj corresponding + to the given string kwarg. This determines the kwargs of the transformer(s) + being added. + + voter_class : BaseVoter, default=None + The class of the voter(s) being added. + + voter_kwargs : dict, default=None + A dictionary with keys of type string and values of type obj corresponding + to the given string kwarg. This determines the kwargs of the voter(s) + being added. + + decider_class : BaseDecider, default=None + The class of the decider(s) being added. + + decider_kwargs : dict, default=None + A dictionary with keys of type string and values of type obj corresponding + to the given string kwarg. This determines the kwargs of the decider(s) + being added. + + backward_task_ids : ndarray, default=None + A 1d array of type obj used to specify to which existing task voters and deciders + will be trained from the transformer(s) being added. + + foward_transformer_ids : ndarray, default=None + A 1d array of type obj used to specify from which existing transformer(s) voters and + deciders will be trained to the new task. If num_transformers > 0, the input task_id + corresponding to the task being added is automatically appended to this 1d array. + + Returns + ------- + self : ProgressiveLearner + The object itself. + """ + if task_id is None: + task_id = max( + len(self.get_transformer_ids()), len(self.get_task_ids()) + ) # come up with something that has fewer collisions + + self.task_id_to_X[task_id] = X + self.task_id_to_y[task_id] = y + + # split into transformer/voter and decider data + transformer_voter_data_idx, decider_idx = self._bifurcate_decider_idxs( + range(len(X)), transformer_voter_decider_split + ) + + self._append_decider_idx(task_id, decider_idx) + + # add new transformer and train voters and decider + # from new transformer to previous tasks + if num_transformers > 0: + + self._add_transformer( + X, + y, + transformer_data_proportion=transformer_voter_decider_split[0] + if transformer_voter_decider_split + else 1, + transformer_voter_data_idx=transformer_voter_data_idx, + transformer_id=task_id, + num_transformers=num_transformers, + transformer_class=transformer_class, + transformer_kwargs=transformer_kwargs, + backward_task_ids=backward_task_ids, + ) + + # train voters and decider from previous (and current) transformers to new task + for transformer_id in ( + forward_transformer_ids + if forward_transformer_ids + else self.get_transformer_ids() + ): + self.set_voter( + transformer_id=transformer_id, + task_id=task_id, + voter_class=voter_class, + voter_kwargs=voter_kwargs, + ) + + # train decider of new task + if forward_transformer_ids: + if num_transformers == 0: + transformer_ids = forward_transformer_ids + else: + transformer_ids = np.concatenate([forward_transformer_ids, task_id]) + else: + transformer_ids = self.get_transformer_ids() + + self.set_decider( + task_id=task_id, + transformer_ids=transformer_ids, + decider_class=decider_class, + decider_kwargs=decider_kwargs, + ) + + return self + + def update_task( + self, + X, + y, + inputclasses = None, + task_id=None, + transformer_voter_decider_split=[0.67, 0.33, 0], + num_transformers=1, + transformer_class=None, + transformer_kwargs=None, + voter_class=None, + voter_kwargs=None, + decider_class=None, + decider_kwargs=None, + backward_task_ids=None, + forward_transformer_ids=None, + ): + + """ + Adds a task to the progressive learner. Optionally trains one or more + transformer from the input data (if num_transformers > 0), adds voters + and deciders from this/these new transformer(s) to the tasks specified + in backward_task_ids, and adds voters and deciders from the transformers + specified in forward_transformer_ids (and from the newly added transformer(s) + corresponding to the input task_id if num_transformers > 0) to the + new task_id. + + Parameters + ---------- + X : ndarray + Input data matrix. + + y : ndarray + Output (response) data matrix. + + task_id : obj, default=None + The id corresponding to the task being added. + + transformer_voter_decider_split : ndarray, default=[0.67, 0.33, 0] + A 1d array of length 3. The 0th index indicates the proportions of the input + data used to train the (optional) newly added transformer(s) corresponding to + the task_id provided in this function call. The 1st index indicates the proportion of + the data set aside to train the voter(s) from these (optional) newly added + transformer(s) to the task_id provided in this function call. For all other tasks, + the aggregate transformer and voter data pairs from those tasks are used to train + the voter(s) from these (optional) newly added transformer(s) to those tasks; + for all other transformers, the aggregate transformer and voter data provided in + this function call is used to train the voter(s) from those transformers to + the task_id provided in this function call. The 2nd index indicates the + proportion of the data set aside to train the decider - these indices are saved + internally and will be used to train all further deciders corresponding to this + task for all function calls. + + num_transformers : int, default=1 + The number of transformers to add corresponding to the given inputs. + + transformer_class : BaseTransformer, default=None + The class of the transformer(s) being added. + + transformer_kwargs : dict, default=None + A dictionary with keys of type string and values of type obj corresponding + to the given string kwarg. This determines the kwargs of the transformer(s) + being added. + + voter_class : BaseVoter, default=None + The class of the voter(s) being added. + + voter_kwargs : dict, default=None + A dictionary with keys of type string and values of type obj corresponding + to the given string kwarg. This determines the kwargs of the voter(s) + being added. + + decider_class : BaseDecider, default=None + The class of the decider(s) being added. + + decider_kwargs : dict, default=None + A dictionary with keys of type string and values of type obj corresponding + to the given string kwarg. This determines the kwargs of the decider(s) + being added. + + backward_task_ids : ndarray, default=None + A 1d array of type obj used to specify to which existing task voters and deciders + will be trained from the transformer(s) being added. + + foward_transformer_ids : ndarray, default=None + A 1d array of type obj used to specify from which existing transformer(s) voters and + deciders will be trained to the new task. If num_transformers > 0, the input task_id + corresponding to the task being added is automatically appended to this 1d array. + + Returns + ------- + self : ProgressiveLearner + The object itself. + """ + + if task_id is None: + print("Error: No Task ID inputted") + return self + # come up with something that has fewer collision + self.task_id_to_transformer_id_to_voters[task_id] = {} + + self.task_id_to_X[task_id] = np.concatenate((self.task_id_to_X[task_id],X),axis=0) + self.task_id_to_y[task_id] = np.concatenate((self.task_id_to_y[task_id],y),axis=0) + + # split into transformer/voter and decider data + + transformer_voter_data_idx, decider_idx = self._bifurcate_decider_idxs( + range(len(X)), transformer_voter_decider_split + ) + self._append_decider_idx(task_id, decider_idx) + + # add new transformer and train voters and decider + # from new transformer to previous tasks + if num_transformers > 0: + self._update_transformer( + X, + y, + inputclasses = inputclasses, + transformer_data_proportion=transformer_voter_decider_split[0] + if transformer_voter_decider_split + else 1, + transformer_voter_data_idx=transformer_voter_data_idx, + transformer_id=task_id, + num_transformers=num_transformers, + transformer_class=transformer_class, + transformer_kwargs=transformer_kwargs, + backward_task_ids=backward_task_ids, + decider_kwargs = decider_kwargs + ) + + self.set_voter( + transformer_id=0, + task_id=task_id, + voter_class=voter_class, + voter_kwargs=voter_kwargs, + ) + + return self + + + def predict(self, X, task_id, transformer_ids=None): + """ + predicts labels under task_id for each example in input data X + using the given transformer_ids. + + Parameters + ---------- + X : ndarray + The input data matrix. + + task_id : obj + The id corresponding to the task being mapped to. + + transformer_ids : list, default=None + The list of transformer_ids through which a user would like + to send X (which will be pipelined with their corresponding + voters) to make an inference prediction. + + Returns + ------- + y_hat : ndarray of shape [n_samples] + predicted class label per example + """ + + if self.task_id_to_decider == {}: + raise NotFittedError + + decider = self.task_id_to_decider[task_id] + + return decider.predict(X, transformer_ids=transformer_ids) + + +class ClassificationProgressiveLearner( + ProgressiveLearner, BaseClassificationProgressiveLearner +): + """ + A (mostly) internal class for progressive learning in the classification + setting. Most users who desire to utilize ProgLearn should use the classes + defined in {network, forest}.py instead of this class. + """ + + def predict_proba(self, X, task_id, transformer_ids=None): + """ + predicts posteriors under task_id for each example in input data X + using the given transformer_ids. + + Parameters + ---------- + X : ndarray + The input data matrix. + + task_id : obj + The id corresponding to the task being mapped to. + + transformer_ids : list, default=None + The list of transformer_ids through which a user would like + to send X (which will be pipelined with their corresponding + voters) to estimate posteriors. + + Returns + ------- + y_proba_hat : ndarray of shape [n_samples, n_classes] + posteriors per example + """ + if self.task_id_to_decider == {}: + raise NotFittedError + + decider = self.task_id_to_decider[task_id] + return decider.predict_proba(X, transformer_ids=transformer_ids) diff --git a/transformers.py b/transformers.py new file mode 100644 index 0000000000..c5c9177ef0 --- /dev/null +++ b/transformers.py @@ -0,0 +1,196 @@ +""" +Main Author: Will LeVine +Corresponding Email: levinewill@icloud.com +""" +from tensorflow import keras +import numpy as np +from sklearn.tree import DecisionTreeClassifier +from sklearn.utils.validation import check_array, check_is_fitted, check_X_y + +from .base import BaseTransformer + + +class NeuralClassificationTransformer(BaseTransformer): + """ + A class used to transform data from a category to a specialized representation. + + Parameters + ---------- + network : object + A neural network used in the classification transformer. + + euclidean_layer_idx : int + An integer to represent the final layer of the transformer. + + optimizer : str or keras.optimizers instance + An optimizer used when compiling the neural network. + + loss : str, default="categorical_crossentropy" + A loss function used when compiling the neural network. + + pretrained : bool, default=False + A boolean used to identify if the network is pretrained. + + compile_kwargs : dict, default={"metrics": ["acc"]} + A dictionary containing metrics for judging network performance. + + fit_kwargs : dict, default={ + "epochs": 100, + "callbacks": [keras.callbacks.EarlyStopping(patience=5, monitor="val_acc")], + "verbose": False, + "validation_split": 0.33, + }, + A dictionary to hold epochs, callbacks, verbose, and validation split for the network. + + Attributes + ---------- + encoder_ : object + A Keras model with inputs and outputs based on the network attribute. + Output layers are determined by the euclidean_layer_idx parameter. + + fitted_ : boolean + A boolean flag initialized after the model is fitted. + """ + + def __init__( + self, + network, + euclidean_layer_idx, + optimizer, + loss="categorical_crossentropy", + pretrained=False, + compile_kwargs={"metrics": ["acc"]}, + fit_kwargs={ + "epochs": 100, + "callbacks": [keras.callbacks.EarlyStopping(patience=5, monitor="val_acc")], + "verbose": False, + "validation_split": 0.33, + }, + ): + self.network = keras.models.clone_model(network) + self.encoder_ = keras.models.Model( + inputs=self.network.inputs, + outputs=self.network.layers[euclidean_layer_idx].output, + ) + self.pretrained = pretrained + self.optimizer = optimizer + self.loss = loss + self.compile_kwargs = compile_kwargs + self.fit_kwargs = fit_kwargs + + def fit(self, X, y): + """ + Fits the transformer to data X with labels y. + + Parameters + ---------- + X : ndarray + Input data matrix. + y : ndarray + Output (i.e. response data matrix). + + Returns + ------- + self : NeuralClassificationTransformer + The object itself. + """ + check_X_y(X, y, ensure_2d=False, allow_nd=True) + _, y = np.unique(y, return_inverse=True) + + self.network.compile( + loss=self.loss, optimizer=self.optimizer, **self.compile_kwargs + ) + + self.network.fit(X, keras.utils.to_categorical(y), **self.fit_kwargs) + self.fitted_ = True + + return self + + + + def transform(self, X): + """ + Performs inference using the transformer. + + Parameters + ---------- + X : ndarray + Input data matrix. + + Returns + ------- + X_transformed : ndarray + The transformed input. + + Raises + ------ + NotFittedError + When the model is not fitted. + """ + check_array(X, ensure_2d=False, allow_nd=True) + check_is_fitted(self, attributes="fitted_") + return self.encoder_.predict(X) + + +class TreeClassificationTransformer(BaseTransformer): + """ + A class used to transform data from a category to a specialized representation. + + Parameters + ---------- + kwargs : dict, default={} + A dictionary to contain parameters of the tree. + + Attributes + ---------- + transformer : sklearn.tree.DecisionTreeClassifier + an internal sklearn DecisionTreeClassifier + """ + + def __init__(self, kwargs={}): + self.kwargs = kwargs + + + + def fit(self, X, y): + """ + Fits the transformer to data X with labels y. + + Parameters + ---------- + X : ndarray + Input data matrix. + y : ndarray + Output (i.e. response data matrix). + + Returns + ------- + self : TreeClassificationTransformer + The object itself. + """ + X, y = check_X_y(X, y) + self.transformer_ = DecisionTreeClassifier(**self.kwargs).fit(X, y) + return self + + def transform(self, X): + """ + Performs inference using the transformer. + + Parameters + ---------- + X : ndarray + Input data matrix. + + Returns + ------- + X_transformed : ndarray + The transformed input. + + Raises + ------ + NotFittedError + When the model is not fitted. + """ + X = check_array(X) + check_is_fitted(self) + return self.transformer_.apply(X) diff --git a/voters.py b/voters.py new file mode 100644 index 0000000000..4381d6aa25 --- /dev/null +++ b/voters.py @@ -0,0 +1,293 @@ +""" +Main Author: Will LeVine +Corresponding Email: levinewill@icloud.com +""" +import numpy as np +from sklearn.neighbors import KNeighborsClassifier +from sklearn.utils.validation import ( + check_X_y, + check_array, + check_is_fitted, +) +from sklearn.utils.multiclass import check_classification_targets +from .base import BaseClassificationVoter + + +class TreeClassificationVoter(BaseClassificationVoter): + """ + A class used to vote on data transformed under a tree, which inherits from + the BaseClassificationVoter class in base.py. + + Parameters + ---------- + kappa : float + coefficient for finite sample correction + If set to default, no finite sample correction is performed. + + classes : list, default=[] + list of all possible output label values + + Attributes + ---------- + missing_label_indices_ : list + a (potentially empty) list of label values + that exist in the ``classes`` parameter but + are missing in the latest ``fit`` function + call + + uniform_posterior_ : ndarray of shape (n_classes,) + the uniform posterior associated with the + """ + + def __init__(self, kappa=np.inf, classes=[]): + self.kappa = kappa + self.classes = np.asarray(classes) + + def fit(self, X, y): + """ + Fits transformed data X given corresponding class labels y. + + Parameters + ---------- + X : array of shape [n_samples, n_features] + the transformed input data + y : array of shape [n_samples] + the class labels + + Returns + ------- + self : TreeClassificationVoter + The object itself. + """ + check_classification_targets(y) + + num_fit_classes = len(np.unique(y)) + self.missing_label_indices_ = [] + + if self.classes.size != 0 and num_fit_classes < len(self.classes): + for idx, label in enumerate(self.classes): + if label not in np.unique(y): + self.missing_label_indices_.append(idx) + + self.uniform_posterior_ = np.ones(num_fit_classes) / num_fit_classes + + self.leaf_to_posterior_ = {} + + for leaf_id in np.unique(X): + idxs_in_leaf = np.where(X == leaf_id)[0] + class_counts = [ + len(np.where(y[idxs_in_leaf] == y_val)[0]) for y_val in np.unique(y) + ] + posteriors = np.nan_to_num(np.array(class_counts) / np.sum(class_counts)) + posteriors = self._finite_sample_correction( + posteriors, len(idxs_in_leaf), self.kappa + ) + self.leaf_to_posterior_[leaf_id] = posteriors + + + + return self + + def predict_proba(self, X): + """ + Returns the posterior probabilities of each class for data X. + + Parameters + ---------- + X : array of shape [n_samples, n_features] + the transformed input data + + Returns + ------- + y_proba_hat : ndarray of shape [n_samples, n_classes] + posteriors per example + + Raises + ------ + NotFittedError + When the model is not fitted. + """ + check_is_fitted(self) + votes_per_example = [] + for x in X: + if x in list(self.leaf_to_posterior_.keys()): + votes_per_example.append(self.leaf_to_posterior_[x]) + else: + votes_per_example.append(self.uniform_posterior_) + + votes_per_example = np.array(votes_per_example) + + if len(self.missing_label_indices_) > 0: + for i in self.missing_label_indices_: + new_col = np.zeros(votes_per_example.shape[0]) + votes_per_example = np.insert(votes_per_example, i, new_col, axis=1) + + return votes_per_example + + def predict(self, X): + """ + Returns the predicted class labels for data X. + + Parameters + ---------- + X : array of shape [n_samples, n_features] + the transformed input data + + Returns + ------- + y_hat : ndarray of shape [n_samples] + predicted class label per example + + Raises + ------ + NotFittedError + When the model is not fitted. + """ + return self.classes[np.argmax(self.predict_proba(X), axis=1)] + + def _finite_sample_correction(self, posteriors, num_points_in_partition, kappa): + """ + Encourage posteriors to approach uniform when there is low data through a finite sample correction. + + + Parameters + ---------- + posteriors : array of shape[n_samples, n_classes] + posterior of each class for each sample + num_points_in_partition : int + number of samples in this particular transformation + kappa : float + coefficient for finite sample correction + + Returns + ------- + y_proba_hat : ndarray of shape [n_samples, n_classes] + posteriors per example + """ + correction_constant = 1 / (kappa * num_points_in_partition) + + zero_posterior_idxs = np.where(posteriors == 0)[0] + posteriors[zero_posterior_idxs] = correction_constant + + posteriors /= sum(posteriors) + + return posteriors + + +class KNNClassificationVoter(BaseClassificationVoter): + """ + A class used to vote on data under any transformer outputting data + in continuous Euclidean space, which inherits from the BaseClassificationVoter + class in base.py. + + Parameters + ---------- + k : int + integer indicating number of neighbors to use for each prediction during + fitting and voting + + kwargs : dictionary, default={} + contains all keyword arguments for the underlying KNN + + classes : list, default=[] + list of all possible output label values + + Attributes + ---------- + missing_label_indices_ : list + a (potentially empty) list of label values + that exist in the ``classes`` parameter but + are missing in the latest ``fit`` function + call + + knn_ : sklearn.neighbors.KNeighborsClassifier + the internal sklearn instance of KNN + classifier + """ + + def __init__(self, k=None, kwargs={}, classes=[]): + self.k = k + self.kwargs = kwargs + self.classes = np.asarray(classes) + + def fit(self, X, y): + """ + Fits data X given class labels y. + + Parameters + ---------- + X : array of shape [n_samples, n_features] + the transformed data that will be trained on + y : array of shape [n_samples] + the label for class membership of the given data + + Returns + ------- + self : KNNClassificationVoter + The object itself. + """ + X, y = check_X_y(X, y) + k = int(np.log2(len(X))) if self.k == None else self.k + self.knn_ = KNeighborsClassifier(k, **self.kwargs) + self.knn_.fit(X, y) + + num_classes = len(np.unique(y)) + self.missing_label_indices_ = [] + + if self.classes.size != 0 and num_classes < len(self.classes): + for idx, label in enumerate(self.classes): + if label not in np.unique(y): + self.missing_label_indices_.append(idx) + + return self + + def predict_proba(self, X): + """ + Returns the posterior probabilities of each class for data X. + + Parameters + ---------- + X : array of shape [n_samples, n_features] + the transformed input data + + Returns + ------- + y_proba_hat : ndarray of shape [n_samples, n_classes] + posteriors per example + + Raises + ------ + NotFittedError + When the model is not fitted. + """ + check_is_fitted(self) + X = check_array(X) + votes_per_example = self.knn_.predict_proba(X) + + if len(self.missing_label_indices_) > 0: + for i in self.missing_label_indices_: + new_col = np.zeros(votes_per_example.shape[0]) + votes_per_example = np.insert(votes_per_example, i, new_col, axis=1) + + return votes_per_example + + def predict(self, X): + """ + Returns the predicted class labels for data X. + + Parameters + ---------- + X : array of shape [n_samples, n_features] + the transformed input data + + Returns + ------- + y_hat : ndarray of shape [n_samples] + predicted class label per example + + Raises + ------ + NotFittedError + When the model is not fitted. + """ + return self.classes[np.argmax(self.predict_proba(X), axis=1)] From d82c9b94c8d6c5ba1bd88dbd7efc8da786a5dd27 Mon Sep 17 00:00:00 2001 From: KevinWang905 <46271360+KevinWang905@users.noreply.github.com> Date: Mon, 13 Dec 2021 15:08:42 -0500 Subject: [PATCH 11/16] Delete voters.py --- voters.py | 293 ------------------------------------------------------ 1 file changed, 293 deletions(-) delete mode 100644 voters.py diff --git a/voters.py b/voters.py deleted file mode 100644 index 4381d6aa25..0000000000 --- a/voters.py +++ /dev/null @@ -1,293 +0,0 @@ -""" -Main Author: Will LeVine -Corresponding Email: levinewill@icloud.com -""" -import numpy as np -from sklearn.neighbors import KNeighborsClassifier -from sklearn.utils.validation import ( - check_X_y, - check_array, - check_is_fitted, -) -from sklearn.utils.multiclass import check_classification_targets -from .base import BaseClassificationVoter - - -class TreeClassificationVoter(BaseClassificationVoter): - """ - A class used to vote on data transformed under a tree, which inherits from - the BaseClassificationVoter class in base.py. - - Parameters - ---------- - kappa : float - coefficient for finite sample correction - If set to default, no finite sample correction is performed. - - classes : list, default=[] - list of all possible output label values - - Attributes - ---------- - missing_label_indices_ : list - a (potentially empty) list of label values - that exist in the ``classes`` parameter but - are missing in the latest ``fit`` function - call - - uniform_posterior_ : ndarray of shape (n_classes,) - the uniform posterior associated with the - """ - - def __init__(self, kappa=np.inf, classes=[]): - self.kappa = kappa - self.classes = np.asarray(classes) - - def fit(self, X, y): - """ - Fits transformed data X given corresponding class labels y. - - Parameters - ---------- - X : array of shape [n_samples, n_features] - the transformed input data - y : array of shape [n_samples] - the class labels - - Returns - ------- - self : TreeClassificationVoter - The object itself. - """ - check_classification_targets(y) - - num_fit_classes = len(np.unique(y)) - self.missing_label_indices_ = [] - - if self.classes.size != 0 and num_fit_classes < len(self.classes): - for idx, label in enumerate(self.classes): - if label not in np.unique(y): - self.missing_label_indices_.append(idx) - - self.uniform_posterior_ = np.ones(num_fit_classes) / num_fit_classes - - self.leaf_to_posterior_ = {} - - for leaf_id in np.unique(X): - idxs_in_leaf = np.where(X == leaf_id)[0] - class_counts = [ - len(np.where(y[idxs_in_leaf] == y_val)[0]) for y_val in np.unique(y) - ] - posteriors = np.nan_to_num(np.array(class_counts) / np.sum(class_counts)) - posteriors = self._finite_sample_correction( - posteriors, len(idxs_in_leaf), self.kappa - ) - self.leaf_to_posterior_[leaf_id] = posteriors - - - - return self - - def predict_proba(self, X): - """ - Returns the posterior probabilities of each class for data X. - - Parameters - ---------- - X : array of shape [n_samples, n_features] - the transformed input data - - Returns - ------- - y_proba_hat : ndarray of shape [n_samples, n_classes] - posteriors per example - - Raises - ------ - NotFittedError - When the model is not fitted. - """ - check_is_fitted(self) - votes_per_example = [] - for x in X: - if x in list(self.leaf_to_posterior_.keys()): - votes_per_example.append(self.leaf_to_posterior_[x]) - else: - votes_per_example.append(self.uniform_posterior_) - - votes_per_example = np.array(votes_per_example) - - if len(self.missing_label_indices_) > 0: - for i in self.missing_label_indices_: - new_col = np.zeros(votes_per_example.shape[0]) - votes_per_example = np.insert(votes_per_example, i, new_col, axis=1) - - return votes_per_example - - def predict(self, X): - """ - Returns the predicted class labels for data X. - - Parameters - ---------- - X : array of shape [n_samples, n_features] - the transformed input data - - Returns - ------- - y_hat : ndarray of shape [n_samples] - predicted class label per example - - Raises - ------ - NotFittedError - When the model is not fitted. - """ - return self.classes[np.argmax(self.predict_proba(X), axis=1)] - - def _finite_sample_correction(self, posteriors, num_points_in_partition, kappa): - """ - Encourage posteriors to approach uniform when there is low data through a finite sample correction. - - - Parameters - ---------- - posteriors : array of shape[n_samples, n_classes] - posterior of each class for each sample - num_points_in_partition : int - number of samples in this particular transformation - kappa : float - coefficient for finite sample correction - - Returns - ------- - y_proba_hat : ndarray of shape [n_samples, n_classes] - posteriors per example - """ - correction_constant = 1 / (kappa * num_points_in_partition) - - zero_posterior_idxs = np.where(posteriors == 0)[0] - posteriors[zero_posterior_idxs] = correction_constant - - posteriors /= sum(posteriors) - - return posteriors - - -class KNNClassificationVoter(BaseClassificationVoter): - """ - A class used to vote on data under any transformer outputting data - in continuous Euclidean space, which inherits from the BaseClassificationVoter - class in base.py. - - Parameters - ---------- - k : int - integer indicating number of neighbors to use for each prediction during - fitting and voting - - kwargs : dictionary, default={} - contains all keyword arguments for the underlying KNN - - classes : list, default=[] - list of all possible output label values - - Attributes - ---------- - missing_label_indices_ : list - a (potentially empty) list of label values - that exist in the ``classes`` parameter but - are missing in the latest ``fit`` function - call - - knn_ : sklearn.neighbors.KNeighborsClassifier - the internal sklearn instance of KNN - classifier - """ - - def __init__(self, k=None, kwargs={}, classes=[]): - self.k = k - self.kwargs = kwargs - self.classes = np.asarray(classes) - - def fit(self, X, y): - """ - Fits data X given class labels y. - - Parameters - ---------- - X : array of shape [n_samples, n_features] - the transformed data that will be trained on - y : array of shape [n_samples] - the label for class membership of the given data - - Returns - ------- - self : KNNClassificationVoter - The object itself. - """ - X, y = check_X_y(X, y) - k = int(np.log2(len(X))) if self.k == None else self.k - self.knn_ = KNeighborsClassifier(k, **self.kwargs) - self.knn_.fit(X, y) - - num_classes = len(np.unique(y)) - self.missing_label_indices_ = [] - - if self.classes.size != 0 and num_classes < len(self.classes): - for idx, label in enumerate(self.classes): - if label not in np.unique(y): - self.missing_label_indices_.append(idx) - - return self - - def predict_proba(self, X): - """ - Returns the posterior probabilities of each class for data X. - - Parameters - ---------- - X : array of shape [n_samples, n_features] - the transformed input data - - Returns - ------- - y_proba_hat : ndarray of shape [n_samples, n_classes] - posteriors per example - - Raises - ------ - NotFittedError - When the model is not fitted. - """ - check_is_fitted(self) - X = check_array(X) - votes_per_example = self.knn_.predict_proba(X) - - if len(self.missing_label_indices_) > 0: - for i in self.missing_label_indices_: - new_col = np.zeros(votes_per_example.shape[0]) - votes_per_example = np.insert(votes_per_example, i, new_col, axis=1) - - return votes_per_example - - def predict(self, X): - """ - Returns the predicted class labels for data X. - - Parameters - ---------- - X : array of shape [n_samples, n_features] - the transformed input data - - Returns - ------- - y_hat : ndarray of shape [n_samples] - predicted class label per example - - Raises - ------ - NotFittedError - When the model is not fitted. - """ - return self.classes[np.argmax(self.predict_proba(X), axis=1)] From 6e7e51ea723515036a5a7442e6ed632ec9b9e454 Mon Sep 17 00:00:00 2001 From: KevinWang905 <46271360+KevinWang905@users.noreply.github.com> Date: Mon, 13 Dec 2021 15:08:49 -0500 Subject: [PATCH 12/16] Delete transformers.py --- transformers.py | 196 ------------------------------------------------ 1 file changed, 196 deletions(-) delete mode 100644 transformers.py diff --git a/transformers.py b/transformers.py deleted file mode 100644 index c5c9177ef0..0000000000 --- a/transformers.py +++ /dev/null @@ -1,196 +0,0 @@ -""" -Main Author: Will LeVine -Corresponding Email: levinewill@icloud.com -""" -from tensorflow import keras -import numpy as np -from sklearn.tree import DecisionTreeClassifier -from sklearn.utils.validation import check_array, check_is_fitted, check_X_y - -from .base import BaseTransformer - - -class NeuralClassificationTransformer(BaseTransformer): - """ - A class used to transform data from a category to a specialized representation. - - Parameters - ---------- - network : object - A neural network used in the classification transformer. - - euclidean_layer_idx : int - An integer to represent the final layer of the transformer. - - optimizer : str or keras.optimizers instance - An optimizer used when compiling the neural network. - - loss : str, default="categorical_crossentropy" - A loss function used when compiling the neural network. - - pretrained : bool, default=False - A boolean used to identify if the network is pretrained. - - compile_kwargs : dict, default={"metrics": ["acc"]} - A dictionary containing metrics for judging network performance. - - fit_kwargs : dict, default={ - "epochs": 100, - "callbacks": [keras.callbacks.EarlyStopping(patience=5, monitor="val_acc")], - "verbose": False, - "validation_split": 0.33, - }, - A dictionary to hold epochs, callbacks, verbose, and validation split for the network. - - Attributes - ---------- - encoder_ : object - A Keras model with inputs and outputs based on the network attribute. - Output layers are determined by the euclidean_layer_idx parameter. - - fitted_ : boolean - A boolean flag initialized after the model is fitted. - """ - - def __init__( - self, - network, - euclidean_layer_idx, - optimizer, - loss="categorical_crossentropy", - pretrained=False, - compile_kwargs={"metrics": ["acc"]}, - fit_kwargs={ - "epochs": 100, - "callbacks": [keras.callbacks.EarlyStopping(patience=5, monitor="val_acc")], - "verbose": False, - "validation_split": 0.33, - }, - ): - self.network = keras.models.clone_model(network) - self.encoder_ = keras.models.Model( - inputs=self.network.inputs, - outputs=self.network.layers[euclidean_layer_idx].output, - ) - self.pretrained = pretrained - self.optimizer = optimizer - self.loss = loss - self.compile_kwargs = compile_kwargs - self.fit_kwargs = fit_kwargs - - def fit(self, X, y): - """ - Fits the transformer to data X with labels y. - - Parameters - ---------- - X : ndarray - Input data matrix. - y : ndarray - Output (i.e. response data matrix). - - Returns - ------- - self : NeuralClassificationTransformer - The object itself. - """ - check_X_y(X, y, ensure_2d=False, allow_nd=True) - _, y = np.unique(y, return_inverse=True) - - self.network.compile( - loss=self.loss, optimizer=self.optimizer, **self.compile_kwargs - ) - - self.network.fit(X, keras.utils.to_categorical(y), **self.fit_kwargs) - self.fitted_ = True - - return self - - - - def transform(self, X): - """ - Performs inference using the transformer. - - Parameters - ---------- - X : ndarray - Input data matrix. - - Returns - ------- - X_transformed : ndarray - The transformed input. - - Raises - ------ - NotFittedError - When the model is not fitted. - """ - check_array(X, ensure_2d=False, allow_nd=True) - check_is_fitted(self, attributes="fitted_") - return self.encoder_.predict(X) - - -class TreeClassificationTransformer(BaseTransformer): - """ - A class used to transform data from a category to a specialized representation. - - Parameters - ---------- - kwargs : dict, default={} - A dictionary to contain parameters of the tree. - - Attributes - ---------- - transformer : sklearn.tree.DecisionTreeClassifier - an internal sklearn DecisionTreeClassifier - """ - - def __init__(self, kwargs={}): - self.kwargs = kwargs - - - - def fit(self, X, y): - """ - Fits the transformer to data X with labels y. - - Parameters - ---------- - X : ndarray - Input data matrix. - y : ndarray - Output (i.e. response data matrix). - - Returns - ------- - self : TreeClassificationTransformer - The object itself. - """ - X, y = check_X_y(X, y) - self.transformer_ = DecisionTreeClassifier(**self.kwargs).fit(X, y) - return self - - def transform(self, X): - """ - Performs inference using the transformer. - - Parameters - ---------- - X : ndarray - Input data matrix. - - Returns - ------- - X_transformed : ndarray - The transformed input. - - Raises - ------ - NotFittedError - When the model is not fitted. - """ - X = check_array(X) - check_is_fitted(self) - return self.transformer_.apply(X) From c68723f4dc7328a84d00567125db8ec68fd7a877 Mon Sep 17 00:00:00 2001 From: KevinWang905 <46271360+KevinWang905@users.noreply.github.com> Date: Mon, 13 Dec 2021 15:08:58 -0500 Subject: [PATCH 13/16] Delete progressive_learner.py --- progressive_learner.py | 1023 ---------------------------------------- 1 file changed, 1023 deletions(-) delete mode 100644 progressive_learner.py diff --git a/progressive_learner.py b/progressive_learner.py deleted file mode 100644 index d093c8ac89..0000000000 --- a/progressive_learner.py +++ /dev/null @@ -1,1023 +0,0 @@ -""" -Main Author: Will LeVine -Corresponding Email: levinewill@icloud.com -""" -import numpy as np -from sklearn.exceptions import NotFittedError - -from .base import BaseClassificationProgressiveLearner, BaseProgressiveLearner - - -class ProgressiveLearner(BaseProgressiveLearner): - """ - A (mostly) internal class for progressive learning. Most users who desire to - utilize ProgLearn should use the classes defined in {network, forest}.py instead - of this class. - - Parameters - ---------- - default_transformer_class : BaseTransformer, default=None - The class of transformer to which the progressive learner defaults - if None is provided in any of the functions which add or set - transformers. - - default_transformer_kwargs : dict, default=None - A dictionary with keys of type string and values of type obj corresponding - to the given string kwarg. This determines to which type of transformer the - progressive learner defaults if None is provided in any of the functions - which add or set transformers. - - default_voter_class : BaseVoter, default=None - The class of voter to which the progressive learner defaults - if None is provided in any of the functions which add or set - voters. - - default_voter_kwargs : dict, default=None - A dictionary with keys of type string and values of type obj corresponding - to the given string kwarg. This determines to which type of voter the - progressive learner defaults if None is provided in any of the functions - which add or set voters. - - default_decider_class : BaseDecider, default=None - The class of decider to which the progressive learner defaults - if None is provided in any of the functions which add or set - deciders. - - default_decider_kwargs : dict, default=None - A dictionary with keys of type string and values of type obj corresponding - to the given string kwarg. This determines to which type of decider the - progressive learner defaults if None is provided in any of the functions - which add or set deciders. - - Attributes - ---------- - task_id_to_X : dict - A dictionary with keys of type obj corresponding to task ids - and values of type ndarray corresponding to the input data matrix X. - This dictionary thus maps input data matrix to the task where posteriors - are to be estimated. - - task_id_to_y : dict - A dictionary with keys of type obj corresponding to task ids - and values of type ndarray corresponding to output data matrix y. - This dictionary thus maps output data matrix to the task where posteriors - are to be estimated. - - transformer_id_to_X : dict - A dictionary with keys of type obj corresponding to transformer ids - and values of type ndarray corresponding to the output data matrix X. - This dictionary thus maps input data matrix to a particular transformer. - - transformer_id_to_y : dict - A dictionary with keys of type obj corresponding to transformer ids - and values of type ndarray corresponding to the output data matrix y. - This dictionary thus maps output data matrix to a particular transformer. - - transformer_id_to_transformers : dict - A dictionary with keys of type obj corresponding to transformer ids - and values of type obj corresponding to a transformer. This dictionary thus - maps transformer ids to the corresponding transformers. - - task_id_to_transformer_id_to_voters : dict - A nested dictionary with outer key of type obj, corresponding to task ids - inner key of type obj, corresponding to transformer ids, - and values of type obj, corresponding to a voter. This dictionary thus maps - voters to a corresponding transformer assigned to a particular task. - - task_id_to_decider : dict - A dictionary with keys of type obj, corresponding to task ids, - and values of type obj corresponding to a decider. This dictionary thus - maps deciders to a particular task. - - task_id_to_decider_class : dict - A dictionary with keys of type obj corresponding to task ids - and values of type obj corresponding to a decider class. This dictionary - thus maps decider classes to a particular task id. - - task_id_to_voter_class : dict - A dictionary with keys of type obj corresponding to task ids - and values of type obj corresponding to a voter class. This dictionary thus - maps voter classes to a particular task id. - - task_id_to_voter_kwargs : dict - A dictionary with keys of type obj corresponding to task ids - and values of type obj corresponding to a voter kwargs. This dictionary thus - maps voter kwargs to a particular task id. - - task_id_to_decider_kwargs : dict - A dictionary with keys of type obj corresponding to task ids - and values of type obj corresponding to a decider kwargs. This dictionary - thus maps decider kwargs to a particular task id. - - task_id_to_bag_id_to_voter_data_idx : dict - A nested dictionary with outer keys of type obj corresponding to task ids - inner keys of type obj corresponding to bag ids - and values of type obj corresponding to voter data indices. - This dictionary thus maps voter data indices to particular bags - for particular tasks. - - task_id_to_decider_idx : dict - A dictionary with keys of type obj corresponding to task ids - and values of type obj corresponding to decider indices. This dictionary - thus maps decider indices to particular tasks. - """ - - def __init__( - self, - default_transformer_class=None, - default_transformer_kwargs=None, - default_voter_class=None, - default_voter_kwargs=None, - default_decider_class=None, - default_decider_kwargs=None, - ): - - ( - self.task_id_to_X, - self.task_id_to_y, - self.transformer_id_to_X, - self.transformer_id_to_y, - ) = ({}, {}, {}, {}) - - self.transformer_id_to_transformers = {} - self.task_id_to_transformer_id_to_voters = {} - self.task_id_to_decider = {} - - self.task_id_to_decider_class = {} - self.task_id_to_decider_kwargs = {} - - self.task_id_to_voter_class = {} - self.task_id_to_voter_kwargs = {} - - self.task_id_to_bag_id_to_voter_data_idx = {} - self.task_id_to_decider_idx = {} - - self.default_transformer_class = default_transformer_class - self.default_transformer_kwargs = default_transformer_kwargs - - self.default_voter_class = default_voter_class - self.default_voter_kwargs = default_voter_kwargs - - self.default_decider_class = default_decider_class - self.default_decider_kwargs = default_decider_kwargs - - def get_transformer_ids(self): - return np.array(list(self.transformer_id_to_transformers.keys())) - - def get_task_ids(self): - return np.array(list(self.task_id_to_decider.keys())) - - def _append_transformer(self, transformer_id, transformer): - - - if transformer_id in self.get_transformer_ids(): - self.transformer_id_to_transformers[transformer_id].append(transformer) - else: - self.transformer_id_to_transformers[transformer_id] = [transformer] - - def _replace_transformer(self, transformer_id, transformer): - - self.transformer_id_to_transformers[transformer_id] = [transformer] - - def _append_voter(self, transformer_id, task_id, voter): - if task_id in list(self.task_id_to_transformer_id_to_voters.keys()): - if transformer_id in list( - self.task_id_to_transformer_id_to_voters[task_id].keys() - ): - self.task_id_to_transformer_id_to_voters[task_id][ - transformer_id - ].append(voter) - else: - self.task_id_to_transformer_id_to_voters[task_id][transformer_id] = [ - voter - ] - else: - self.task_id_to_transformer_id_to_voters[task_id] = { - transformer_id: [voter] - } - - def _append_voter_data_idx(self, task_id, bag_id, voter_data_idx): - - if task_id in list(self.task_id_to_bag_id_to_voter_data_idx.keys()): - - self.task_id_to_bag_id_to_voter_data_idx[task_id][bag_id] = voter_data_idx - else: - self.task_id_to_bag_id_to_voter_data_idx[task_id] = {bag_id: voter_data_idx} - - def _update_voter_data_idx(self, task_id, bag_id, voter_data_idx): - - if task_id in list(self.task_id_to_bag_id_to_voter_data_idx.keys()): - prev = self.task_id_to_bag_id_to_voter_data_idx[task_id][bag_id] - new = voter_data_idx - self.task_id_to_bag_id_to_voter_data_idx[task_id][bag_id] = np.append(prev, new) - else: - self.task_id_to_bag_id_to_voter_data_idx[task_id] = {bag_id: voter_data_idx} - - def _append_decider_idx(self, task_id, decider_idx): - self.task_id_to_decider_idx[task_id] = decider_idx - - def _bifurcate_decider_idxs(self, ra, transformer_voter_decider_split): - if transformer_voter_decider_split is None: - return ra, ra - else: - split = [ - np.sum(np.array(transformer_voter_decider_split)[:2]), - transformer_voter_decider_split[2], - ] - if np.sum(split) > 1: - return [ - np.random.choice(ra, int(len(ra) * p), replace=False) for p in split - ] - else: - first_idx = np.random.choice(ra, int(len(ra) * split[0]), replace=False) - second_idx = np.random.choice( - np.delete(ra, first_idx), int(len(ra) * split[1]), replace=False - ) - return first_idx, second_idx - - - def _update_transformer( - self, - X, - y, - transformer_data_proportion, - transformer_voter_data_idx, - transformer_id, - num_transformers, - transformer_class, - transformer_kwargs, - backward_task_ids, - inputclasses = None, - decider_kwargs = None - ): - - - if transformer_id not in list(self.task_id_to_X.keys()): - self.transformer_id_to_X[transformer_id] = X - if transformer_id not in list(self.task_id_to_y.keys()): - self.transformer_id_to_y[transformer_id] = y - - backward_task_ids = ( - backward_task_ids if backward_task_ids is not None else self.get_task_ids() - ) - - # for all transformers referring to specified task - counter = 0 - for transformer in self.transformer_id_to_transformers[transformer_id]: - - - # Check data and assign data for training - - if X is not None: - n = len(X) - elif y is not None: - n = len(y) - else: - n = None - if n is not None: - transformer_data_idx = np.random.choice( - transformer_voter_data_idx, - int(transformer_data_proportion * n), - replace=False, - ) - else: - transformer_data_idx = None - - X2 = ( - self.transformer_id_to_X[transformer_id] - if transformer_id in list(self.transformer_id_to_X.keys()) - else self.task_id_to_X[transformer_id] - ) - y2 = ( - self.transformer_id_to_y[transformer_id] - if transformer_id in list(self.transformer_id_to_y.keys()) - else self.task_id_to_y[transformer_id] - ) - - - - if transformer_data_idx is not None: - X2, y2 = X2[transformer_data_idx], y2[transformer_data_idx] - - transformer.transformer_.partial_fit(X2,y2, inputclasses) - - voter_data_idx = np.delete(transformer_voter_data_idx, transformer_data_idx) - - self._update_voter_data_idx( - task_id=transformer_id, - bag_id=counter, - voter_data_idx=voter_data_idx, - ) - counter = counter + 1 - - for existing_task_id in np.intersect1d(backward_task_ids, self.get_task_ids()): - self.set_voter(transformer_id=transformer_id, task_id=existing_task_id) - self.set_decider( - task_id=existing_task_id, - transformer_ids=list( - self.task_id_to_transformer_id_to_voters[existing_task_id].keys() - ), - ) - - return self - - - def _add_transformer( - self, - X, - y, - transformer_data_proportion, - transformer_voter_data_idx, - transformer_id, - num_transformers, - transformer_class, - transformer_kwargs, - backward_task_ids - ): - - - if transformer_id is None: - transformer_id = len(self.get_transformer_ids()) - - backward_task_ids = ( - backward_task_ids if backward_task_ids is not None else self.get_task_ids() - ) - - - transformer_voter_data_idx = ( - range(len(X)) - if transformer_voter_data_idx is None - else transformer_voter_data_idx - ) - - if transformer_id not in list(self.task_id_to_X.keys()): - self.transformer_id_to_X[transformer_id] = X - if transformer_id not in list(self.task_id_to_y.keys()): - self.transformer_id_to_y[transformer_id] = y - - # train new transformers - for transformer_num in range(num_transformers): - if X is not None: - n = len(X) - elif y is not None: - n = len(y) - else: - n = None - if n is not None: - transformer_data_idx = np.random.choice( - transformer_voter_data_idx, - int(transformer_data_proportion * n), - replace=False, - ) - else: - transformer_data_idx = None - self.set_transformer( - transformer_id=transformer_id, - transformer_data_idx=transformer_data_idx, - transformer_class=transformer_class, - transformer_kwargs=transformer_kwargs, - ) - voter_data_idx = np.delete(transformer_voter_data_idx, transformer_data_idx) - self._append_voter_data_idx( - task_id=transformer_id, - bag_id=transformer_num, - voter_data_idx=voter_data_idx, - ) - - # train voters and deciders from new transformer to previous tasks - for existing_task_id in np.intersect1d(backward_task_ids, self.get_task_ids()): - self.set_voter(transformer_id=transformer_id, task_id=existing_task_id) - self.set_decider( - task_id=existing_task_id, - transformer_ids=list( - self.task_id_to_transformer_id_to_voters[existing_task_id].keys() - ), - ) - - return self - - # make sure the below ganular functions work without add_{transformer, task} - def set_transformer( - self, - transformer_id=None, - transformer=None, - transformer_data_idx=None, - transformer_class=None, - transformer_kwargs=None, - ): - if transformer_id is None: - transformer_id = len(self.get_transformer_ids()) - - X = ( - self.transformer_id_to_X[transformer_id] - if transformer_id in list(self.transformer_id_to_X.keys()) - else self.task_id_to_X[transformer_id] - ) - y = ( - self.transformer_id_to_y[transformer_id] - if transformer_id in list(self.transformer_id_to_y.keys()) - else self.task_id_to_y[transformer_id] - ) - if transformer_data_idx is not None: - X, y = X[transformer_data_idx], y[transformer_data_idx] - - if X is None and y is None: - if transformer.is_fitted(): - self._append_transformer(transformer_id, transformer) - else: - raise ValueError( - "transformer_class is not fitted and X is None and y is None." - ) - return - - # Type check X - - if transformer_class is None: - if self.default_transformer_class is None: - raise ValueError( - "transformer_class is None and 'default_transformer_class' is None." - ) - else: - transformer_class = self.default_transformer_class - - if transformer_kwargs is None: - if self.default_transformer_kwargs is None: - raise ValueError( - """transformer_kwargs is None and - 'default_transformer_kwargs' is None.""" - ) - else: - transformer_kwargs = self.default_transformer_kwargs - - # Fit transformer and new voter - if y is None: - self._append_transformer( - transformer_id, transformer_class(**transformer_kwargs).fit(X) - ) - else: - # Type check y - self._append_transformer( - transformer_id, transformer_class(**transformer_kwargs).fit(X, y) - ) - - def set_voter( - self, - transformer_id, - task_id=None, - voter_class=None, - voter_kwargs=None, - bag_id=None, - ): - - - # Type check X - - # Type check y - - if task_id is None: - task_id = len(self.get_task_ids()) - - if voter_class is None: - if ( - task_id in list(self.task_id_to_voter_class.keys()) - and self.task_id_to_voter_class[task_id] is not None - ): - voter_class = self.task_id_to_voter_class[task_id] - elif self.default_voter_class is not None: - voter_class = self.default_voter_class - else: - raise ValueError( - """voter_class is None, the default voter class for the overall - learner is None, and the default voter class - for this transformer is None.""" - ) - - if voter_kwargs is None: - if ( - task_id in list(self.task_id_to_voter_kwargs.keys()) - and self.task_id_to_voter_kwargs[task_id] is not None - ): - voter_kwargs = self.task_id_to_voter_kwargs[task_id] - elif self.default_voter_kwargs is not None: - voter_kwargs = self.default_voter_kwargs - else: - raise ValueError( - """voter_kwargs is None, the default voter kwargs for the overall - learner is None, and the default voter kwargs - for this transformer is None.""" - ) - - X = self.task_id_to_X[task_id] - y = self.task_id_to_y[task_id] - if bag_id is None: - transformers = self.transformer_id_to_transformers[transformer_id] - else: - transformers = [self.transformer_id_to_transformers[transformer_id][bag_id]] - for transformer_num, transformer in enumerate(transformers): - if transformer_id == task_id: - voter_data_idx = self.task_id_to_bag_id_to_voter_data_idx[task_id][ - transformer_num - ] - else: - voter_data_idx = np.delete( - range(len(X)), self.task_id_to_decider_idx[task_id] - ) - self._append_voter( - transformer_id, - task_id, - voter_class(**voter_kwargs).fit( - transformer.transform(X[voter_data_idx]), y[voter_data_idx] - ), - ) - - self.task_id_to_voter_class[task_id] = voter_class - self.task_id_to_voter_kwargs[task_id] = voter_kwargs - - def set_decider( - self, task_id, transformer_ids, decider_class=None, decider_kwargs=None - ): - if decider_class is None: - if task_id in list(self.task_id_to_decider_class.keys()): - decider_class = self.task_id_to_decider_class[task_id] - elif self.default_decider_class is not None: - decider_class = self.default_decider_class - else: - raise ValueError( - "decider_class is None and 'default_decider_class' is None." - ) - if decider_kwargs is None: - if task_id in list(self.task_id_to_decider_kwargs.keys()): - decider_kwargs = self.task_id_to_decider_kwargs[task_id] - elif self.default_decider_kwargs is not None: - decider_kwargs = self.default_decider_kwargs - else: - raise ValueError( - "decider_kwargs is None and 'default_decider_kwargs' is None." - ) - - transformer_id_to_transformers = { - transformer_id: self.transformer_id_to_transformers[transformer_id] - for transformer_id in transformer_ids - } - transformer_id_to_voters = { - transformer_id: self.task_id_to_transformer_id_to_voters[task_id][ - transformer_id - ] - for transformer_id in transformer_ids - } - - X, y = self.task_id_to_X[task_id], self.task_id_to_y[task_id] - - self.task_id_to_decider[task_id] = decider_class(**decider_kwargs) - decider_idx = self.task_id_to_decider_idx[task_id] - - self.task_id_to_decider[task_id].fit( - X[decider_idx], - y[decider_idx], - transformer_id_to_transformers, - transformer_id_to_voters, - ) - - self.task_id_to_decider_class[task_id] = decider_class - self.task_id_to_decider_kwargs[task_id] = decider_kwargs - - def add_transformer( - self, - X, - y, - transformer_data_proportion=1.0, - transformer_voter_data_idx=None, - transformer_id=None, - num_transformers=1, - transformer_class=None, - transformer_kwargs=None, - backward_task_ids=None, - ): - """ - Adds a transformer to the progressive learner and trains the voters and - deciders from this new transformer to the specified backward_task_ids. - - Parameters - ---------- - X : ndarray - Input data matrix. - - y : ndarray - Output (response) data matrix. - - transformer_data_proportion : float, default=1.0 - The proportion of the data set aside to train the transformer. The - remainder of the data is used to train voters. This is used in the - case that you are using a bagging algorithm and want the various - components in that bagging ensemble to train on disjoint subsets of - the data. This parameter is mostly for internal use. - - transformer_voter_data_idx : ndarray, default=None - A 1d array of type int used to specify the aggregate indices of the input - data used to train the transformers and voters. This is used in the - case that X and/or y contain data that you do not want to use to train - transformers or voters (e.g. X and/or y contains decider training data - disjoint from the transformer/voter data). This parameter is mostly - for internal use. - - transformer_id : obj, default=None - The id corresponding to the transformer being added. - - num_transformers : int, default=1 - The number of transformers to add corresponding to the given inputs. - - transformer_class : BaseTransformer, default=None - The class of the transformer(s) being added. - - transformer_kwargs : dict, default=None - A dictionary with keys of type string and values of type obj corresponding - to the given string kwarg. This determines the kwargs of the transformer(s) - being added. - - backward_task_ids : ndarray, default=None - A 1d array of type obj used to specify to which existing task voters and deciders - will be trained from the transformer(s) being added. - - Returns - ------- - self : ProgressiveLearner - The object itself. - """ - return self._add_transformer( - X, - y, - transformer_data_proportion=transformer_data_proportion, - transformer_voter_data_idx=transformer_voter_data_idx, - transformer_id=transformer_id, - num_transformers=num_transformers, - transformer_class=transformer_class, - transformer_kwargs=transformer_kwargs, - backward_task_ids=backward_task_ids, - ) - - def add_task( - self, - X, - y, - task_id=None, - transformer_voter_decider_split=[0.67, 0.33, 0], - num_transformers=1, - transformer_class=None, - transformer_kwargs=None, - voter_class=None, - voter_kwargs=None, - decider_class=None, - decider_kwargs=None, - backward_task_ids=None, - forward_transformer_ids=None, - ): - - """ - Adds a task to the progressive learner. Optionally trains one or more - transformer from the input data (if num_transformers > 0), adds voters - and deciders from this/these new transformer(s) to the tasks specified - in backward_task_ids, and adds voters and deciders from the transformers - specified in forward_transformer_ids (and from the newly added transformer(s) - corresponding to the input task_id if num_transformers > 0) to the - new task_id. - - Parameters - ---------- - X : ndarray - Input data matrix. - - y : ndarray - Output (response) data matrix. - - task_id : obj, default=None - The id corresponding to the task being added. - - transformer_voter_decider_split : ndarray, default=[0.67, 0.33, 0] - A 1d array of length 3. The 0th index indicates the proportions of the input - data used to train the (optional) newly added transformer(s) corresponding to - the task_id provided in this function call. The 1st index indicates the proportion of - the data set aside to train the voter(s) from these (optional) newly added - transformer(s) to the task_id provided in this function call. For all other tasks, - the aggregate transformer and voter data pairs from those tasks are used to train - the voter(s) from these (optional) newly added transformer(s) to those tasks; - for all other transformers, the aggregate transformer and voter data provided in - this function call is used to train the voter(s) from those transformers to - the task_id provided in this function call. The 2nd index indicates the - proportion of the data set aside to train the decider - these indices are saved - internally and will be used to train all further deciders corresponding to this - task for all function calls. - - num_transformers : int, default=1 - The number of transformers to add corresponding to the given inputs. - - transformer_class : BaseTransformer, default=None - The class of the transformer(s) being added. - - transformer_kwargs : dict, default=None - A dictionary with keys of type string and values of type obj corresponding - to the given string kwarg. This determines the kwargs of the transformer(s) - being added. - - voter_class : BaseVoter, default=None - The class of the voter(s) being added. - - voter_kwargs : dict, default=None - A dictionary with keys of type string and values of type obj corresponding - to the given string kwarg. This determines the kwargs of the voter(s) - being added. - - decider_class : BaseDecider, default=None - The class of the decider(s) being added. - - decider_kwargs : dict, default=None - A dictionary with keys of type string and values of type obj corresponding - to the given string kwarg. This determines the kwargs of the decider(s) - being added. - - backward_task_ids : ndarray, default=None - A 1d array of type obj used to specify to which existing task voters and deciders - will be trained from the transformer(s) being added. - - foward_transformer_ids : ndarray, default=None - A 1d array of type obj used to specify from which existing transformer(s) voters and - deciders will be trained to the new task. If num_transformers > 0, the input task_id - corresponding to the task being added is automatically appended to this 1d array. - - Returns - ------- - self : ProgressiveLearner - The object itself. - """ - if task_id is None: - task_id = max( - len(self.get_transformer_ids()), len(self.get_task_ids()) - ) # come up with something that has fewer collisions - - self.task_id_to_X[task_id] = X - self.task_id_to_y[task_id] = y - - # split into transformer/voter and decider data - transformer_voter_data_idx, decider_idx = self._bifurcate_decider_idxs( - range(len(X)), transformer_voter_decider_split - ) - - self._append_decider_idx(task_id, decider_idx) - - # add new transformer and train voters and decider - # from new transformer to previous tasks - if num_transformers > 0: - - self._add_transformer( - X, - y, - transformer_data_proportion=transformer_voter_decider_split[0] - if transformer_voter_decider_split - else 1, - transformer_voter_data_idx=transformer_voter_data_idx, - transformer_id=task_id, - num_transformers=num_transformers, - transformer_class=transformer_class, - transformer_kwargs=transformer_kwargs, - backward_task_ids=backward_task_ids, - ) - - # train voters and decider from previous (and current) transformers to new task - for transformer_id in ( - forward_transformer_ids - if forward_transformer_ids - else self.get_transformer_ids() - ): - self.set_voter( - transformer_id=transformer_id, - task_id=task_id, - voter_class=voter_class, - voter_kwargs=voter_kwargs, - ) - - # train decider of new task - if forward_transformer_ids: - if num_transformers == 0: - transformer_ids = forward_transformer_ids - else: - transformer_ids = np.concatenate([forward_transformer_ids, task_id]) - else: - transformer_ids = self.get_transformer_ids() - - self.set_decider( - task_id=task_id, - transformer_ids=transformer_ids, - decider_class=decider_class, - decider_kwargs=decider_kwargs, - ) - - return self - - def update_task( - self, - X, - y, - inputclasses = None, - task_id=None, - transformer_voter_decider_split=[0.67, 0.33, 0], - num_transformers=1, - transformer_class=None, - transformer_kwargs=None, - voter_class=None, - voter_kwargs=None, - decider_class=None, - decider_kwargs=None, - backward_task_ids=None, - forward_transformer_ids=None, - ): - - """ - Adds a task to the progressive learner. Optionally trains one or more - transformer from the input data (if num_transformers > 0), adds voters - and deciders from this/these new transformer(s) to the tasks specified - in backward_task_ids, and adds voters and deciders from the transformers - specified in forward_transformer_ids (and from the newly added transformer(s) - corresponding to the input task_id if num_transformers > 0) to the - new task_id. - - Parameters - ---------- - X : ndarray - Input data matrix. - - y : ndarray - Output (response) data matrix. - - task_id : obj, default=None - The id corresponding to the task being added. - - transformer_voter_decider_split : ndarray, default=[0.67, 0.33, 0] - A 1d array of length 3. The 0th index indicates the proportions of the input - data used to train the (optional) newly added transformer(s) corresponding to - the task_id provided in this function call. The 1st index indicates the proportion of - the data set aside to train the voter(s) from these (optional) newly added - transformer(s) to the task_id provided in this function call. For all other tasks, - the aggregate transformer and voter data pairs from those tasks are used to train - the voter(s) from these (optional) newly added transformer(s) to those tasks; - for all other transformers, the aggregate transformer and voter data provided in - this function call is used to train the voter(s) from those transformers to - the task_id provided in this function call. The 2nd index indicates the - proportion of the data set aside to train the decider - these indices are saved - internally and will be used to train all further deciders corresponding to this - task for all function calls. - - num_transformers : int, default=1 - The number of transformers to add corresponding to the given inputs. - - transformer_class : BaseTransformer, default=None - The class of the transformer(s) being added. - - transformer_kwargs : dict, default=None - A dictionary with keys of type string and values of type obj corresponding - to the given string kwarg. This determines the kwargs of the transformer(s) - being added. - - voter_class : BaseVoter, default=None - The class of the voter(s) being added. - - voter_kwargs : dict, default=None - A dictionary with keys of type string and values of type obj corresponding - to the given string kwarg. This determines the kwargs of the voter(s) - being added. - - decider_class : BaseDecider, default=None - The class of the decider(s) being added. - - decider_kwargs : dict, default=None - A dictionary with keys of type string and values of type obj corresponding - to the given string kwarg. This determines the kwargs of the decider(s) - being added. - - backward_task_ids : ndarray, default=None - A 1d array of type obj used to specify to which existing task voters and deciders - will be trained from the transformer(s) being added. - - foward_transformer_ids : ndarray, default=None - A 1d array of type obj used to specify from which existing transformer(s) voters and - deciders will be trained to the new task. If num_transformers > 0, the input task_id - corresponding to the task being added is automatically appended to this 1d array. - - Returns - ------- - self : ProgressiveLearner - The object itself. - """ - - if task_id is None: - print("Error: No Task ID inputted") - return self - # come up with something that has fewer collision - self.task_id_to_transformer_id_to_voters[task_id] = {} - - self.task_id_to_X[task_id] = np.concatenate((self.task_id_to_X[task_id],X),axis=0) - self.task_id_to_y[task_id] = np.concatenate((self.task_id_to_y[task_id],y),axis=0) - - # split into transformer/voter and decider data - - transformer_voter_data_idx, decider_idx = self._bifurcate_decider_idxs( - range(len(X)), transformer_voter_decider_split - ) - self._append_decider_idx(task_id, decider_idx) - - # add new transformer and train voters and decider - # from new transformer to previous tasks - if num_transformers > 0: - self._update_transformer( - X, - y, - inputclasses = inputclasses, - transformer_data_proportion=transformer_voter_decider_split[0] - if transformer_voter_decider_split - else 1, - transformer_voter_data_idx=transformer_voter_data_idx, - transformer_id=task_id, - num_transformers=num_transformers, - transformer_class=transformer_class, - transformer_kwargs=transformer_kwargs, - backward_task_ids=backward_task_ids, - decider_kwargs = decider_kwargs - ) - - self.set_voter( - transformer_id=0, - task_id=task_id, - voter_class=voter_class, - voter_kwargs=voter_kwargs, - ) - - return self - - - def predict(self, X, task_id, transformer_ids=None): - """ - predicts labels under task_id for each example in input data X - using the given transformer_ids. - - Parameters - ---------- - X : ndarray - The input data matrix. - - task_id : obj - The id corresponding to the task being mapped to. - - transformer_ids : list, default=None - The list of transformer_ids through which a user would like - to send X (which will be pipelined with their corresponding - voters) to make an inference prediction. - - Returns - ------- - y_hat : ndarray of shape [n_samples] - predicted class label per example - """ - - if self.task_id_to_decider == {}: - raise NotFittedError - - decider = self.task_id_to_decider[task_id] - - return decider.predict(X, transformer_ids=transformer_ids) - - -class ClassificationProgressiveLearner( - ProgressiveLearner, BaseClassificationProgressiveLearner -): - """ - A (mostly) internal class for progressive learning in the classification - setting. Most users who desire to utilize ProgLearn should use the classes - defined in {network, forest}.py instead of this class. - """ - - def predict_proba(self, X, task_id, transformer_ids=None): - """ - predicts posteriors under task_id for each example in input data X - using the given transformer_ids. - - Parameters - ---------- - X : ndarray - The input data matrix. - - task_id : obj - The id corresponding to the task being mapped to. - - transformer_ids : list, default=None - The list of transformer_ids through which a user would like - to send X (which will be pipelined with their corresponding - voters) to estimate posteriors. - - Returns - ------- - y_proba_hat : ndarray of shape [n_samples, n_classes] - posteriors per example - """ - if self.task_id_to_decider == {}: - raise NotFittedError - - decider = self.task_id_to_decider[task_id] - return decider.predict_proba(X, transformer_ids=transformer_ids) From 2f4dd6784cf893d7e8996fd399ddbc83e0ebef21 Mon Sep 17 00:00:00 2001 From: KevinWang905 <46271360+KevinWang905@users.noreply.github.com> Date: Mon, 13 Dec 2021 15:09:06 -0500 Subject: [PATCH 14/16] Delete deciders.py --- deciders.py | 173 ---------------------------------------------------- 1 file changed, 173 deletions(-) delete mode 100644 deciders.py diff --git a/deciders.py b/deciders.py deleted file mode 100644 index 3d5c3412bb..0000000000 --- a/deciders.py +++ /dev/null @@ -1,173 +0,0 @@ -""" -Main Author: Will LeVine -Corresponding Email: levinewill@icloud.com -""" -import numpy as np - -from .base import BaseClassificationDecider - -from sklearn.utils.validation import ( - check_X_y, - check_array, - check_is_fitted, -) - - -class SimpleArgmaxAverage(BaseClassificationDecider): - """ - A class for a decider that uses the average vote for classification. - - Parameters - ---------- - classes : list, default=[] - List of final output classification labels of type obj. - - Attributes - ---------- - transformer_id_to_transformers_ : dict - A dictionary with keys of type obj corresponding to transformer ids - and values of type obj corresponding to a transformer. This dictionary - maps transformers to a particular transformer id. - - transformer_id_to_voters_ : dict - A dictionary with keys of type obj corresponding to transformer ids - and values of type obj corresponding to a voter class. This dictionary - maps voter classes to a particular transformer id. - """ - - def __init__(self, classes=[]): - self.classes = classes - - def fit( - self, - X, - y, - transformer_id_to_transformers, - transformer_id_to_voters, - ): - """ - Function for fitting. - Stores attributes (classes, transformer_id_to_transformers, - and transformer_id_to_voters) of a ClassificationDecider. - - Parameters: - ----------- - X : ndarray - Input data matrix. - - y : ndarray - Output (i.e. response) data matrix. - - transformer_id_to_transformers : dict - A dictionary with keys of type obj corresponding to transformer ids - and values of type obj corresponding to a transformer. This dictionary - maps transformers to a particular transformer id. - - transformer_id_to_voters : dict - A dictionary with keys of type obj corresponding to transformer ids - and values of type obj corresponding to a voter class. This dictionary thus - maps voter classes to a particular transformer id. - - Returns - ------- - self : SimpleArgmaxAverage - The object itself. - - Raises - ------- - ValueError - When the labels have not been provided and the classes are empty. - """ - if not isinstance(self.classes, (list, np.ndarray)): - if len(y) == 0: - raise ValueError( - "Classification Decider classes undefined with no class labels fed to fit" - ) - else: - self.classes = np.unique(y) - else: - self.classes = np.array(self.classes) - self.transformer_id_to_transformers_ = transformer_id_to_transformers - self.transformer_id_to_voters_ = transformer_id_to_voters - return self - - def predict_proba(self, X, transformer_ids=None): - """ - Predicts posterior probabilities per input example. - - Loops through each transformer and bag of transformers. - Performs a transformation of the input data with the transformer. - Gets a voter to map the transformed input data into a posterior distribution. - Gets the mean vote per bagging component and append it to a vote per transformer id. - Returns the aggregate average vote. - - Parameters - ---------- - X : ndarray - Input data matrix. - - transformer_ids : list, default=None - A list with specific transformer ids that will be used for inference. Defaults - to using all transformers if no transformer ids are given. - - Returns - ------- - y_proba_hat : ndarray of shape [n_samples, n_classes] - posteriors per example - - - Raises - ------ - NotFittedError - When the model is not fitted. - """ - check_is_fitted(self) - vote_per_transformer_id = [] - for transformer_id in ( - transformer_ids - if transformer_ids is not None - else self.transformer_id_to_voters_.keys() - ): - check_is_fitted(self) - vote_per_bag_id = [] - for bag_id in range( - len(self.transformer_id_to_transformers_[transformer_id]) - ): - transformer = self.transformer_id_to_transformers_[transformer_id][ - bag_id - ] - X_transformed = transformer.transform(X) - voter = self.transformer_id_to_voters_[transformer_id][bag_id] - vote = voter.predict_proba(X_transformed) - vote_per_bag_id.append(vote) - vote_per_transformer_id.append(np.mean(vote_per_bag_id, axis=0)) - return np.mean(vote_per_transformer_id, axis=0) - - def predict(self, X, transformer_ids=None): - """ - Predicts the most likely class per input example. - - Uses the predict_proba method to get the mean vote per id. - Returns the class with the highest vote. - - Parameters - ---------- - X : ndarray - Input data matrix. - - transformer_ids : list, default=None - A list with all transformer ids. Defaults to None if no transformer ids - are given. - - Returns - ------- - y_hat : ndarray of shape [n_samples] - predicted class label per example - - Raises - ------ - NotFittedError - When the model is not fitted. - """ - vote_overall = self.predict_proba(X, transformer_ids=transformer_ids) - return self.classes[np.argmax(vote_overall, axis=1)] From 3559ea8d5e60d55e91801e1890ca0066e5341526 Mon Sep 17 00:00:00 2001 From: KevinWang905 <46271360+KevinWang905@users.noreply.github.com> Date: Mon, 13 Dec 2021 15:09:39 -0500 Subject: [PATCH 15/16] Fixed voter and transformer updating --- proglearn/deciders.py | 7 - proglearn/progressive_learner.py | 255 +++++++------------------------ proglearn/transformers.py | 1 - proglearn/voters.py | 2 + 4 files changed, 58 insertions(+), 207 deletions(-) diff --git a/proglearn/deciders.py b/proglearn/deciders.py index 0945256236..3d5c3412bb 100755 --- a/proglearn/deciders.py +++ b/proglearn/deciders.py @@ -37,8 +37,6 @@ class SimpleArgmaxAverage(BaseClassificationDecider): def __init__(self, classes=[]): self.classes = classes - print("initialized decider") - print(self.classes) def fit( self, @@ -80,9 +78,6 @@ def fit( ValueError When the labels have not been provided and the classes are empty. """ - print("deciders.fit") - print(y) - print(self.classes) if not isinstance(self.classes, (list, np.ndarray)): if len(y) == 0: raise ValueError( @@ -175,6 +170,4 @@ def predict(self, X, transformer_ids=None): When the model is not fitted. """ vote_overall = self.predict_proba(X, transformer_ids=transformer_ids) - print(vote_overall) - print(self.classes) return self.classes[np.argmax(vote_overall, axis=1)] diff --git a/proglearn/progressive_learner.py b/proglearn/progressive_learner.py index f58b28a255..d093c8ac89 100755 --- a/proglearn/progressive_learner.py +++ b/proglearn/progressive_learner.py @@ -78,7 +78,7 @@ class ProgressiveLearner(BaseProgressiveLearner): and values of type obj corresponding to a transformer. This dictionary thus maps transformer ids to the corresponding transformers. - task_id_to_trasnformer_id_to_voters : dict + task_id_to_transformer_id_to_voters : dict A nested dictionary with outer key of type obj, corresponding to task ids inner key of type obj, corresponding to transformer ids, and values of type obj, corresponding to a voter. This dictionary thus maps @@ -168,7 +168,7 @@ def get_task_ids(self): return np.array(list(self.task_id_to_decider.keys())) def _append_transformer(self, transformer_id, transformer): - #print([transformer]) + if transformer_id in self.get_transformer_ids(): self.transformer_id_to_transformers[transformer_id].append(transformer) @@ -176,7 +176,7 @@ def _append_transformer(self, transformer_id, transformer): self.transformer_id_to_transformers[transformer_id] = [transformer] def _replace_transformer(self, transformer_id, transformer): - #print([transformer]) + self.transformer_id_to_transformers[transformer_id] = [transformer] def _append_voter(self, transformer_id, task_id, voter): @@ -197,10 +197,21 @@ def _append_voter(self, transformer_id, task_id, voter): } def _append_voter_data_idx(self, task_id, bag_id, voter_data_idx): + if task_id in list(self.task_id_to_bag_id_to_voter_data_idx.keys()): + self.task_id_to_bag_id_to_voter_data_idx[task_id][bag_id] = voter_data_idx else: self.task_id_to_bag_id_to_voter_data_idx[task_id] = {bag_id: voter_data_idx} + + def _update_voter_data_idx(self, task_id, bag_id, voter_data_idx): + + if task_id in list(self.task_id_to_bag_id_to_voter_data_idx.keys()): + prev = self.task_id_to_bag_id_to_voter_data_idx[task_id][bag_id] + new = voter_data_idx + self.task_id_to_bag_id_to_voter_data_idx[task_id][bag_id] = np.append(prev, new) + else: + self.task_id_to_bag_id_to_voter_data_idx[task_id] = {bag_id: voter_data_idx} def _append_decider_idx(self, task_id, decider_idx): self.task_id_to_decider_idx[task_id] = decider_idx @@ -236,17 +247,20 @@ def _update_transformer( transformer_class, transformer_kwargs, backward_task_ids, - inputclasses = None + inputclasses = None, + decider_kwargs = None ): - print("transformer id at _update_transformer = " + str(transformer_id)) - print("testtesttest") - print('backward_task_ids: '+str(backward_task_ids)) + if transformer_id not in list(self.task_id_to_X.keys()): self.transformer_id_to_X[transformer_id] = X if transformer_id not in list(self.task_id_to_y.keys()): self.transformer_id_to_y[transformer_id] = y + backward_task_ids = ( + backward_task_ids if backward_task_ids is not None else self.get_task_ids() + ) + # for all transformers referring to specified task counter = 0 for transformer in self.transformer_id_to_transformers[transformer_id]: @@ -285,114 +299,28 @@ def _update_transformer( if transformer_data_idx is not None: X2, y2 = X2[transformer_data_idx], y2[transformer_data_idx] - # replace transformer with updated tranformer transformer.transformer_.partial_fit(X2,y2, inputclasses) - # update voter to new transformer + voter_data_idx = np.delete(transformer_voter_data_idx, transformer_data_idx) - # voter_data_idx = np.delete(transformer_voter_data_idx, transformer_data_idx) - # self._append_voter_data_idx( - # task_id=transformer_id, - # bag_id=counter, - # voter_data_idx=voter_data_idx, - #) + self._update_voter_data_idx( + task_id=transformer_id, + bag_id=counter, + voter_data_idx=voter_data_idx, + ) counter = counter + 1 - - - # # Update backwards - # backward_task_ids = ( - # backward_task_ids if backward_task_ids is not None else self.get_task_ids() - # ) - # print('backward_task_ids: '+str(backward_task_ids)) - # transformer_voter_data_idx = ( - # range(len(X)) - # if transformer_voter_data_idx is None - # else transformer_voter_data_idx - # ) - - # #train voters and deciders from new transformer to previous tasks - # for existing_task_id in np.intersect1d(backward_task_ids, self.get_task_ids()): - # self.set_voter(transformer_id=transformer_id, task_id=existing_task_id) - # print("backpropagation") - # print(list( - # self.task_id_to_transformer_id_to_voters[existing_task_id].keys())) - - # self.set_decider( - # task_id=existing_task_id, - # transformer_ids=list( - # self.task_id_to_transformer_id_to_voters[existing_task_id].keys() - # ), - # ) + + for existing_task_id in np.intersect1d(backward_task_ids, self.get_task_ids()): + self.set_voter(transformer_id=transformer_id, task_id=existing_task_id) + self.set_decider( + task_id=existing_task_id, + transformer_ids=list( + self.task_id_to_transformer_id_to_voters[existing_task_id].keys() + ), + ) return self - def set_updated_transformer( - self, - transformer_id=None, - transformer=None, - transformer_data_idx=None, - transformer_class=None, - transformer_kwargs=None, - inputclasses = None - ): - - if transformer_id is None: - transformer_id = len(self.get_transformer_ids()) - - - X = ( - self.transformer_id_to_X[transformer_id] - if transformer_id in list(self.transformer_id_to_X.keys()) - else self.task_id_to_X[transformer_id] - ) - y = ( - self.transformer_id_to_y[transformer_id] - if transformer_id in list(self.transformer_id_to_y.keys()) - else self.task_id_to_y[transformer_id] - ) - if transformer_data_idx is not None: - X, y = X[transformer_data_idx], y[transformer_data_idx] - - if X is None and y is None: - if transformer.is_fitted(): - self._append_transformer(transformer_id, transformer) - else: - raise ValueError( - "transformer_class is not fitted and X is None and y is None." - ) - return - - # Type check X - - if transformer_class is None: - if self.default_transformer_class is None: - raise ValueError( - "transformer_class is None and 'default_transformer_class' is None." - ) - else: - transformer_class = self.default_transformer_class - - if transformer_kwargs is None: - if self.default_transformer_kwargs is None: - raise ValueError( - """transformer_kwargs is None and - 'default_transformer_kwargs' is None.""" - ) - else: - transformer_kwargs = self.default_transformer_kwargs - - # Update transformer - if y is None: - self._replace_transformer( - transformer_id, transformer_class(**transformer_kwargs)._partial_fit(X, inputclasses = inputclasses) - ) - else: - # Type check y - print(transformer_id) - self._replace_transformer( - transformer_id, transformer_class(**transformer_kwargs)._partial_fit(X, y, inputclasses = inputclasses) - #transformer_id, transformer_class(**transformer_kwargs).fit(X, y) - ) def _add_transformer( self, @@ -407,7 +335,6 @@ def _add_transformer( backward_task_ids ): - print('in _add_transformer \n') if transformer_id is None: transformer_id = len(self.get_transformer_ids()) @@ -416,7 +343,6 @@ def _add_transformer( backward_task_ids if backward_task_ids is not None else self.get_task_ids() ) - print('backward_task_ids: '+str(backward_task_ids)) transformer_voter_data_idx = ( range(len(X)) @@ -459,14 +385,7 @@ def _add_transformer( ) # train voters and deciders from new transformer to previous tasks - - print(np.intersect1d(backward_task_ids, self.get_task_ids())) for existing_task_id in np.intersect1d(backward_task_ids, self.get_task_ids()): - - print("backpropagation") - print(list( - self.task_id_to_transformer_id_to_voters[existing_task_id].keys())) - self.set_voter(transformer_id=transformer_id, task_id=existing_task_id) self.set_decider( task_id=existing_task_id, @@ -486,7 +405,6 @@ def set_transformer( transformer_class=None, transformer_kwargs=None, ): - if transformer_id is None: transformer_id = len(self.get_transformer_ids()) @@ -551,6 +469,7 @@ def set_voter( bag_id=None, ): + # Type check X # Type check y @@ -593,11 +512,8 @@ def set_voter( if bag_id is None: transformers = self.transformer_id_to_transformers[transformer_id] else: - transformers = [self.transformer_id_to_transformers[transformer_id][bag_id]] - - print("trasnformsers length = " + str(len(transformers))) + transformers = [self.transformer_id_to_transformers[transformer_id][bag_id]] for transformer_num, transformer in enumerate(transformers): - #print(transformer_num) if transformer_id == task_id: voter_data_idx = self.task_id_to_bag_id_to_voter_data_idx[task_id][ transformer_num @@ -620,8 +536,6 @@ def set_voter( def set_decider( self, task_id, transformer_ids, decider_class=None, decider_kwargs=None ): - - print("decider kwargs at start of set_decider: "+str(decider_kwargs)) if decider_class is None: if task_id in list(self.task_id_to_decider_class.keys()): decider_class = self.task_id_to_decider_class[task_id] @@ -653,14 +567,9 @@ def set_decider( } X, y = self.task_id_to_X[task_id], self.task_id_to_y[task_id] - print("decider kwargs at middle of set_decider: "+str(decider_kwargs)) - print("decider_class(decider kwargs) at middle of set_decider: "+str(decider_class(**decider_kwargs))) self.task_id_to_decider[task_id] = decider_class(**decider_kwargs) decider_idx = self.task_id_to_decider_idx[task_id] - - print("task_to_id_to_decider[task_id] at middle of set_decider: "+str(self.task_id_to_decider[task_id])) - print() self.task_id_to_decider[task_id].fit( X[decider_idx], @@ -672,32 +581,6 @@ def set_decider( self.task_id_to_decider_class[task_id] = decider_class self.task_id_to_decider_kwargs[task_id] = decider_kwargs - def update_transformer( - self, - X, - y, - inputclasses = None, - transformer_data_proportion=1.0, - transformer_voter_data_idx=None, - transformer_id=None, - num_transformers=1, - transformer_class=None, - transformer_kwargs=None, - backward_task_ids=None, - ): - - return self._update_transformer( - X, - y, - inputclasses = inputclasses, - transformer_data_proportion=transformer_data_proportion, - transformer_voter_data_idx=transformer_voter_data_idx, - transformer_id=transformer_id, - num_transformers=num_transformers, - transformer_class=transformer_class, - transformer_kwargs=transformer_kwargs, - backward_task_ids=backward_task_ids, - ) def add_transformer( self, X, @@ -788,8 +671,7 @@ def add_task( backward_task_ids=None, forward_transformer_ids=None, ): - print("decider kwards at start of add_task: "+str(decider_kwargs)) - print("num_transformers at add_task = "+str(num_transformers)) + """ Adds a task to the progressive learner. Optionally trains one or more transformer from the input data (if num_transformers > 0), adds voters @@ -878,12 +760,13 @@ def add_task( transformer_voter_data_idx, decider_idx = self._bifurcate_decider_idxs( range(len(X)), transformer_voter_decider_split ) + self._append_decider_idx(task_id, decider_idx) # add new transformer and train voters and decider # from new transformer to previous tasks if num_transformers > 0: - print("num_transformers = "+str(num_transformers)) + self._add_transformer( X, y, @@ -919,6 +802,7 @@ def add_task( transformer_ids = np.concatenate([forward_transformer_ids, task_id]) else: transformer_ids = self.get_transformer_ids() + self.set_decider( task_id=task_id, transformer_ids=transformer_ids, @@ -945,7 +829,7 @@ def update_task( backward_task_ids=None, forward_transformer_ids=None, ): - print("decider kwargs at start of update_task: "+str(decider_kwargs)) + """ Adds a task to the progressive learner. Optionally trains one or more transformer from the input data (if num_transformers > 0), adds voters @@ -1026,12 +910,14 @@ def update_task( if task_id is None: print("Error: No Task ID inputted") return self - # come up with something that has fewer collisions - - self.task_id_to_X[task_id] = X - self.task_id_to_y[task_id] = y + # come up with something that has fewer collision + self.task_id_to_transformer_id_to_voters[task_id] = {} + + self.task_id_to_X[task_id] = np.concatenate((self.task_id_to_X[task_id],X),axis=0) + self.task_id_to_y[task_id] = np.concatenate((self.task_id_to_y[task_id],y),axis=0) # split into transformer/voter and decider data + transformer_voter_data_idx, decider_idx = self._bifurcate_decider_idxs( range(len(X)), transformer_voter_decider_split ) @@ -1039,11 +925,6 @@ def update_task( # add new transformer and train voters and decider # from new transformer to previous tasks - print("length task ids: "+str(len(self.get_task_ids()))) - print("task_id: "+str(task_id)) - - print("updating transformer") - print("num_transformers = "+str(num_transformers)) if num_transformers > 0: self._update_transformer( X, @@ -1058,39 +939,15 @@ def update_task( transformer_class=transformer_class, transformer_kwargs=transformer_kwargs, backward_task_ids=backward_task_ids, + decider_kwargs = decider_kwargs ) - # The following lines are commented out, needs to be tested if updating deciders and voters after updating task - # improves accuracy - - - # train voters and decider from previous (and current) transformers to new task - # for transformer_id in ( - # forward_transformer_ids - # if forward_transformer_ids - # else self.get_transformer_ids() - # ): - # self.set_voter( - # transformer_id=transformer_id, - # task_id=task_id, - # voter_class=voter_class, - # voter_kwargs=voter_kwargs, - # ) - - # # train decider of new task - # if forward_transformer_ids: - # if num_transformers == 0: - # transformer_ids = forward_transformer_ids - # else: - # transformer_ids = np.concatenate([forward_transformer_ids, task_id]) - # else: - # transformer_ids = self.get_transformer_ids() - # self.set_decider( - # task_id=task_id, - # transformer_ids=transformer_ids, - # decider_class=decider_class, - # decider_kwargs=decider_kwargs, - # ) + self.set_voter( + transformer_id=0, + task_id=task_id, + voter_class=voter_class, + voter_kwargs=voter_kwargs, + ) return self diff --git a/proglearn/transformers.py b/proglearn/transformers.py index 2b86ac8261..c5c9177ef0 100644 --- a/proglearn/transformers.py +++ b/proglearn/transformers.py @@ -170,7 +170,6 @@ def fit(self, X, y): """ X, y = check_X_y(X, y) self.transformer_ = DecisionTreeClassifier(**self.kwargs).fit(X, y) - #print(self.transformer_) return self def transform(self, X): diff --git a/proglearn/voters.py b/proglearn/voters.py index 838f4bfdf6..4381d6aa25 100755 --- a/proglearn/voters.py +++ b/proglearn/voters.py @@ -84,6 +84,8 @@ def fit(self, X, y): ) self.leaf_to_posterior_[leaf_id] = posteriors + + return self def predict_proba(self, X): From 38f8976c3d44c34e6756c1aeb205c18ca774c6f0 Mon Sep 17 00:00:00 2001 From: KevinWang905 <46271360+KevinWang905@users.noreply.github.com> Date: Fri, 17 Dec 2021 15:12:21 -0500 Subject: [PATCH 16/16] Formatted with Black --- proglearn/forest.py | 12 +++--- proglearn/progressive_learner.py | 73 +++++++++++++++----------------- proglearn/transformers.py | 4 -- proglearn/voters.py | 2 - 4 files changed, 41 insertions(+), 50 deletions(-) diff --git a/proglearn/forest.py b/proglearn/forest.py index 00eea3fd5e..7bda7ba903 100644 --- a/proglearn/forest.py +++ b/proglearn/forest.py @@ -289,6 +289,7 @@ def add_transformer( transformer_id=transformer_id, num_transformers=n_estimators, ) + def update_task( self, X, @@ -298,7 +299,7 @@ def update_task( tree_construction_proportion="default", kappa="default", max_depth="default", - inputclasses = None + inputclasses=None, ): """ updates a task with id task_id, max tree depth max_depth, given input data matrix X @@ -350,12 +351,12 @@ def update_task( X, y = check_X_y(X, y) - print("unique y values in update_task: "+str(np.unique(y))) + print("unique y values in update_task: " + str(np.unique(y))) return super().update_task( X, y, - inputclasses = inputclasses, + inputclasses=inputclasses, task_id=task_id, transformer_voter_decider_split=[ tree_construction_proportion, @@ -375,7 +376,7 @@ def update_transformer( self, X, y, - inputclasses = None, + inputclasses=None, transformer_id=None, n_estimators="default", max_depth="default", @@ -392,11 +393,12 @@ def update_transformer( return super().update_transformer( X, y, - inputclasses = inputclasses, + inputclasses=inputclasses, transformer_kwargs={"kwargs": {"max_depth": max_depth}}, transformer_id=transformer_id, num_transformers=n_estimators, ) + def predict_proba(self, X, task_id): """ estimates class posteriors under task_id for each example in input data X. diff --git a/proglearn/progressive_learner.py b/proglearn/progressive_learner.py index d093c8ac89..d8fb13ea82 100755 --- a/proglearn/progressive_learner.py +++ b/proglearn/progressive_learner.py @@ -169,7 +169,6 @@ def get_task_ids(self): def _append_transformer(self, transformer_id, transformer): - if transformer_id in self.get_transformer_ids(): self.transformer_id_to_transformers[transformer_id].append(transformer) else: @@ -203,13 +202,15 @@ def _append_voter_data_idx(self, task_id, bag_id, voter_data_idx): self.task_id_to_bag_id_to_voter_data_idx[task_id][bag_id] = voter_data_idx else: self.task_id_to_bag_id_to_voter_data_idx[task_id] = {bag_id: voter_data_idx} - + def _update_voter_data_idx(self, task_id, bag_id, voter_data_idx): if task_id in list(self.task_id_to_bag_id_to_voter_data_idx.keys()): prev = self.task_id_to_bag_id_to_voter_data_idx[task_id][bag_id] new = voter_data_idx - self.task_id_to_bag_id_to_voter_data_idx[task_id][bag_id] = np.append(prev, new) + self.task_id_to_bag_id_to_voter_data_idx[task_id][bag_id] = np.append( + prev, new + ) else: self.task_id_to_bag_id_to_voter_data_idx[task_id] = {bag_id: voter_data_idx} @@ -235,7 +236,6 @@ def _bifurcate_decider_idxs(self, ra, transformer_voter_decider_split): ) return first_idx, second_idx - def _update_transformer( self, X, @@ -247,11 +247,10 @@ def _update_transformer( transformer_class, transformer_kwargs, backward_task_ids, - inputclasses = None, - decider_kwargs = None + inputclasses=None, + decider_kwargs=None, ): - if transformer_id not in list(self.task_id_to_X.keys()): self.transformer_id_to_X[transformer_id] = X if transformer_id not in list(self.task_id_to_y.keys()): @@ -259,13 +258,12 @@ def _update_transformer( backward_task_ids = ( backward_task_ids if backward_task_ids is not None else self.get_task_ids() - ) + ) # for all transformers referring to specified task counter = 0 for transformer in self.transformer_id_to_transformers[transformer_id]: - # Check data and assign data for training if X is not None: @@ -282,11 +280,11 @@ def _update_transformer( ) else: transformer_data_idx = None - + X2 = ( - self.transformer_id_to_X[transformer_id] - if transformer_id in list(self.transformer_id_to_X.keys()) - else self.task_id_to_X[transformer_id] + self.transformer_id_to_X[transformer_id] + if transformer_id in list(self.transformer_id_to_X.keys()) + else self.task_id_to_X[transformer_id] ) y2 = ( self.transformer_id_to_y[transformer_id] @@ -294,12 +292,10 @@ def _update_transformer( else self.task_id_to_y[transformer_id] ) - - if transformer_data_idx is not None: X2, y2 = X2[transformer_data_idx], y2[transformer_data_idx] - transformer.transformer_.partial_fit(X2,y2, inputclasses) + transformer.transformer_.partial_fit(X2, y2, inputclasses) voter_data_idx = np.delete(transformer_voter_data_idx, transformer_data_idx) @@ -309,7 +305,7 @@ def _update_transformer( voter_data_idx=voter_data_idx, ) counter = counter + 1 - + for existing_task_id in np.intersect1d(backward_task_ids, self.get_task_ids()): self.set_voter(transformer_id=transformer_id, task_id=existing_task_id) self.set_decider( @@ -320,7 +316,6 @@ def _update_transformer( ) return self - def _add_transformer( self, @@ -332,10 +327,9 @@ def _add_transformer( num_transformers, transformer_class, transformer_kwargs, - backward_task_ids + backward_task_ids, ): - if transformer_id is None: transformer_id = len(self.get_transformer_ids()) @@ -343,7 +337,6 @@ def _add_transformer( backward_task_ids if backward_task_ids is not None else self.get_task_ids() ) - transformer_voter_data_idx = ( range(len(X)) if transformer_voter_data_idx is None @@ -469,7 +462,6 @@ def set_voter( bag_id=None, ): - # Type check X # Type check y @@ -512,7 +504,7 @@ def set_voter( if bag_id is None: transformers = self.transformer_id_to_transformers[transformer_id] else: - transformers = [self.transformer_id_to_transformers[transformer_id][bag_id]] + transformers = [self.transformer_id_to_transformers[transformer_id][bag_id]] for transformer_num, transformer in enumerate(transformers): if transformer_id == task_id: voter_data_idx = self.task_id_to_bag_id_to_voter_data_idx[task_id][ @@ -816,7 +808,7 @@ def update_task( self, X, y, - inputclasses = None, + inputclasses=None, task_id=None, transformer_voter_decider_split=[0.67, 0.33, 0], num_transformers=1, @@ -910,14 +902,18 @@ def update_task( if task_id is None: print("Error: No Task ID inputted") return self - # come up with something that has fewer collision + # come up with something that has fewer collision self.task_id_to_transformer_id_to_voters[task_id] = {} - - self.task_id_to_X[task_id] = np.concatenate((self.task_id_to_X[task_id],X),axis=0) - self.task_id_to_y[task_id] = np.concatenate((self.task_id_to_y[task_id],y),axis=0) + + self.task_id_to_X[task_id] = np.concatenate( + (self.task_id_to_X[task_id], X), axis=0 + ) + self.task_id_to_y[task_id] = np.concatenate( + (self.task_id_to_y[task_id], y), axis=0 + ) # split into transformer/voter and decider data - + transformer_voter_data_idx, decider_idx = self._bifurcate_decider_idxs( range(len(X)), transformer_voter_decider_split ) @@ -929,7 +925,7 @@ def update_task( self._update_transformer( X, y, - inputclasses = inputclasses, + inputclasses=inputclasses, transformer_data_proportion=transformer_voter_decider_split[0] if transformer_voter_decider_split else 1, @@ -939,18 +935,17 @@ def update_task( transformer_class=transformer_class, transformer_kwargs=transformer_kwargs, backward_task_ids=backward_task_ids, - decider_kwargs = decider_kwargs + decider_kwargs=decider_kwargs, ) - + self.set_voter( - transformer_id=0, - task_id=task_id, - voter_class=voter_class, - voter_kwargs=voter_kwargs, - ) + transformer_id=0, + task_id=task_id, + voter_class=voter_class, + voter_kwargs=voter_kwargs, + ) return self - def predict(self, X, task_id, transformer_ids=None): """ @@ -980,7 +975,7 @@ def predict(self, X, task_id, transformer_ids=None): raise NotFittedError decider = self.task_id_to_decider[task_id] - + return decider.predict(X, transformer_ids=transformer_ids) diff --git a/proglearn/transformers.py b/proglearn/transformers.py index c5c9177ef0..f737486a3c 100644 --- a/proglearn/transformers.py +++ b/proglearn/transformers.py @@ -105,8 +105,6 @@ def fit(self, X, y): self.fitted_ = True return self - - def transform(self, X): """ @@ -150,8 +148,6 @@ class TreeClassificationTransformer(BaseTransformer): def __init__(self, kwargs={}): self.kwargs = kwargs - - def fit(self, X, y): """ Fits the transformer to data X with labels y. diff --git a/proglearn/voters.py b/proglearn/voters.py index 4381d6aa25..838f4bfdf6 100755 --- a/proglearn/voters.py +++ b/proglearn/voters.py @@ -84,8 +84,6 @@ def fit(self, X, y): ) self.leaf_to_posterior_[leaf_id] = posteriors - - return self def predict_proba(self, X):