Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Srerf-3d #1

Open
wants to merge 15 commits into
base: staging
Choose a base branch
from
102 changes: 89 additions & 13 deletions Python/rerf/rerfClassifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,12 @@ class rerfClassifier(BaseEstimator, ClassifierMixin):
Parameters
----------
projection_matrix : str, optional (default: "RerF")
The random combination of features to use: either "RerF", "Base", or
"S-RerF". "RerF" randomly combines features for each `mtry`. Base
is our implementation of Random Forest. "S-RerF" is structured RerF,
combining multiple features together in random patches.
See Tomita et al. (2016) [#Tomita]_ for further details.
The random combination of features to use: either "RerF", "Base",
"MORF", or "MORF-3D". "RerF" randomly combines features for each
`mtry`. Base is our implementation of Random Forest. "MORF" is
structured RerF, combining multiple features together in random patches.
See Tomita et al. (2016) [#Tomita]_ for further details. "MORF-3D"
is the 3 dimensional extension of MORF.
n_estimators : int, optional (default: 500)
Number of trees in forest.

Expand Down Expand Up @@ -85,19 +86,26 @@ class rerfClassifier(BaseEstimator, ClassifierMixin):
Random seed to use. If None, set seed to ``np.random.randint(1, 1000000)``.

image_height : int, optional (default=None)
S-RerF required parameter. Image height of each observation.
MORF required parameter. Image height of each observation.
image_width : int, optional (default=None)
S-RerF required parameter. Width of each observation.
MORF required parameter. Image width of each observation.
image_depth : int, optional (default=None)
MORF required parameter. Image depth of each observation.
patch_height_max : int, optional (default=max(2, floor(sqrt(image_height))))
S-RerF parameter. Maximum image patch height to randomly select from.
MORF parameter. Maximum image patch height to randomly select from.
If None, set to ``max(2, floor(sqrt(image_height)))``.
patch_height_min : int, optional (default=1)
S-RerF parameter. Minimum image patch height to randomly select from.
MORF parameter. Minimum image patch height to randomly select from.
patch_width_max : int, optional (default=max(2, floor(sqrt(image_width))))
S-RerF parameter. Maximum image patch width to randomly select from.
MORF parameter. Maximum image patch width to randomly select from.
If None, set to ``max(2, floor(sqrt(image_width)))``.
patch_width_min : int, optional (default=1)
S-RerF parameter. Minimum image patch height to randomly select from.
MORF parameter. Minimum image patch width to randomly select from.
patch_depth_max : int, optional (default=max(2, floor(sqrt(image_depth))))
MORF parameter. Maximum image patch depth to randomly select from.
If None, set to ``max(2, floor(sqrt(image_depth)))``.
patch_depth_min : int, optional (default=1)
MORF parameter. Minimum image patch depth to randomly select from.

Returns
-------
Expand Down Expand Up @@ -147,10 +155,13 @@ def __init__(
random_state=None,
image_height=None,
image_width=None,
image_depth=None,
patch_height_max=None,
patch_height_min=1,
patch_width_max=None,
patch_width_min=1,
patch_depth_max=None,
patch_depth_min=1,
):
self.projection_matrix = projection_matrix
self.n_estimators = n_estimators
Expand All @@ -165,10 +176,13 @@ def __init__(
# s-rerf params
self.image_height = image_height
self.image_width = image_width
self.image_depth = image_depth
self.patch_height_max = patch_height_max
self.patch_height_min = patch_height_min
self.patch_width_max = patch_width_max
self.patch_width_min = patch_width_min
self.patch_depth_max = patch_depth_max
self.patch_depth_min = patch_depth_min

def fit(self, X, y):
"""Fit estimator.
Expand Down Expand Up @@ -230,10 +244,10 @@ def fit(self, X, y):
else:
forestType = "binnedBaseTern"
self.method_to_use_ = 1
elif self.projection_matrix == "S-RerF":
elif self.projection_matrix == "MORF":
if self.oob_score:
warn(
"OOB is not currently implemented for the S-RerF"
"OOB is not currently implemented for the MORF"
" algorithm. Continuing with oob_score = False.",
RuntimeWarning,
stacklevel=2,
Expand Down Expand Up @@ -277,6 +291,68 @@ def fit(self, X, y):
self.forest_.setParameter("patchHeightMin", self.patch_height_min_)
self.forest_.setParameter("patchWidthMax", self.patch_width_max_)
self.forest_.setParameter("patchWidthMin", self.patch_width_min_)
elif self.projection_matrix == "MORF-3D":
if self.oob_score:
warn(
"OOB is not currently implemented for the MORF-3D"
" algorithm. Continuing with oob_score = False.",
RuntimeWarning,
stacklevel=2,
)
self.oob_score = False

forestType = "binnedBaseTern" # this should change
self.method_to_use_ = 3
# Check that image_height, image_width, and image_depth are each
# divisors of num_features. This is the most we can do to
# prevent an invalid value being passed in.
if (num_features % self.image_height) != 0:
raise ValueError("Incorrect image_height given:")
else:
self.image_height_ = self.image_height
self.forest_.setParameter("imageHeight", self.image_height_)
if (num_features % self.image_width) != 0:
raise ValueError("Incorrect image_width given:")
else:
self.image_width_ = self.image_width
self.forest_.setParameter("imageWidth", self.image_width_)
if (num_features % self.image_depth) != 0:
raise ValueError("Incorrect image_depth given:")
else:
self.image_depth_ = self.image_depth
self.forest_.setParameter("imageDepth", self.image_depth_)
# If patch_height_max, patch_width_max, or patch_depth_max is
# not set by the user, default it to max(2, floor(sqrt(image_dim))).
if self.patch_height_max is None:
self.patch_height_max_ = max(2, floor(sqrt(self.image_height_)))
else:
self.patch_height_max_ = self.patch_height_max
if self.patch_width_max is None:
self.patch_width_max_ = max(2, floor(sqrt(self.image_width_)))
else:
self.patch_width_max_ = self.patch_width_max
if self.patch_depth_max is None:
self.patch_depth_max_ = max(2, floor(sqrt(self.image_depth_)))
else:
self.patch_depth_max_ = self.patch_depth_max
if 1 <= self.patch_height_min <= self.patch_height_max_:
self.patch_height_min_ = self.patch_height_min
else:
raise ValueError("Incorrect patch_height_min")
if 1 <= self.patch_width_min <= self.patch_width_max_:
self.patch_width_min_ = self.patch_width_min
else:
raise ValueError("Incorrect patch_width_min")
if 1 <= self.patch_depth_min <= self.patch_depth_max_:
self.patch_depth_min_ = self.patch_depth_min
else:
raise ValueError("Incorrect patch_depth_min")
self.forest_.setParameter("patchHeightMax", self.patch_height_max_)
self.forest_.setParameter("patchHeightMin", self.patch_height_min_)
self.forest_.setParameter("patchWidthMax", self.patch_width_max_)
self.forest_.setParameter("patchWidthMin", self.patch_width_min_)
self.forest_.setParameter("patchDepthMax", self.patch_depth_max_)
self.forest_.setParameter("patchDepthMin", self.patch_depth_min_)
else:
raise ValueError("Incorrect projection matrix")
self.forest_.setParameter("forestType", forestType)
Expand Down
42 changes: 41 additions & 1 deletion Python/tests/test_rerfClassifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import re

import numpy as np
import pandas as pd
import pytest
from sklearn import datasets, metrics
from sklearn.utils.validation import check_random_state
Expand Down Expand Up @@ -115,7 +116,7 @@ def test_s_rerf():
y_train = y[: n // 2]

clf = rerfClassifier(
projection_matrix="S-RerF", image_height=8, image_width=8, n_estimators=10
projection_matrix="MORF", image_height=8, image_width=8, n_estimators=10
)

clf.fit(X_train, y_train)
Expand All @@ -142,6 +143,45 @@ def test_s_rerf():
assert clf.patch_height_min_ == 1


def test_s_rerf_3d():
    """Fit a MORF-3D forest on the bundled CIFAR subset and check its parameters.

    Loads ``packedForest/res/cifar_01.csv`` (first column is the label, the
    remaining columns are the flattened image), fits a 10-tree ``MORF-3D``
    classifier, and asserts that the training accuracy beats chance and that
    the patch-size defaults were resolved as ``max(2, floor(sqrt(dim)))``.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    from pathlib import Path

    # Resolve the dataset from this file's location (Python/tests/) instead of
    # the current working directory, so the test runs from any directory.
    repo_root = Path(__file__).resolve().parents[2]
    data_path = repo_root / "packedForest" / "res" / "cifar_01.csv"

    mat = pd.read_csv(data_path, header=None).values
    X = mat[:, 1:].reshape(200, 32, 32, 3)
    # np.swapaxes returns a new view and does not modify X in place; the
    # result must be assigned or the reorder silently never happens.
    # NOTE(review): this moves the 3-channel axis ahead of the spatial axes
    # before flattening -- confirm this is the layout MORF-3D expects.
    X = np.swapaxes(X, 1, -1)
    X = X.reshape(200, -1)
    Y = mat[:, 0]

    clf = rerfClassifier(
        projection_matrix="MORF-3D",
        image_height=32,
        image_width=32,
        image_depth=3,
        n_estimators=10,
    )
    clf.fit(X, Y)
    score = clf.score(X, Y)
    assert score > 0.5

    assert hasattr(clf, "image_height")
    assert hasattr(clf, "image_width")
    assert hasattr(clf, "image_depth")
    assert hasattr(clf, "patch_width_max")
    assert hasattr(clf, "patch_width_min")
    assert hasattr(clf, "patch_depth_max")
    assert hasattr(clf, "patch_depth_min")
    assert hasattr(clf, "patch_height_max")
    assert hasattr(clf, "patch_height_min")

    assert clf.image_height == 32
    assert clf.image_width == 32
    assert clf.image_depth == 3
    # Height/width: max(2, floor(sqrt(32))) == floor(sqrt(32)) == 5.
    assert clf.patch_height_max_ == math.floor(math.sqrt(32))
    assert clf.patch_height_min_ == 1
    assert clf.patch_width_max_ == math.floor(math.sqrt(32))
    assert clf.patch_width_min_ == 1
    # Depth: floor(sqrt(3)) == 1, so the lower bound of 2 applies.
    assert clf.patch_depth_max_ == 2
    assert clf.patch_depth_min_ == 1


def check_iris_criterion(projection_matrix):
# Check consistency on dataset iris.

Expand Down
Loading