-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBuildNode.py
More file actions
53 lines (50 loc) · 2.59 KB
/
BuildNode.py
File metadata and controls
53 lines (50 loc) · 2.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import numpy as np
import Node
import SelectFeature
def build(node, entireDatasetLength, impurityCat, impurityNum,
          featureNames, featureTypes, featuresToConsider, nmin,
          numIntervals, forceToTerminal=False):
    """Turn an unexplored node into a terminal or a non-terminal node.

    Only the resubstitution impurity of the node is computed here. Per the
    original author's note, the complete impurity can only be calculated
    later, once the whole tree is built and the subtree size of every node
    is known.

    Parameters
    ----------
    node
        Object exposing ``nodeClass``, ``labels``, ``inputs`` and
        ``getLabelType()``. Its ``nodeClass`` must be ``'unexplored'``.
    entireDatasetLength
        Divisor used to turn ``len(node.labels)`` into the probability of
        being in the node.
    impurityCat
        Callable applied to ``node.labels`` when the label type is
        ``'string'``.
    impurityNum
        Callable applied to ``node.labels`` for any other label type.
    featureNames, featureTypes
        Forwarded to ``SelectFeature.selectBestFeature`` and to
        ``Node.NonTerminal``.
    featuresToConsider, numIntervals
        Forwarded to ``SelectFeature.selectBestFeature``.
    nmin
        A node with ``node.inputs.shape[0] <= nmin`` becomes terminal.
    forceToTerminal
        When true, a terminal node is produced without looking for a split.

    Returns
    -------
    A ``Node.ClassificationTerminal``, ``Node.RegressionTerminal`` or
    ``Node.NonTerminal`` whose ``probability`` and ``impurity`` attributes
    have been set.

    Raises
    ------
    Exception
        If ``node.nodeClass`` is not ``'unexplored'``.
    """
    if node.nodeClass != 'unexplored':
        raise Exception('The node {des} is already built/explored'.format(
            des=repr(node)))

    labelsAreCategorical = node.getLabelType() == 'string'

    # Share of the whole dataset that falls into this node.
    nodeProbability = len(node.labels) / entireDatasetLength

    # The resubstitution error is weighted differently for categorical and
    # numerical labels.
    # NOTE(review): the numerical branch weights by np.var(node.labels)
    # rather than by the node probability -- confirm this is intended.
    if labelsAreCategorical:
        weight = nodeProbability
        rawImpurity = impurityCat(node.labels)
    else:
        rawImpurity = impurityNum(node.labels)
        weight = np.var(node.labels)
    nodeImpurity = rawImpurity * weight

    # Stop splitting when forced, when the node holds no more than nmin
    # examples, or when it is already pure (an impurity at or below 10^-10
    # is treated as zero).
    purityTolerance = 10**-10
    mustTerminate = (forceToTerminal
                     or node.inputs.shape[0] <= nmin
                     or nodeImpurity <= purityTolerance)

    if mustTerminate:
        if labelsAreCategorical:
            result = Node.ClassificationTerminal(node.labels)
        else:
            result = Node.RegressionTerminal(node.labels)
    else:
        # Otherwise pick the best feature and create a non-terminal node.
        chosenFeature, isCategorical, splitArgs = (
            SelectFeature.selectBestFeature(
                node.inputs, node.labels, impurityCat, impurityNum,
                featuresToConsider, featureNames, featureTypes,
                numIntervals))
        # A categorical split is described by a subset; any other split by
        # an operator and a boundary.
        if isCategorical:
            splitSpec = {'subset': splitArgs[0]}
        else:
            splitSpec = {'operator': splitArgs[0],
                         'boundary': splitArgs[1]}
        result = Node.NonTerminal(node.inputs, node.labels, chosenFeature,
                                  featureNames, featureTypes, **splitSpec)

    result.probability = nodeProbability
    result.impurity = nodeImpurity
    return result