-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathregression-and-hypothesis-testing.R
47 lines (30 loc) · 1.46 KB
/
regression-and-hypothesis-testing.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# Load the training data from train-with-numbers.csv.
training <- read.csv("train-with-numbers.csv")

# Linear model of numStatus on six predictors taken from the training data.
fit <- lm(
  numStatus ~ amount_tsh + construction_year + longitude + latitude +
    numSource + numQuantity,
  data = training
)

# Auto-print the model summary.
summary(fit)
# Best-subset selection for numStatus over the candidate predictors.
library(leaps)

# numQuantity is listed exactly once; repeating a term in a formula has no
# effect on the fit and only obscures which predictors are candidates.
# NOTE(review): regsubsets() limits the largest subset size via `nvmax`
# (documented default 8), so with 14 candidates the larger models are never
# examined -- confirm whether nvmax should be raised.
regsubsets.out <- regsubsets(
  numStatus ~ amount_tsh + gps_height + population + construction_year +
    longitude + latitude + numSource + numBasin + numQuantity + numLga +
    numExtraction_type + numPayment_type + numWater_quality +
    numWaterpoint_type,
  data = training
)
regsubsets.out

# Tabulate which predictors are included in the selected model of each size.
summary.out <- summary(regsubsets.out)
as.data.frame(summary.out$outmat)

# Plot the selected models ordered by adjusted R^2.
plot(regsubsets.out, scale = "adjr2", main = "Adjusted R^2")
# Count rows with a 0 in amount_tsh and/or construction_year.
# The counts sum the logical mask with na.rm = TRUE so that only rows where
# the comparison is TRUE are counted; indexing the data frame with a mask
# that contains NA would return (and therefore count) the NA rows as well.

# Both amount_tsh and construction_year are 0.
with(training, sum(amount_tsh == 0 & construction_year == 0, na.rm = TRUE))

# amount_tsh is 0 (regardless of construction_year).
with(training, sum(amount_tsh == 0, na.rm = TRUE))

# construction_year is 0 (regardless of amount_tsh).
with(training, sum(construction_year == 0, na.rm = TRUE))
# Linear model with construction_year as the response.
# NOTE(review): rows where construction_year is 0 are included in this fit;
# if 0 stands for "unknown" they probably should be excluded -- confirm.
fit2 <- lm(
  construction_year ~ amount_tsh + gps_height + longitude + latitude +
    population + num_private,
  data = training
)

# Auto-print the model summary.
summary(fit2)
# Add the numeric extraction-type column read from fullNumericExtract.csv.
numExtract <- read.csv("fullNumericExtract.csv")

# The values are copied by position, so there must be exactly one per
# training row. Without this check a missing `value` column (NULL) would make
# the assignment a silent no-op, and a shorter vector whose length divides
# nrow(training) would be silently recycled.
# NOTE(review): assumes both files list the rows in the same order -- confirm.
if (length(numExtract$value) != nrow(training)) {
  stop(
    "fullNumericExtract.csv must provide one `value` per training row: got ",
    length(numExtract$value), " value(s) for ", nrow(training), " row(s)",
    call. = FALSE
  )
}
training$numExtractType <- numExtract$value
# Best-subset selection with construction_year as the response.
library(leaps)

regsubsets.out2 <- regsubsets(
  construction_year ~ amount_tsh + gps_height + population + longitude +
    latitude + numExtractType,
  data = training
)
regsubsets.out2

# Tabulate which predictors are included in the selected model of each size.
# Note: this overwrites the summary.out produced for the numStatus search.
summary.out <- summary(regsubsets.out2)
as.data.frame(summary.out$outmat)

# Plot the selected models ordered by adjusted R^2.
plot(regsubsets.out2, scale = "adjr2", main = "Adjusted R^2")