# occ_svm.R -- 124 lines (72 loc), 3.08 KB
# Copied from the GitHub file view of this script.
# Note: the source repository was archived by its owner on Jul 16, 2023 and is now read-only.
# (Line-number gutter 1-124 from the web page view removed; it is not part of the script.)
#PACKAGES ---------------------------------------------
library(tidyverse)
library(ggplot2)
library(e1071)
library(caret)
library(caTools)
library(hrbrthemes)
library(reshape2)
library(rgl)
library(patchwork)
#STEP 1: CREATING DATASET ---------------------------------------
# Simulated purchase data. Response coding:
#   1 = CUSTOMER WILL BUY      (tagged FALSE)
#   2 = CUSTOMER WILL NOT BUY  (tagged TRUE)
# The FALSE/TRUE tags line up with how the SVM predictions are relabelled
# to "1"/"2" before the confusion matrix is built in step 5.
# No seed is set, so every run draws a different sample.
#for 2 or more variables
# Two predictors: 200 uniform draws on [0, 7] each, rounded to 2 decimals.
# round() already returns numbers, so no format()/as.numeric() round trip.
sunflower <- round(runif(200, min = 0, max = 7), 2)
daisies <- round(runif(200, min = 0, max = 7), 2)
# Called `total` (not `sum`) so the base function sum() is not masked.
total <- sunflower + daisies
# Label is 1 when the combined amount is at least 6, otherwise 2.
response <- ifelse(total >= 6, 1, 2)
summary(response)
# Columns are named explicitly instead of renaming an auto-generated
# "ifelse(...)" column after the fact.
df <- data.frame(sunflower = sunflower, daisies = daisies, response = response)
#for 1 variable
# Single-predictor alternative to the two-variable dataset; running it
# overwrites `df`. 200 uniform draws on [0, 7], rounded to 2 decimals
# (round() already yields numbers, so no format()/as.numeric() round trip).
sunflower <- round(runif(200, min = 0, max = 7), 2)
# Label is 1 when sunflower is at least 3, otherwise 2.
response <- ifelse(sunflower >= 3, 1, 2)
summary(response)
# Columns are named explicitly instead of renaming an auto-generated
# "ifelse(...)" column after the fact.
df <- data.frame(sunflower = sunflower, response = response)
#if you already have data
# Alternative to the simulated datasets: read a saved data frame from disk.
# The location is an absolute, machine-specific path; running this
# overwrites `df`.
csv_path <- "C:/Users/samee/OneDrive/Documents/R/smallbusiness_occ/df.csv"
df <- read.csv(file = csv_path)
#STEP 2: SPLIT INTO TRAINING AND TESTING DATA ----------------------
# Work on a copy (df2) so `df` itself is left as loaded/generated.
df2 <- df
# sample.split() (caTools) returns one logical per row: TRUE marks the rows
# assigned to the training portion (SplitRatio = 0.7), FALSE the rest.
split <- sample.split(df2$response, SplitRatio = 0.7)
train <- df2[split, , drop = FALSE]
test <- df2[!split, , drop = FALSE]
#STEP 3: CREATE SVM MODEL -------------------------------
# Columns are picked by name rather than by position, so this works both for
# the two-variable data (sunflower, daisies, response) and for the
# one-variable data (sunflower, response).
predictors <- setdiff(names(train), "response")
train_x <- train[predictors] #take out the column with response
train_y <- train["response"] #only the response column
test_x <- test[predictors] #take out the column with response
test_y <- test["response"] #only the response column
train_y$response <- as.factor(train_y$response)
# The predictors are deliberately left numeric: turning continuous
# measurements into factors would make training data differ in type from
# test_x and force the SVM to recover the numbers from factor labels.
str(train_x)
# One-class SVM via the e1071 x-matrix interface. The response is not passed
# because a one-classification fit is unsupervised, and no `data=` argument is
# given because that only applies to the formula interface.
# NOTE: the result is stored under the name `svm`, which later steps read;
# calls to svm() still resolve to the e1071 function.
svm <- svm(x = train_x, type = "one-classification", nu = 0.10, scale = FALSE)
summary(svm)
#STEP 4: MAKE PREDICTIONS ---------------------------------
# Score the held-out independent variables (test_x) with the fitted model;
# `p` holds one prediction per test row.
p <- predict(object = svm, newdata = test_x)
summary(p)
#STEP 5: MAKE CONFUSION MATRIX ------------------------------
# Compare the SVM predictions with the true test labels.
p <- as.data.frame(p)
str(test_y)
str(p)
# Both factors get the same fixed level set ("1", "2") so that
# confusionMatrix() does not fail when one class is absent from the test
# labels or from the predictions.
test_y$response <- factor(test_y$response, levels = c(1, 2))
# Predictions are recoded FALSE -> "1", TRUE -> "2" so the levels match the
# test data levels.
p$p <- factor(p$p, levels = c(FALSE, TRUE), labels = c("1", "2"))
# caret::confusionMatrix() expects the predictions as `data` and the truth as
# `reference`; passing them the other way round transposes the table and
# swaps sensitivity/specificity and the predictive values.
confusionMatrix(data = p$p, reference = test_y$response)
#STEP 6: PLOT ------------------------------
# Row index for the x-axis. It is sized from the data (not a fixed 200) so it
# also fits datasets of a different length, and it is assigned as a column so
# re-running this step does not append a second X column.
df2$X <- seq_len(nrow(df2))
# response is a class label, so it is mapped as a factor to get a discrete
# colour legend; labs() keeps the legend title as "response".
#graphing more than 1 variable
one <- ggplot(df2, aes(x = X, y = sunflower, color = factor(response))) +
  geom_point(size = 5) +
  labs(color = "response") +
  theme_ipsum()
two <- ggplot(df2, aes(x = X, y = daisies, color = factor(response))) +
  geom_point(size = 5) +
  labs(color = "response") +
  theme_ipsum()
# Side-by-side layout via patchwork's `+` on ggplot objects.
one + two
#graphing 1 variable
ggplot(df2, aes(x = X, y = sunflower, color = factor(response))) +
  geom_point(size = 5) +
  labs(color = "response") +
  theme_ipsum()