Relax label vector type to AbstractVector (#65)
* Relax label vector type to AbstractVector

* info for test

* update README

* add test cases for JLD2
iblislin authored Jan 31, 2021
1 parent 96ba7e4 commit 5ba728d
Showing 6 changed files with 217 additions and 117 deletions.
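In practice, the relaxation means label columns no longer have to be materialized as a plain `Vector` before training. A minimal sketch of what the relaxed signatures accept (mirroring the updated README below; the `view` call is an illustration of the weaker type bound, not something taken from this diff):

```julia
using LIBSVM, RDatasets

iris = dataset("datasets", "iris")
X = Matrix(iris[:, 1:4])'

# `iris.Species` is a CategoricalVector: an AbstractVector, but not a Vector.
# Before this commit it had to be converted first, e.g. `levelcode.(iris.Species)`.
y = iris.Species

# Views are AbstractVectors too, so label slices need no copy.
model = svmtrain(X[:, 1:2:end], view(y, 1:2:150))
ŷ, _ = svmpredict(model, X[:, 2:2:end])
```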
3 changes: 3 additions & 0 deletions .editorconfig
@@ -0,0 +1,3 @@
+[*.jl]
+indent_style = space
+indent_size = 4
5 changes: 4 additions & 1 deletion Project.toml
@@ -15,8 +15,11 @@ julia = "1.3"

 [extras]
 DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
+FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
+JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
+RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

 [targets]
-test = ["DelimitedFiles", "Test", "SparseArrays"]
+test = ["DelimitedFiles", "FileIO", "JLD2", "RDatasets", "SparseArrays", "Test"]
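The three new test dependencies support the "add test cases for JLD2" item from the commit message: a trained model should survive (de)serialization now that its label field is an `AbstractVector`. A hedged sketch of such a round trip (file name and exact calls are illustrative, not copied from the test suite):

```julia
using LIBSVM, JLD2, RDatasets

iris = dataset("datasets", "iris")
model = svmtrain(Matrix(iris[:, 1:4])', iris.Species)

# Write the model to disk and load it back; the reloaded model should
# predict identically to the original.
@save "model.jld2" model
@load "model.jld2" model
ŷ, _ = svmpredict(model, Matrix(iris[:, 1:4])')
```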
54 changes: 34 additions & 20 deletions README.md
@@ -22,26 +22,33 @@ for options.
```julia
using LIBSVM
using RDatasets
-using Printf, Statistics
+using Printf
+using Statistics

# Load Fisher's classic iris data
iris = dataset("datasets", "iris")

+# First four dimensions of input data are features
+X = Matrix(iris[:, 1:4])'

# LIBSVM handles multi-class data automatically using a one-against-one strategy
-labels = levelcode.(iris[:Species])
+y = iris.Species

-# First dimension of input data is features; second is instances
-instances = convert(Array, iris[:, 1:4])'
+# Split the dataset into training set and testing set
+Xtrain = X[:, 1:2:end]
+Xtest = X[:, 2:2:end]
+ytrain = y[1:2:end]
+ytest = y[2:2:end]

# Train SVM on half of the data using default parameters. See documentation
# of svmtrain for options
-model = svmtrain(instances[:, 1:2:end], labels[1:2:end]);
+model = svmtrain(Xtrain, ytrain)

# Test model on the other half of the data.
-(predicted_labels, decision_values) = svmpredict(model, instances[:, 2:2:end]);
+ŷ, decision_values = svmpredict(model, Xtest);

# Compute accuracy
-@printf "Accuracy: %.2f%%\n" mean((predicted_labels .== labels[2:2:end]))*100
+@printf "Accuracy: %.2f%%\n" mean(ŷ .== ytest) * 100
```

### ScikitLearn API
@@ -52,22 +59,29 @@ You can alternatively use the `ScikitLearn.jl` API with the same options as `svmtrain`:
using LIBSVM
using RDatasets

-#Classification C-SVM
+# Classification C-SVM
iris = dataset("datasets", "iris")
-labels = levelcode.(iris[:, :Species])
-instances = convert(Array, iris[:, 1:4])
-model = fit!(SVC(), instances[1:2:end, :], labels[1:2:end])
-yp = predict(model, instances[2:2:end, :])
+X = Matrix(iris[:, 1:4])
+y = iris.Species
+
+Xtrain = X[1:2:end, :]
+Xtest = X[2:2:end, :]
+ytrain = y[1:2:end]
+ytest = y[2:2:end]
+
+model = fit!(SVC(), Xtrain, ytrain)
+ŷ = predict(model, Xtest)
```
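As with the `svmtrain` example above, the prediction can be scored against the held-out labels (a usage note, not part of the diff):

```julia
using Statistics
mean(ŷ .== ytest)  # classification accuracy on the test half
```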

```julia
+# Epsilon-Regression

-#epsilon-regression
whiteside = RDatasets.dataset("MASS", "whiteside")
-X = Array(whiteside[:Gas])
-if typeof(X) <: AbstractVector
-    X = reshape(X, (length(X),1))
-end
-y = Array(whiteside[:Temp])
-svrmod = fit!(EpsilonSVR(cost = 10., gamma = 1.), X, y)
-yp = predict(svrmod, X)
+X = Matrix(whiteside[:, 3:3]) # the `Gas` column
+y = whiteside.Temp
+
+model = fit!(EpsilonSVR(cost = 10., gamma = 1.), X, y)
+ŷ = predict(model, X)
```
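A quick sanity check on the regression fit is its in-sample root-mean-square error (illustrative only, not part of the README):

```julia
using Statistics
rmse = sqrt(mean((ŷ .- y) .^ 2))
```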

## Credits
75 changes: 43 additions & 32 deletions src/LIBSVM.jl
@@ -1,22 +1,24 @@
__precompile__()
module LIBSVM

import LIBLINEAR

using SparseArrays
using libsvm_jll

export svmtrain, svmpredict, fit!, predict, transform,
-    SVC, NuSVC, OneClassSVM, NuSVR, EpsilonSVR, LinearSVC,
-    Linearsolver, Kernel
+       SVC, NuSVC, OneClassSVM, NuSVR, EpsilonSVR, LinearSVC,
+       Linearsolver, Kernel

include("LibSVMtypes.jl")
include("constants.jl")

verbosity = false

-struct SupportVectors{T, U}
+struct SupportVectors{T,U}
    l::Int32
    nSV::Vector{Int32}
-   y::Vector{T}
+   y::AbstractVector{T}
    X::AbstractMatrix{U}
    indices::Vector{Int32}
    SVnodes::Vector{SVMNode}
@@ -36,8 +38,7 @@ function SupportVectors(smc::SVMModel, y, X)

yi = smc.param.svm_type == 2 ? Float64[] : y[sv_indices]

-    SupportVectors(smc.l, nSV, yi , X[:,sv_indices],
-                   sv_indices, nodes)
+    SupportVectors(smc.l, nSV, yi , X[:,sv_indices], sv_indices, nodes)
end

struct SVM{T}
@@ -68,7 +69,7 @@ struct SVM{T}
probability::Bool
end

-function SVM(smc::SVMModel, y::T, X, weights, labels, svmtype, kernel) where T
+function SVM(smc::SVMModel, y, X, weights, labels, svmtype, kernel)
svs = SupportVectors(smc, y, X)
coefs = zeros(smc.l, smc.nr_class-1)
for k in 1:(smc.nr_class-1)
@@ -266,39 +267,48 @@ function set_num_threads(nt::Integer)
end

"""
-```julia
-svmtrain{T, U<:Real}(X::AbstractMatrix{U}, y::AbstractVector{T}=[];
-    svmtype::Type=SVC, kernel::Kernel.KERNEL=Kernel.RadialBasis, degree::Integer=3,
-    gamma::Float64=1.0/size(X, 1), coef0::Float64=0.0,
-    cost::Float64=1.0, nu::Float64=0.5, epsilon::Float64=0.1,
-    tolerance::Float64=0.001, shrinking::Bool=true,
-    probability::Bool=false, weights::Union{Dict{T, Float64}, Cvoid}=nothing,
-    cachesize::Float64=200.0, verbose::Bool=false)
-```
+    svmtrain(
+        X::AbstractMatrix{U}, y::AbstractVector{T} = [];
+        svmtype::Type = SVC,
+        kernel::Kernel.KERNEL = Kernel.RadialBasis,
+        degree::Integer = 3,
+        gamma::Float64 = 1.0/size(X, 1),
+        coef0::Float64 = 0.0,
+        cost::Float64 = 1.0,
+        nu::Float64 = 0.5,
+        epsilon::Float64 = 0.1,
+        tolerance::Float64 = 0.001,
+        shrinking::Bool = true,
+        probability::Bool = false,
+        weights::Union{Dict{T,Float64},Cvoid} = nothing,
+        cachesize::Float64 = 200.0,
+        verbose::Bool = false
+    ) where {T,U<:Real}
Train Support Vector Machine using LIBSVM using response vector `y`
-and training data `X`. The shape of `X` needs to be (nfeatures, nsamples).
+and training data `X`. The shape of `X` needs to be `(nfeatures, nsamples)`.
For one-class SVM use only `X`.
# Arguments
-* `svmtype::Type=LIBSVM.SVC`: Type of SVM to train `SVC` (for C-SVM), `NuSVC`
+* `svmtype::Type = LIBSVM.SVC`: Type of SVM to train `SVC` (for C-SVM), `NuSVC`
  `OneClassSVM`, `EpsilonSVR` or `NuSVR`. Defaults to `OneClassSVM` if
  `y` is not used.
-* `kernel::Kernels.KERNEL=Kernel.RadialBasis`: Model kernel `Linear`, `Polynomial`,
+* `kernel::Kernels.KERNEL = Kernel.RadialBasis`: Model kernel `Linear`, `Polynomial`,
  `RadialBasis`, `Sigmoid` or `Precomputed`.
-* `degree::Integer=3`: Kernel degree. Used for polynomial kernel
-* `gamma::Float64=1.0/size(X, 1)` : γ for kernels
-* `coef0::Float64=0.0`: parameter for sigmoid and polynomial kernel
-* `cost::Float64=1.0`: cost parameter C of C-SVC, epsilon-SVR, and nu-SVR
-* `nu::Float64=0.5`: parameter nu of nu-SVC, one-class SVM, and nu-SVR
-* `epsilon::Float64=0.1`: epsilon in loss function of epsilon-SVR
-* `tolerance::Float64=0.001`: tolerance of termination criterion
-* `shrinking::Bool=true`: whether to use the shrinking heuristics
-* `probability::Bool=false`: whether to train a SVC or SVR model for probability estimates
+* `degree::Integer = 3`: Kernel degree. Used for polynomial kernel
+* `gamma::Float64 = 1.0/size(X, 1)` : γ for kernels
+* `coef0::Float64 = 0.0`: parameter for sigmoid and polynomial kernel
+* `cost::Float64 = 1.0`: cost parameter C of C-SVC, epsilon-SVR, and nu-SVR
+* `nu::Float64 = 0.5`: parameter nu of nu-SVC, one-class SVM, and nu-SVR
+* `epsilon::Float64 = 0.1`: epsilon in loss function of epsilon-SVR
+* `tolerance::Float64 = 0.001`: tolerance of termination criterion
+* `shrinking::Bool = true`: whether to use the shrinking heuristics
+* `probability::Bool = false`: whether to train a SVC or SVR model for probability estimates
* `weights::Union{Dict{T, Float64}, Cvoid}=nothing`: dictionary of class weights
-* `cachesize::Float64=100.0`: cache memory size in MB
-* `verbose::Bool=false`: print training output from LIBSVM if true
-* `nt::Integer=0`: number of OpenMP cores to use, if 0 it is set to OMP_NUM_THREADS, if negative it is set to the max number of threads
+* `cachesize::Float64 = 100.0`: cache memory size in MB
+* `verbose::Bool = false`: print training output from LIBSVM if true
+* `nt::Integer = 0`: number of OpenMP cores to use, if 0 it is set to OMP_NUM_THREADS, if negative it is set to the max number of threads
Consult LIBSVM documentation for advice on the choice of correct
parameters and model tuning.
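Pulling the keyword list together, a call that exercises several documented options might look like the sketch below (purely an illustration of the docstring; the random data, integer labels, and weight values are made up):

```julia
using LIBSVM

X = rand(4, 100)        # (nfeatures, nsamples), as required above
y = rand(1:2, 100)      # any AbstractVector of labels

# Weighted, probabilistic C-SVC using the keywords documented above.
model = svmtrain(X, y;
                 svmtype = SVC,
                 kernel = Kernel.RadialBasis,
                 cost = 10.0,
                 weights = Dict(1 => 2.0, 2 => 1.0),
                 probability = true)
```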
@@ -419,4 +429,5 @@ end
include("ScikitLearnTypes.jl")
include("ScikitLearnAPI.jl")


end
4 changes: 2 additions & 2 deletions src/ScikitLearnAPI.jl
@@ -65,7 +65,7 @@ LinearSVC(;solver = Linearsolver.L2R_L2LOSS_SVC_DUAL,
cost, p, bias, verbose, nothing)
@declare_hyperparameters(LinearSVC, [:solver, :weights, :tolerance, :cost, :p, :bias])

-function fit!(model::Union{AbstractSVC,AbstractSVR}, X::AbstractMatrix, y::Vector=[])
+function fit!(model::Union{AbstractSVC,AbstractSVR}, X::AbstractMatrix, y::AbstractVector = [])
    #Build arguments for calling svmtrain
    model.gamma == :auto && (model.gamma = 1.0/size(X', 1))
    kwargs = Tuple{Symbol, Any}[]
@@ -97,7 +97,7 @@ function get_params(model::Union{AbstractSVC,AbstractSVR, LinearSVC})
return params
end

-function fit!(model::LinearSVC, X::AbstractMatrix, y::Vector)
+function fit!(model::LinearSVC, X::AbstractMatrix, y::AbstractVector)
    model.fit = LIBLINEAR.linear_train(y, X', solver_type = Int32(model.solver),
        weights = model.weights, C = model.cost, bias = model.bias,
        p = model.p, eps = model.tolerance, verbose = model.verbose)
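The practical effect of both `fit!` changes mirrors the `svmtrain` relaxation: any `AbstractVector` of labels is now accepted. A small sketch (shapes and data are illustrative):

```julia
using LIBSVM

X = rand(100, 4)        # (nsamples, nfeatures) for the ScikitLearn API
y = rand(1:2, 100)

# A SubArray, an AbstractVector but not a Vector, now passes the
# signatures above unchanged.
model = fit!(SVC(), X, view(y, 1:100))
ŷ = predict(model, X)
```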
(The diff of the sixth changed file, presumably the test suite given the "add test cases for JLD2" commit message, did not load on the page.)