7 changes: 2 additions & 5 deletions Cargo.toml
@@ -2,6 +2,7 @@
name = "openml"
version = "0.1.2"
authors = ["Martin Billinger <[email protected]>"]
edition = "2018"

description = "A rust interface to [OpenML](http://openml.org/)."
keywords = ["machine-learning", "openml", "data", "dataset"]
@@ -23,13 +24,9 @@ time = "0.1"
app_dirs = "1.2.1"
arff = "0.3"
fs2 = "0.4.3"
futures = "0.1"
hyper = "0.11"
hyper-tls = "0.1"
reqwest = "0.9"
log = "0.4"
num-traits = "0.2"
serde = "1.0"
serde_derive = "1.0"
serde_json = "1.0"
tokio-core = "0.1"

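Note: the dependency changes above replace the async hyper 0.11 / hyper-tls / futures / tokio-core stack with reqwest 0.9, whose blocking client covers the crate's simple GET-and-parse needs. As a rough sketch of what a fetch looks like with reqwest 0.9 (not the crate's actual download code; the helper name and URL are made up for illustration):

```rust
extern crate reqwest; // reqwest = "0.9"

/// Hypothetical helper: fetch an OpenML task description as JSON text.
fn fetch_task_json(task_id: u32) -> Result<String, reqwest::Error> {
    let url = format!("https://www.openml.org/api/v1/json/task/{}", task_id);
    // reqwest::get performs a blocking GET and returns a Response.
    let mut response = reqwest::get(url.as_str())?;
    // text() reads the whole body into a UTF-8 string.
    response.text()
}
```
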
10 changes: 3 additions & 7 deletions examples/classification_task.rs
@@ -1,8 +1,8 @@
extern crate openml;

use openml::baseline::NaiveBayesClassifier;
use openml::prelude::*;
use openml::{PredictiveAccuracy, SupervisedClassification};
use openml::baseline::NaiveBayesClassifier;

fn main() {
// Load "Supervised Classification on iris" task (https://www.openml.org/t/59)
@@ -13,14 +13,10 @@ fn main() {
// run the task
let result: PredictiveAccuracy<_> = task.run(|train, test| {
// train classifier
let nbc: NaiveBayesClassifier<u8> = train
.map(|(x, y)| (x, y))
.collect();
let nbc: NaiveBayesClassifier<u8> = train.map(|(x, y)| (x, y)).collect();

// test classifier
let y_out: Vec<_> = test
.map(|x| nbc.predict(x))
.collect();
let y_out: Vec<_> = test.map(|x| nbc.predict(x)).collect();

Box::new(y_out.into_iter())
});
10 changes: 3 additions & 7 deletions examples/regression_task.rs
@@ -1,8 +1,8 @@
extern crate openml;

use openml::baseline::NaiveLinearRegression;
use openml::prelude::*;
use openml::{RootMeanSquaredError, SupervisedRegression};
use openml::baseline::NaiveLinearRegression;

fn main() {
// Load "Supervised Regression on liver-disorders" task (https://www.openml.org/t/52948)
@@ -13,14 +13,10 @@ fn main() {
// run the task
let result: RootMeanSquaredError<_> = task.run(|train, test| {
// train model
let model: NaiveLinearRegression = train
.map(|(x, y)| (x, y))
.collect();
let model: NaiveLinearRegression = train.map(|(x, y)| (x, y)).collect();

// test model
let y_out: Vec<_> = test
.map(|x| model.predict(x))
.collect();
let y_out: Vec<_> = test.map(|x| model.predict(x)).collect();

Box::new(y_out.into_iter())
});
2 changes: 1 addition & 1 deletion src/baseline/mod.rs
@@ -4,4 +4,4 @@ mod naive_bayes_classifier;
mod naive_linear_regression;

pub use self::naive_bayes_classifier::NaiveBayesClassifier;
pub use self::naive_linear_regression::NaiveLinearRegression;
pub use self::naive_linear_regression::NaiveLinearRegression;
38 changes: 19 additions & 19 deletions src/baseline/naive_bayes_classifier.rs
@@ -19,31 +19,32 @@ use std::iter::FromIterator;
/// ```
#[derive(Debug)]
pub struct NaiveBayesClassifier<C>
where C: Eq + Hash
where
C: Eq + Hash,
{
class_distributions: HashMap<C, FeatureDistribution>,
}

/// Distribution of each feature column
#[derive(Debug, Clone)]
struct FeatureDistribution {
distributions: Vec<NormalDistribution>
distributions: Vec<NormalDistribution>,
}

/// Univariate Normal Distribution
#[derive(Copy, Clone)]
struct NormalDistribution {
sum: f64,
sqsum: f64,
n: usize
n: usize,
}

impl<'a, C: 'a, J> FromIterator<(J, &'a C)> for NaiveBayesClassifier<C>
where
J: IntoIterator<Item=&'a f64>,
J: IntoIterator<Item = &'a f64>,
C: Eq + Hash + Copy,
{
fn from_iter<I: IntoIterator<Item=(J, &'a C)>>(iter: I) -> Self {
fn from_iter<I: IntoIterator<Item = (J, &'a C)>>(iter: I) -> Self {
let mut class_distributions = HashMap::new();

for (x, &y) in iter {
@@ -62,13 +63,14 @@ where
}

NaiveBayesClassifier {
class_distributions
class_distributions,
}
}
}

impl<C> NaiveBayesClassifier<C>
where C: Eq + Hash + Copy,
where
C: Eq + Hash + Copy,
{
/// predict target class for a single feature vector
pub fn predict(&self, x: &[f64]) -> C {
@@ -98,7 +100,7 @@ where C: Eq + Hash + Copy,
impl FeatureDistribution {
fn new() -> Self {
FeatureDistribution {
distributions: Vec::new()
distributions: Vec::new(),
}
}
}
@@ -108,7 +110,7 @@ impl NormalDistribution {
NormalDistribution {
sum: 0.0,
sqsum: 0.0,
n: 0
n: 0,
}
}

@@ -131,7 +133,6 @@ impl NormalDistribution {
let xm = x - self.mean();

0.5 * ((1.0 / (2.0 * f64::consts::PI * v)).ln() - (xm * xm) / v)

}
}

@@ -143,15 +144,14 @@ impl fmt::Debug for NormalDistribution {

#[test]
fn nbc() {
let data = vec![(vec![1.0, 2.0], 'A'),
(vec![2.0, 1.0], 'A'),
(vec![1.0, 5.0], 'B'),
(vec![2.0, 6.0], 'B')];

let nbc: NaiveBayesClassifier<_> = data
.iter()
.map(|(x, y)| (x, y))
.collect();
let data = vec![
(vec![1.0, 2.0], 'A'),
(vec![2.0, 1.0], 'A'),
(vec![1.0, 5.0], 'B'),
(vec![2.0, 6.0], 'B'),
];

let nbc: NaiveBayesClassifier<_> = data.iter().map(|(x, y)| (x, y)).collect();

assert_eq!(nbc.predict(&[1.5, 1.5]), 'A');
assert_eq!(nbc.predict(&[5.5, 1.5]), 'A');
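For reference (not part of the diff): the reformatted expression `0.5 * ((1.0 / (2.0 * f64::consts::PI * v)).ln() - (xm * xm) / v)` in `NormalDistribution` is the log-density of a univariate normal with variance `v`, evaluated at `xm = x - mean`:

```latex
\ln \mathcal{N}(x \mid \mu, \sigma^2)
  = -\tfrac{1}{2}\ln\!\left(2\pi\sigma^2\right) - \frac{(x-\mu)^2}{2\sigma^2}
  = \tfrac{1}{2}\left[\ln\frac{1}{2\pi\sigma^2} - \frac{(x-\mu)^2}{\sigma^2}\right]
```
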
47 changes: 22 additions & 25 deletions src/baseline/naive_linear_regression.rs
@@ -15,18 +15,17 @@ use std::iter::FromIterator;
/// .collect();
/// ```
#[derive(Debug)]
pub struct NaiveLinearRegression
{
pub struct NaiveLinearRegression {
slope: f64,
intercept: f64,
feature: usize,
}

impl<'a, J> FromIterator<(J, &'a f64)> for NaiveLinearRegression
where
J: IntoIterator<Item=&'a f64>,
where
J: IntoIterator<Item = &'a f64>,
{
fn from_iter<I: IntoIterator<Item=(J, &'a f64)>>(iter: I) -> Self {
fn from_iter<I: IntoIterator<Item = (J, &'a f64)>>(iter: I) -> Self {
let mut feature_columns = Vec::new();
let mut target_column = Vec::new();

@@ -72,7 +71,8 @@ impl<'a, J> FromIterator<(J, &'a f64)> for NaiveLinearRegression
let slope = covar / x_var;
let intercept = y_mean - slope * x_mean;

let err: f64 = feature.iter()
let err: f64 = feature
.iter()
.zip(target_column.iter())
.map(|(&x, &y)| intercept + slope * x - y)
.map(|r| r * r)
@@ -94,8 +94,7 @@ impl<'a, J> FromIterator<(J, &'a f64)> for NaiveLinearRegression
}
}

impl NaiveLinearRegression
{
impl NaiveLinearRegression {
/// predict target value for a single feature vector
pub fn predict(&self, x: &[f64]) -> f64 {
self.intercept + x[self.feature] * self.slope
@@ -104,15 +103,14 @@ impl NaiveLinearRegression

#[test]
fn nbc_flat() {
let data = vec![(vec![1.0, 2.0], 3.0),
(vec![2.0, 1.0], 3.0),
(vec![1.0, 5.0], 3.0),
(vec![2.0, 6.0], 3.0)];
let data = vec![
(vec![1.0, 2.0], 3.0),
(vec![2.0, 1.0], 3.0),
(vec![1.0, 5.0], 3.0),
(vec![2.0, 6.0], 3.0),
];

let nlr: NaiveLinearRegression = data
.iter()
.map(|(x, y)| (x, y))
.collect();
let nlr: NaiveLinearRegression = data.iter().map(|(x, y)| (x, y)).collect();

assert_eq!(nlr.predict(&[1.5, 1.5]), 3.0);
assert_eq!(nlr.predict(&[5.5, 1.5]), 3.0);
@@ -122,15 +120,14 @@ fn nbc_flat() {

#[test]
fn nbc_slope() {
let data = vec![(vec![1.0, 2.0], 8.0),
(vec![2.0, 1.0], 9.0),
(vec![1.0, 5.0], 5.0),
(vec![2.0, 6.0], 4.0)];

let nlr: NaiveLinearRegression = data
.iter()
.map(|(x, y)| (x, y))
.collect();
let data = vec![
(vec![1.0, 2.0], 8.0),
(vec![2.0, 1.0], 9.0),
(vec![1.0, 5.0], 5.0),
(vec![2.0, 6.0], 4.0),
];

let nlr: NaiveLinearRegression = data.iter().map(|(x, y)| (x, y)).collect();

assert_eq!(nlr.predict(&[1.5, 1.5]), 8.5);
assert_eq!(nlr.predict(&[5.5, 1.5]), 8.5);
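For reference (not part of the diff): the `slope` / `intercept` computation in `NaiveLinearRegression` is ordinary least squares on a single feature column (the code appears to keep the feature whose fit has the smallest squared error):

```latex
\hat{\beta} = \frac{\operatorname{Cov}(x, y)}{\operatorname{Var}(x)}, \qquad
\hat{\alpha} = \bar{y} - \hat{\beta}\,\bar{x}
```
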
28 changes: 6 additions & 22 deletions src/error.rs
@@ -4,9 +4,7 @@ use std::string::FromUtf8Error;

use app_dirs::AppDirsError;
use arff::Error as ArffError;
use hyper::Error as HyperError;
use hyper::error::UriError;
use hyper_tls::Error as TlsError;
use reqwest::Error as ReqwestError;
use serde_json::Error as JsonError;

pub type Result<T> = StdResult<T, Error>;
@@ -15,9 +13,7 @@ pub type Result<T> = StdResult<T, Error>;
pub enum Error {
IoError(IoError),
Utf8Error(FromUtf8Error),
HyperError(HyperError),
HyperUriError(UriError),
HyperTlsError(TlsError),
HttpsError(ReqwestError),
JsonError(JsonError),
ArffError(ArffError),
AppDirsError(AppDirsError),
@@ -35,21 +31,9 @@ impl From<FromUtf8Error> for Error {
}
}

impl From<HyperError> for Error {
fn from(e: HyperError) -> Self {
Error::HyperError(e)
}
}

impl From<UriError> for Error {
fn from(e: UriError) -> Self {
Error::HyperUriError(e)
}
}

impl From<TlsError> for Error {
fn from(e: TlsError) -> Self {
Error::HyperTlsError(e)
impl From<ReqwestError> for Error {
fn from(e: ReqwestError) -> Self {
Error::HttpsError(e)
}
}

@@ -69,7 +53,7 @@ impl From<AppDirsError> for Error {
fn from(e: AppDirsError) -> Self {
match e {
AppDirsError::Io(e) => Error::IoError(e),
_ => Error::AppDirsError(e)
_ => Error::AppDirsError(e),
}
}
}
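
Note: the three hyper-related error variants and their `From` impls collapse into a single `ReqwestError` conversion. A minimal sketch (the helper name is made up; `Result<T>` is the alias defined above) of how download code can now propagate HTTP failures with `?`:

```rust
/// Hypothetical download helper using the crate's Result alias.
fn download(url: &str) -> Result<String> {
    // Any reqwest::Error is converted into Error::HttpsError by the From impl.
    let mut response = reqwest::get(url)?;
    let body = response.text()?;
    Ok(body)
}
```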