diff --git a/Cargo.toml b/Cargo.toml index cd2c5ff..1efe39a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,7 @@ name = "openml" version = "0.1.2" authors = ["Martin Billinger "] +edition = "2018" description = "A rust interface to [OpenML](http://openml.org/)." keywords = ["machine-learning", "openml", "data", "dataset"] @@ -23,13 +24,9 @@ time = "0.1" app_dirs = "1.2.1" arff = "0.3" fs2 = "0.4.3" -futures = "0.1" -hyper = "0.11" -hyper-tls = "0.1" +reqwest = "0.9" log = "0.4" num-traits = "0.2" serde = "1.0" serde_derive = "1.0" serde_json = "1.0" -tokio-core = "0.1" - diff --git a/examples/classification_task.rs b/examples/classification_task.rs index 3a839f4..d714028 100644 --- a/examples/classification_task.rs +++ b/examples/classification_task.rs @@ -1,8 +1,8 @@ extern crate openml; +use openml::baseline::NaiveBayesClassifier; use openml::prelude::*; use openml::{PredictiveAccuracy, SupervisedClassification}; -use openml::baseline::NaiveBayesClassifier; fn main() { // Load "Supervised Classification on iris" task (https://www.openml.org/t/59) @@ -13,14 +13,10 @@ fn main() { // run the task let result: PredictiveAccuracy<_> = task.run(|train, test| { // train classifier - let nbc: NaiveBayesClassifier = train - .map(|(x, y)| (x, y)) - .collect(); + let nbc: NaiveBayesClassifier = train.map(|(x, y)| (x, y)).collect(); // test classifier - let y_out: Vec<_> = test - .map(|x| nbc.predict(x)) - .collect(); + let y_out: Vec<_> = test.map(|x| nbc.predict(x)).collect(); Box::new(y_out.into_iter()) }); diff --git a/examples/regression_task.rs b/examples/regression_task.rs index 7d12a82..6a30a05 100644 --- a/examples/regression_task.rs +++ b/examples/regression_task.rs @@ -1,8 +1,8 @@ extern crate openml; +use openml::baseline::NaiveLinearRegression; use openml::prelude::*; use openml::{RootMeanSquaredError, SupervisedRegression}; -use openml::baseline::NaiveLinearRegression; fn main() { // Load "Supervised Regression on liver-disorders" task (https://www.openml.org/t/52948) @@ -13,14 +13,10 @@ fn main() { // run the task let result: RootMeanSquaredError<_> = task.run(|train, test| { // train model - let model: NaiveLinearRegression = train - .map(|(x, y)| (x, y)) - .collect(); + let model: NaiveLinearRegression = train.map(|(x, y)| (x, y)).collect(); // test model - let y_out: Vec<_> = test - .map(|x| model.predict(x)) - .collect(); + let y_out: Vec<_> = test.map(|x| model.predict(x)).collect(); Box::new(y_out.into_iter()) }); diff --git a/src/baseline/mod.rs b/src/baseline/mod.rs index 9d7d4f7..f06ce4b 100644 --- a/src/baseline/mod.rs +++ b/src/baseline/mod.rs @@ -4,4 +4,4 @@ mod naive_bayes_classifier; mod naive_linear_regression; pub use self::naive_bayes_classifier::NaiveBayesClassifier; -pub use self::naive_linear_regression::NaiveLinearRegression; \ No newline at end of file +pub use self::naive_linear_regression::NaiveLinearRegression; diff --git a/src/baseline/naive_bayes_classifier.rs b/src/baseline/naive_bayes_classifier.rs index 8be186c..4cd5974 100644 --- a/src/baseline/naive_bayes_classifier.rs +++ b/src/baseline/naive_bayes_classifier.rs @@ -19,7 +19,8 @@ use std::iter::FromIterator; /// ``` #[derive(Debug)] pub struct NaiveBayesClassifier -where C: Eq + Hash +where + C: Eq + Hash, { class_distributions: HashMap, } @@ -27,7 +28,7 @@ where C: Eq + Hash /// Distribution of each feature column #[derive(Debug, Clone)] struct FeatureDistribution { - distributions: Vec + distributions: Vec, } /// Univariate Normal Distribution @@ -35,15 +36,15 @@ struct FeatureDistribution { struct NormalDistribution { sum: f64, sqsum: f64, - n: usize + n: usize, } impl<'a, C: 'a, J> FromIterator<(J, &'a C)> for NaiveBayesClassifier where - J: IntoIterator, + J: IntoIterator, C: Eq + Hash + Copy, { - fn from_iter>(iter: I) -> Self { + fn from_iter>(iter: I) -> Self { let mut class_distributions = HashMap::new(); for (x, &y) in iter { @@ -62,13 +63,14 @@ where } NaiveBayesClassifier { - class_distributions + class_distributions, } } } impl NaiveBayesClassifier -where C: Eq + Hash + Copy, +where + C: Eq + Hash + Copy, { /// predict target class for a single feature vector pub fn predict(&self, x: &[f64]) -> C { @@ -98,7 +100,7 @@ where C: Eq + Hash + Copy, impl FeatureDistribution { fn new() -> Self { FeatureDistribution { - distributions: Vec::new() + distributions: Vec::new(), } } } @@ -108,7 +110,7 @@ impl NormalDistribution { NormalDistribution { sum: 0.0, sqsum: 0.0, - n: 0 + n: 0, } } @@ -131,7 +133,6 @@ impl NormalDistribution { let xm = x - self.mean(); 0.5 * ((1.0 / (2.0 * f64::consts::PI * v)).ln() - (xm * xm) / v) - } } @@ -143,15 +144,14 @@ impl fmt::Debug for NormalDistribution { #[test] fn nbc() { - let data = vec![(vec![1.0, 2.0], 'A'), - (vec![2.0, 1.0], 'A'), - (vec![1.0, 5.0], 'B'), - (vec![2.0, 6.0], 'B')]; - - let nbc: NaiveBayesClassifier<_> = data - .iter() - .map(|(x, y)| (x, y)) - .collect(); + let data = vec![ + (vec![1.0, 2.0], 'A'), + (vec![2.0, 1.0], 'A'), + (vec![1.0, 5.0], 'B'), + (vec![2.0, 6.0], 'B'), + ]; + + let nbc: NaiveBayesClassifier<_> = data.iter().map(|(x, y)| (x, y)).collect(); assert_eq!(nbc.predict(&[1.5, 1.5]), 'A'); assert_eq!(nbc.predict(&[5.5, 1.5]), 'A'); diff --git a/src/baseline/naive_linear_regression.rs b/src/baseline/naive_linear_regression.rs index 13a5e64..9e7d808 100644 --- a/src/baseline/naive_linear_regression.rs +++ b/src/baseline/naive_linear_regression.rs @@ -15,18 +15,17 @@ use std::iter::FromIterator; /// .collect(); /// ``` #[derive(Debug)] -pub struct NaiveLinearRegression -{ +pub struct NaiveLinearRegression { slope: f64, intercept: f64, feature: usize, } impl<'a, J> FromIterator<(J, &'a f64)> for NaiveLinearRegression - where - J: IntoIterator, +where + J: IntoIterator, { - fn from_iter>(iter: I) -> Self { + fn from_iter>(iter: I) -> Self { let mut feature_columns = Vec::new(); let mut target_column = Vec::new(); @@ -72,7 +71,8 @@ impl<'a, J> FromIterator<(J, &'a f64)> for NaiveLinearRegression let slope = covar / x_var; let intercept = y_mean - slope * x_mean; - let err: f64 = feature.iter() + let err: f64 = feature + .iter() .zip(target_column.iter()) .map(|(&x, &y)| intercept + slope * x - y) .map(|r| r * r) @@ -94,8 +94,7 @@ impl<'a, J> FromIterator<(J, &'a f64)> for NaiveLinearRegression } } -impl NaiveLinearRegression -{ +impl NaiveLinearRegression { /// predict target value for a single feature vector pub fn predict(&self, x: &[f64]) -> f64 { self.intercept + x[self.feature] * self.slope @@ -104,15 +103,14 @@ impl NaiveLinearRegression #[test] fn nbc_flat() { - let data = vec![(vec![1.0, 2.0], 3.0), - (vec![2.0, 1.0], 3.0), - (vec![1.0, 5.0], 3.0), - (vec![2.0, 6.0], 3.0)]; + let data = vec![ + (vec![1.0, 2.0], 3.0), + (vec![2.0, 1.0], 3.0), + (vec![1.0, 5.0], 3.0), + (vec![2.0, 6.0], 3.0), + ]; - let nlr: NaiveLinearRegression = data - .iter() - .map(|(x, y)| (x, y)) - .collect(); + let nlr: NaiveLinearRegression = data.iter().map(|(x, y)| (x, y)).collect(); assert_eq!(nlr.predict(&[1.5, 1.5]), 3.0); assert_eq!(nlr.predict(&[5.5, 1.5]), 3.0); @@ -122,15 +120,14 @@ fn nbc_flat() { #[test] fn nbc_slope() { - let data = vec![(vec![1.0, 2.0], 8.0), - (vec![2.0, 1.0], 9.0), - (vec![1.0, 5.0], 5.0), - (vec![2.0, 6.0], 4.0)]; - - let nlr: NaiveLinearRegression = data - .iter() - .map(|(x, y)| (x, y)) - .collect(); + let data = vec![ + (vec![1.0, 2.0], 8.0), + (vec![2.0, 1.0], 9.0), + (vec![1.0, 5.0], 5.0), + (vec![2.0, 6.0], 4.0), + ]; + + let nlr: NaiveLinearRegression = data.iter().map(|(x, y)| (x, y)).collect(); assert_eq!(nlr.predict(&[1.5, 1.5]), 8.5); assert_eq!(nlr.predict(&[5.5, 1.5]), 8.5); diff --git a/src/error.rs b/src/error.rs index 743a233..809c119 100644 --- a/src/error.rs +++ b/src/error.rs @@ -4,9 +4,7 @@ use std::string::FromUtf8Error; use app_dirs::AppDirsError; use arff::Error as ArffError; -use hyper::Error as HyperError; -use hyper::error::UriError; -use hyper_tls::Error as TlsError; +use reqwest::Error as ReqwestError; use serde_json::Error as JsonError; pub type Result = StdResult; @@ -15,9 +13,7 @@ pub type Result = StdResult; pub enum Error { IoError(IoError), Utf8Error(FromUtf8Error), - HyperError(HyperError), - HyperUriError(UriError), - HyperTlsError(TlsError), + HttpsError(ReqwestError), JsonError(JsonError), ArffError(ArffError), AppDirsError(AppDirsError), @@ -35,21 +31,9 @@ impl From for Error { } } -impl From for Error { - fn from(e: HyperError) -> Self { - Error::HyperError(e) - } -} - -impl From for Error { - fn from(e: UriError) -> Self { - Error::HyperUriError(e) - } -} - -impl From for Error { - fn from(e: TlsError) -> Self { - Error::HyperTlsError(e) +impl From for Error { + fn from(e: ReqwestError) -> Self { + Error::HttpsError(e) } } @@ -69,7 +53,7 @@ impl From for Error { fn from(e: AppDirsError) -> Self { match e { AppDirsError::Io(e) => Error::IoError(e), - _ => Error::AppDirsError(e) + _ => Error::AppDirsError(e), } } } diff --git a/src/lib.rs b/src/lib.rs index 89c09c4..ec6bedb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,9 +40,7 @@ extern crate app_dirs; extern crate arff; extern crate fs2; -extern crate futures; -extern crate hyper; -extern crate hyper_tls; +extern crate reqwest; #[macro_use] extern crate log; extern crate num_traits; @@ -54,7 +52,6 @@ extern crate serde_json; extern crate simple_logger; #[cfg(test)] extern crate time; -extern crate tokio_core; pub mod baseline; mod dataset; @@ -65,25 +62,19 @@ pub mod prelude; mod procedures; mod tasks; -pub use measure_accumulator::{ - MeasureAccumulator, - PredictiveAccuracy, - RootMeanSquaredError +pub use crate::measure_accumulator::{ + MeasureAccumulator, PredictiveAccuracy, RootMeanSquaredError, }; -pub use tasks::{ - SupervisedClassification, - SupervisedRegression, - Task -}; +pub use crate::tasks::{SupervisedClassification, SupervisedRegression, Task}; #[cfg(test)] mod tests { use log::Level; use time::PreciseTime; - use baseline::NaiveBayesClassifier; - use measure_accumulator::PredictiveAccuracy; + use crate::baseline::NaiveBayesClassifier; + use crate::measure_accumulator::PredictiveAccuracy; use super::*; @@ -119,14 +110,10 @@ mod tests { let result: PredictiveAccuracy<_> = task.run(|train, test| { // train classifier - let nbc: NaiveBayesClassifier = train - .map(|(x, y)| (x, y)) - .collect(); + let nbc: NaiveBayesClassifier = train.map(|(x, y)| (x, y)).collect(); // test classifier - let y_out: Vec<_> = test - .map(|x| nbc.predict(x)) - .collect(); + let y_out: Vec<_> = test.map(|x| nbc.predict(x)).collect(); Box::new(y_out.into_iter()) }); diff --git a/src/measure_accumulator.rs b/src/measure_accumulator.rs index 9747252..b0e4695 100644 --- a/src/measure_accumulator.rs +++ b/src/measure_accumulator.rs @@ -91,28 +91,28 @@ where } } - /// Adjusted Rand Index #[derive(Debug)] pub struct AdjustedRandIndex -where T: Eq + Hash, +where + T: Eq + Hash, { - contingency_table: HashMap<(T, T), usize> + contingency_table: HashMap<(T, T), usize>, } - - impl MeasureAccumulator for AdjustedRandIndex - where T: Eq + Hash + Clone, +where + T: Eq + Hash + Clone, { fn new() -> Self { AdjustedRandIndex { - contingency_table: HashMap::new() + contingency_table: HashMap::new(), } } fn update_one(&mut self, known: &T, pred: &T) { - let n = self.contingency_table + let n = self + .contingency_table .entry((known.clone(), pred.clone())) .or_insert(0); *n += 1; diff --git a/src/openml_api/impls_from_json.rs b/src/openml_api/impls_from_json.rs index 54245e6..b6bcdca 100644 --- a/src/openml_api/impls_from_json.rs +++ b/src/openml_api/impls_from_json.rs @@ -3,10 +3,10 @@ use arff; use arff::dynamic::DataSet as ArffDataSet; use serde_json; -use dataset::DataSet; -use error::Result; -use procedures::{Fold, FrozenSets}; -use tasks::{SupervisedClassification, SupervisedRegression}; +use crate::dataset::DataSet; +use crate::error::Result; +use crate::procedures::{Fold, FrozenSets}; +use crate::tasks::{SupervisedClassification, SupervisedRegression}; use super::api_types::{CrossValItem, GenericResponse, TrainTest}; use super::web_access::get_cached; @@ -20,7 +20,8 @@ impl DataSet { let info_url = format!("https://www.openml.org/api/v1/json/data/{}", id); let info: GenericResponse = serde_json::from_str(&get_cached(&info_url).unwrap()).unwrap(); - let default_target = info.look_up("/data_set_description/default_target_attribute") + let default_target = info + .look_up("/data_set_description/default_target_attribute") .and_then(|v| v.as_str()); let target = match (default_target, target) { @@ -28,7 +29,8 @@ impl DataSet { (None, None) => None, }; - let dset_url = info.look_up("/data_set_description/url") + let dset_url = info + .look_up("/data_set_description/url") .unwrap() .as_str() .unwrap(); @@ -113,12 +115,12 @@ impl FrozenSets { if item.repeat >= folds.len() { folds.resize(item.repeat + 1, vec![]); } - let mut rep = &mut folds[item.repeat]; + let rep = &mut folds[item.repeat]; if item.fold >= rep.len() { rep.resize(item.fold + 1, Fold::new()); } - let mut fold = &mut rep[item.fold]; + let fold = &mut rep[item.fold]; match item.purpose { TrainTest::Train => fold.trainset.push(item.rowid), diff --git a/src/openml_api/impls_from_openml.rs b/src/openml_api/impls_from_openml.rs index d33a817..5318e9e 100644 --- a/src/openml_api/impls_from_openml.rs +++ b/src/openml_api/impls_from_openml.rs @@ -1,12 +1,12 @@ //! implementations to load tasks from the OpenML API. use serde_json; -use error::Result; -use tasks::{SupervisedClassification, SupervisedRegression}; +use crate::error::Result; +use crate::tasks::{SupervisedClassification, SupervisedRegression}; -use super::Id; use super::api_types::GenericResponse; use super::web_access::get_cached; +use super::Id; impl SupervisedClassification { pub fn from_openml<'a, T: Id>(id: T) -> Result { @@ -19,7 +19,7 @@ impl SupervisedClassification { match response.look_up("/task/task_type_id").unwrap().as_str() { Some("1") => Ok(SupervisedClassification::from_json(task)), Some(id) => panic!("Wrong task type ID. Expected \"1\" but got \"{}\"", id), - None => panic!("Invalid task type ID") + None => panic!("Invalid task type ID"), } } } @@ -35,7 +35,7 @@ impl SupervisedRegression { match response.look_up("/task/task_type_id").unwrap().as_str() { Some("2") => Ok(SupervisedRegression::from_json(task)), Some(id) => panic!("Wrong task type ID. Expected \"2\" but got \"{}\"", id), - None => panic!("Invalid task type ID") + None => panic!("Invalid task type ID"), } } } diff --git a/src/openml_api/web_access.rs b/src/openml_api/web_access.rs index 8071137..d8e8c82 100644 --- a/src/openml_api/web_access.rs +++ b/src/openml_api/web_access.rs @@ -4,16 +4,15 @@ use std::fs::{File, OpenOptions}; use std::io::{self, Read, Write}; use app_dirs::{app_root, AppDataType, AppInfo}; -use futures::{Future, Stream}; -use hyper::Client; -use hyper_tls::HttpsConnector; -use tokio_core::reactor::Core; -use error::Result; +use crate::error::Result; use super::file_lock::{ExclusiveLock, SharedLock}; -const APP_INFO: AppInfo = AppInfo{name: "openml-rust", author: "openml-rust"}; +const APP_INFO: AppInfo = AppInfo { + name: "openml-rust", + author: "openml-rust", +}; /// Query a URL. If possible read the response from local cache pub fn get_cached(url: &str) -> Result { @@ -57,26 +56,7 @@ pub fn get_cached(url: &str) -> Result { /// Query a URL. fn download(url: &str) -> Result { - let mut core = Core::new()?; - let handle = core.handle(); - let client = Client::configure() - .connector(HttpsConnector::new(4, &handle)?) - .build(&handle); - - let req = client.get(url.parse()?); - - let mut bytes = Vec::new(); - { - let work = req.and_then(|res| { - res.body().for_each(|chunk| { - bytes.extend_from_slice(&chunk); - Ok(()) - }) - }); - core.run(work)? - } - let result = String::from_utf8(bytes)?; - Ok(result) + Ok(reqwest::get(url)?.text()?) } /// Convert URL to file name for chching diff --git a/src/prelude.rs b/src/prelude.rs index 55978ff..5f046ef 100644 --- a/src/prelude.rs +++ b/src/prelude.rs @@ -1 +1 @@ -pub use measure_accumulator::MeasureAccumulator; \ No newline at end of file +pub use crate::measure_accumulator::MeasureAccumulator; diff --git a/src/tasks/mod.rs b/src/tasks/mod.rs index fffb2ae..19cfa18 100644 --- a/src/tasks/mod.rs +++ b/src/tasks/mod.rs @@ -8,7 +8,7 @@ use serde::de::DeserializeOwned; pub use self::supervised_classification::SupervisedClassification; pub use self::supervised_regression::SupervisedRegression; -use measure_accumulator::MeasureAccumulator; +use crate::measure_accumulator::MeasureAccumulator; pub trait Task { /// get task ID @@ -31,8 +31,10 @@ pub trait Task { /// that expect every feature to have the same type. fn run(&self, flow: F) -> M where - F: Fn(&mut Iterator, &mut Iterator) - -> Box>, + F: Fn( + &mut Iterator, + &mut Iterator, + ) -> Box>, X: DeserializeOwned, Y: DeserializeOwned, M: MeasureAccumulator; diff --git a/src/tasks/supervised_classification.rs b/src/tasks/supervised_classification.rs index 1b6919d..4c75f4a 100644 --- a/src/tasks/supervised_classification.rs +++ b/src/tasks/supervised_classification.rs @@ -1,9 +1,9 @@ use arff::dynamic::de::from_dataset; use serde::de::DeserializeOwned; -use dataset::DataSet; -use measure_accumulator::MeasureAccumulator; -use procedures::Procedure; +use crate::dataset::DataSet; +use crate::measure_accumulator::MeasureAccumulator; +use crate::procedures::Procedure; /// Classification task pub struct SupervisedClassification { @@ -35,7 +35,8 @@ impl SupervisedClassification { Y: DeserializeOwned, M: MeasureAccumulator, { - let (dx, dy) = self.source_data + let (dx, dy) = self + .source_data .clone_split() .expect("Supervised Classification requires a target column"); @@ -63,13 +64,16 @@ impl SupervisedClassification { /// that expect every feature to have the same type. pub fn run(&self, flow: F) -> M where - F: Fn(&mut Iterator, &mut Iterator) - -> Box>, + F: Fn( + &mut Iterator, + &mut Iterator, + ) -> Box>, X: DeserializeOwned, Y: DeserializeOwned, M: MeasureAccumulator, { - let (dx, dy) = self.source_data + let (dx, dy) = self + .source_data .clone_split() .expect("Supervised Classification requires a target column"); @@ -79,11 +83,13 @@ impl SupervisedClassification { let mut measure = M::new(); for fold in self.estimation_procedure.iter() { - let mut train = fold.trainset + let mut train = fold + .trainset .iter() .map(|&i| (&x[i * dx.n_cols()..(i + 1) * dx.n_cols()], &y[i])); - let mut test = fold.testset + let mut test = fold + .testset .iter() .map(|&i| &x[i * dx.n_cols()..(i + 1) * dx.n_cols()]); diff --git a/src/tasks/supervised_regression.rs b/src/tasks/supervised_regression.rs index f586710..3ee9dd4 100644 --- a/src/tasks/supervised_regression.rs +++ b/src/tasks/supervised_regression.rs @@ -1,9 +1,9 @@ use arff::dynamic::de::from_dataset; use serde::de::DeserializeOwned; -use dataset::DataSet; -use measure_accumulator::MeasureAccumulator; -use procedures::Procedure; +use crate::dataset::DataSet; +use crate::measure_accumulator::MeasureAccumulator; +use crate::procedures::Procedure; /// Regression task pub struct SupervisedRegression { @@ -34,7 +34,8 @@ impl SupervisedRegression { Y: DeserializeOwned, M: MeasureAccumulator, { - let (dx, dy) = self.source_data + let (dx, dy) = self + .source_data .clone_split() .expect("Supervised Regression requires a target column"); @@ -62,13 +63,16 @@ impl SupervisedRegression { /// that expect every feature to have the same type. pub fn run(&self, flow: F) -> M where - F: Fn(&mut Iterator, &mut Iterator) - -> Box>, + F: Fn( + &mut Iterator, + &mut Iterator, + ) -> Box>, X: DeserializeOwned, Y: DeserializeOwned, M: MeasureAccumulator, { - let (dx, dy) = self.source_data + let (dx, dy) = self + .source_data .clone_split() .expect("Supervised Regression requires a target column"); @@ -78,11 +82,13 @@ impl SupervisedRegression { let mut measure = M::new(); for fold in self.estimation_procedure.iter() { - let mut train = fold.trainset + let mut train = fold + .trainset .iter() .map(|&i| (&x[i * dx.n_cols()..(i + 1) * dx.n_cols()], &y[i])); - let mut test = fold.testset + let mut test = fold + .testset .iter() .map(|&i| &x[i * dx.n_cols()..(i + 1) * dx.n_cols()]);