diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..beb509c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.venv/
+data/
+.env
diff --git a/config.yaml b/config.yaml
new file mode 100644
index 0000000..01f37e2
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,4 @@
+database:
+  sql:
+    uri: !ENV 'sqlite:///feature_store.db'
+    chunksize: 10
\ No newline at end of file
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..897aa3e
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,7 @@
+# CryptoML
+
+Current functionality:
+ - Build datasets from bootstrap data
+
+Backlog:
+ - Update datasets with new data records
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..9e5cafb
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,13 @@
+typer
+xgboost
+pandas
+statsmodels
+scikit-learn
+scipy
+numpy
+requests
+pyyaml
+git+https://github.com/RedLicorice/pyti.git
+sqlalchemy
+confuse
+python-dotenv
\ No newline at end of file
diff --git a/src/cli.py b/src/cli.py
new file mode 100644
index 0000000..5798173
--- /dev/null
+++ b/src/cli.py
@@ -0,0 +1,109 @@
+import typer
+app = typer.Typer()
+
+
+@app.command(name='bootstrap', help='Bootstrap dataset with data from zip files in data/bootstrap')
+def build_dataset(symbol: str, currency: str):
+    target_name = '../data/dataset-{symbol}{currency}'.format(symbol=symbol, currency=currency)
+    from crawlers import kraken, coinmetrics
+    _kraken = kraken.get_bootstrap_data(symbol, currency).fillna(method='ffill')
+    _coinmetrics = coinmetrics.get_bootstrap_data(symbol)
+
+    ohlcv = kraken.ticks_to_ohlcv(_kraken, '1D').fillna(method='ffill')
+
+    from dataset import build, get_feature_metadata, make_target
+    import pandas as pd
+    result = build(ohlcv=ohlcv, coinmetrics=_coinmetrics, W=10)
+    result.to_csv(target_name + '.csv', index_label='timestamp')
+
+    _begin, _end, _features = get_feature_metadata(result)
+    meta = pd.DataFrame.from_records(_features)
+    meta.index = meta['name']
+    meta.drop(labels='name', axis='columns', inplace=True)
+    meta.to_csv(target_name + '.meta.csv', index_label='feature')
+
+    target = make_target(ohlcv)
+    target.to_csv(target_name + '.target.csv', index_label='timestamp')
+
+    info = {
+        'symbol': symbol,
+        'currency': currency,
+        'interval': '1D',
+        'records': result.shape[0],
+        'n_features': result.shape[1],
+        'index_min': result.index.min().to_pydatetime().isoformat(),
+        'index_max': result.index.max().to_pydatetime().isoformat(),
+        'valid_index_min': _begin,
+        'valid_index_max': _end,
+        'targets': {str(k): k == 'class' for k in target.columns},
+        'features': {str(k): True for k in meta.index}
+    }
+    with open(target_name + '.info.yaml', 'w') as f:
+        import yaml
+        yaml.dump(info, f, sort_keys=False)
+    print('done')
+
+@app.command(name='selection', help='Perform feature selection and update .info.yaml with selected features')
+def selection(symbol: str, currency: str, percent: float):
+    target_name = '../data/dataset-{symbol}{currency}'.format(symbol=symbol, currency=currency)
+    import pandas as pd
+    import math
+    from crawlers import load_yaml, save_yaml
+
+    info = load_yaml(target_name + '.info.yaml')
+    dataset = pd.read_csv(target_name + '.csv', parse_dates=True, index_col='timestamp')
+    target = pd.read_csv(target_name + '.target.csv', parse_dates=True, index_col='timestamp')
+    first_valid_i = dataset.index.get_loc(info.valid_index_min)
+    last_valid_i = dataset.index.get_loc(info.valid_index_max)
+
+    training_records = math.floor((last_valid_i - first_valid_i) * percent)
+    dataset['label'] = target['class']
+    training_dataset = dataset.iloc[first_valid_i:first_valid_i+training_records]
+    # testing_dataset = dataset.iloc[first_valid_i+training_records: last_valid_i+1]
+
+    from xgboost import XGBClassifier
+    from util.selection_pipeline import Pipeline
+    from sklearn.impute import SimpleImputer
+    from sklearn.preprocessing import StandardScaler
+    pipeline = Pipeline(steps=[
+        ('s', StandardScaler()),
+        ('i', SimpleImputer()),
+        ('c', XGBClassifier(use_label_encoder=False))
+    ])
+
+    X_train = training_dataset.drop(labels=['label'], axis='columns')
+    with pd.option_context('mode.use_inf_as_na', True):  # Treat inf values as NaN while filling
+        X_train.fillna(axis='columns', method='ffill', inplace=True)
+    y_train = training_dataset['label']
+
+    from sklearn.feature_selection import SelectFromModel
+    sel = SelectFromModel(pipeline)
+    sel.fit(X_train, y_train)
+    support = sel.get_support()
+
+    import yaml
+    dinfo = info.to_dict()
+    # Back up the original info file before the feature mask is updated
+    with open(target_name + '.info.yaml.bak', 'w') as f:
+        yaml.dump(dinfo, f, sort_keys=False)
+    for c, mask in zip(X_train.columns, support):
+        dinfo['features'][c] = bool(mask)
+    with open(target_name + '.info.yaml', 'w') as f:
+        yaml.dump(dinfo, f, sort_keys=False)
+    print('done')
+
+
+@app.command()
+def test(symbol: str, currency: str):
+    from dataset import make_ohlcv_ta
+    from crawlers import kraken
+    _kraken = kraken.get_bootstrap_data(symbol, currency).fillna(method='ffill')
+    ohlcv = kraken.ticks_to_ohlcv(_kraken, '1D').fillna(method='ffill')
+    ohlcv_ta = make_ohlcv_ta(ohlcv)
+    print('It works')
+    print(ohlcv_ta.head())
+
+if __name__ == '__main__':
+    app()
\ No newline at end of file
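
Note on the selection command: `SelectFromModel` requires the wrapped estimator to expose `coef_` or `feature_importances_` after fitting, which sklearn's own `Pipeline` does not — presumably why the custom `util.selection_pipeline.Pipeline` exists. That module is not part of this diff; the sketch below is only an illustration of the idea, assuming it simply delegates to the final step:

```python
# Hypothetical sketch of util/selection_pipeline.py (not in this diff).
# SelectFromModel reads feature_importances_ from the fitted estimator,
# so the pipeline proxies that attribute from its final step (XGBClassifier).
from sklearn.pipeline import Pipeline as SkPipeline

class Pipeline(SkPipeline):
    @property
    def feature_importances_(self):
        # Delegate to the fitted final estimator
        return self._final_estimator.feature_importances_
```
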
diff --git a/src/config.py b/src/config.py
new file mode 100644
index 0000000..06b9477
--- /dev/null
+++ b/src/config.py
@@ -0,0 +1,32 @@
+import confuse
+from dotenv import load_dotenv
+import re, os, yaml
+
+# Load Configuration
+env_variable_pattern = re.compile(r'.*?\$\{(\w+)\}.*?')
+_configfile = None
+load_dotenv()
+
+def replace_env_variables(loader, node):
+    """
+    Extracts the environment variable from the node's value
+    :param yaml.Loader loader: the yaml loader
+    :param node: the current node in the yaml
+    :return: the parsed string that contains the value of the environment
+    variable
+    """
+    value = loader.construct_scalar(node)
+    match = env_variable_pattern.findall(value)  # find all env variables in the line
+    if match:
+        full_value = value
+        for g in match:
+            full_value = full_value.replace(
+                f'${{{g}}}', os.environ.get(g, g)
+            )
+        return full_value if not full_value.isnumeric() else int(full_value)
+    return value
+
+confuse.Loader.add_constructor('!ENV', replace_env_variables)
+#config = confuse.Configuration('CryptoML-API', __name__)
+config = confuse.LazyConfig('CryptoML', __name__)
+config.set_file('../config.yaml')
diff --git a/src/crawlers/__init__.py b/src/crawlers/__init__.py
new file mode 100644
index 0000000..b5ecb52
--- /dev/null
+++ b/src/crawlers/__init__.py
@@ -0,0 +1,90 @@
+import yaml
+from util.bunch import Bunch
+
+
+class Spec(dict):
+    def __init__(self, base_url, endpoints, **kwargs):
+        if not isinstance(endpoints, dict):
+            raise ValueError('Endpoints must be a dictionary!')
+        self.base_url = base_url
+        self.endpoints = endpoints
+        super().__init__(kwargs)
+
+    @staticmethod
+    def from_dict(spec):
+        if not spec:
+            raise ValueError('Provided spec is invalid.')
+        if 'base_url' not in spec:
+            raise ValueError('Provided spec does not describe a base url.')
+        if 'endpoints' not in spec:
+            raise ValueError('Provided spec does not describe any endpoints.')
+        res = Spec(base_url=spec['base_url'], endpoints=spec['endpoints'])
+        return res
+
+    def __dir__(self):
+        return self.endpoints.keys()
+
+    def __setstate__(self, state):
+        pass
+
+    def __setattr__(self, key, value):
+        if key in ['endpoints', 'base_url']:
+            self[key] = value
+        else:
+            self.endpoints[key] = value
+
+    def __getattr__(self, key):
+        if key in ['endpoints', 'base_url']:
+            return self[key]
+        try:
+            query = self.endpoints[key]
+            # Parametrized endpoints are returned as a bound str.format,
+            # so callers can fill the placeholders: api().endpoint(param=value)
+            if '{' in query and '}' in query:
+                return (self.base_url + query).format
+            return self.base_url + query
+        except KeyError:
+            raise AttributeError('Method not described in api spec: ' + key)
+
+
+def load_api_spec(filename):
+    spec = None
+    with open(filename, 'r') as f:
+        try:
+            spec = yaml.safe_load(f)
+        except yaml.YAMLError as exc:
+            print('error loading api spec: ' + str(exc))
+    if spec:
+        return Spec.from_dict(spec)
+
+def load_yaml(filename):
+    spec = None
+    with open(filename, 'r') as f:
+        try:
+            spec = yaml.safe_load(f)
+        except yaml.YAMLError as exc:
+            print('error loading yaml: ' + str(exc))
+    if spec:
+        return Bunch(**spec)
+
+def save_yaml(filename, data):
+    with open(filename, 'w') as f:
+        try:
+            yaml.safe_dump(data=data, stream=f)
+        except yaml.YAMLError as exc:
+            print('error saving yaml: ' + str(exc))
+
+def load_bootstrap(zip_file, csv_file):
+    import zipfile
+    import pandas as pd
+    with zipfile.ZipFile(zip_file) as z:
+        with z.open(csv_file) as f:
+            train = pd.read_csv(f, delimiter=",", parse_dates=True, index_col='date')
+    return train
+
+def bootstrap_index(filename):
+    index = load_yaml(filename)
+    return index.bootstrap
+
+def load_transformer(filename):
+    import importlib.util
+    import ntpath
+
+    spec = importlib.util.spec_from_file_location("bootstrap." + ntpath.basename(filename)[:-3], filename)
+    transformer = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(transformer)
+    return transformer
\ No newline at end of file
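
For context, the `Spec.__getattr__` trick means endpoint URLs can be built like method calls. A short illustration with a made-up spec (the `example.com` URLs and endpoint names here are hypothetical, not part of the repo):

```python
# Illustrative only: how Spec resolves endpoint attributes.
spec = Spec(
    base_url='https://api.example.com',
    endpoints={
        'status': '/v1/status',              # plain endpoint -> full URL string
        'ticker': '/v1/ticker?pair={pair}',  # parametrized -> bound str.format
    }
)

assert spec.status == 'https://api.example.com/v1/status'
assert spec.ticker(pair='XBTUSD') == 'https://api.example.com/v1/ticker?pair=XBTUSD'
```
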
diff --git a/src/crawlers/coinmetrics-community.yaml b/src/crawlers/coinmetrics-community.yaml
new file mode 100644
index 0000000..8310284
--- /dev/null
+++ b/src/crawlers/coinmetrics-community.yaml
@@ -0,0 +1,6 @@
+base_url: 'https://community-api.coinmetrics.io/v4'
+api_key: ''
+endpoints:
+  asset_metadata: '/catalog/assets?assets={assets}&pretty=false'
+  metrics_timeseries: '/timeseries/asset-metrics?assets={assets}&metrics={metrics}&frequency={frequency}&status=all&start_time={begin}&end_time={end}&end_inclusive=true&timezone=Europe/Rome'
+
diff --git a/src/crawlers/coinmetrics.py b/src/crawlers/coinmetrics.py
new file mode 100644
index 0000000..1302117
--- /dev/null
+++ b/src/crawlers/coinmetrics.py
@@ -0,0 +1,62 @@
+def api():
+    from . import load_api_spec
+    return load_api_spec('crawlers/coinmetrics-community.yaml')
+
+def get_assets(assets):
+    if isinstance(assets, list):
+        assets = ','.join(assets)
+    import requests
+    resp = requests.get(api().asset_metadata(assets=assets))
+    return resp.json()['data']
+
+def get_asset_features(asset_name, frequency):
+    assets = get_assets(asset_name)
+    result = []
+    idx = 1
+    for asset in assets:
+        for metric in asset['metrics']:
+            for f in metric['frequencies']:
+                if frequency == f['frequency']:
+                    result.append({
+                        'index': idx,
+                        'dataset': 'coinmetrics',
+                        'asset': asset['asset'],
+                        'name': metric['metric'],
+                        'min': f['min_time'],
+                        'max': f['max_time'],
+                        'enabled': True
+                    })
+                    idx += 1
+    return result
+
+def get_asset_metrics(asset_name, metrics, frequency, begin, end):
+    if isinstance(metrics, list):
+        metrics = ','.join(metrics)
+    import requests
+    resp = requests.get(api().metrics_timeseries(assets=asset_name, metrics=metrics, frequency=frequency, begin=begin, end=end))
+    rj = resp.json()
+    result = rj['data']
+    import time
+    # Follow pagination: the community API returns a next_page_url while more data is available
+    if 'next_page_url' in rj:
+        while True:
+            time.sleep(0.5)  # throttle requests to stay within rate limits
+            resp = requests.get(rj['next_page_url'])
+            rj = resp.json()
+            result += rj['data']
+            if 'next_page_url' not in rj:
+                break
+    return result
+
+
+def get_bootstrap_data(symbol):
+    symbol = symbol.lower()
+
+    from . import bootstrap_index, load_transformer
+    try:
+        index = bootstrap_index('../data/bootstrap/index.yaml')
+        transformer = load_transformer('../data/bootstrap/' + index.coinmetrics.transformer)
+        if symbol not in index.coinmetrics.groups:
+            filename = index.coinmetrics.name_format.format(symbol=symbol) + '.csv'
+            return transformer.get_df('../data/bootstrap/' + index.coinmetrics.zipfile, filename)
+        else:
+            filenames = [index.coinmetrics.name_format.format(symbol=symbol) + '.csv']
+            filenames += [name + '.csv' for name in index.coinmetrics.groups[symbol]]
+            dataframes = [transformer.get_df('../data/bootstrap/' + index.coinmetrics.zipfile, filename) for filename in filenames]
+
+            import pandas as pd
+            return pd.concat(dataframes)
+    except Exception as e:
+        print('Exception occurred! ' + str(e))
+        raise
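
A usage sketch for the paginated fetch. `PriceUSD` and `AdrActCnt` are typical CoinMetrics community metrics, but availability per asset should be checked via the catalog (`get_asset_features`); the dates here are arbitrary:

```python
# Illustrative call: fetch daily price and active-address metrics for BTC.
from crawlers import coinmetrics

rows = coinmetrics.get_asset_metrics(
    asset_name='btc',
    metrics=['PriceUSD', 'AdrActCnt'],
    frequency='1d',
    begin='2020-01-01',
    end='2020-12-31',
)
print(len(rows), rows[0])  # list of dicts, one per (asset, time) record
```
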
diff --git a/src/crawlers/kraken.py b/src/crawlers/kraken.py
new file mode 100644
index 0000000..1ea7692
--- /dev/null
+++ b/src/crawlers/kraken.py
@@ -0,0 +1,51 @@
+def api():
+    from . import load_api_spec
+    return load_api_spec('crawlers/kraken.yaml')
+
+def get_pair_ohlc(pair, metrics, frequency, begin, end):
+    if isinstance(metrics, list):
+        metrics = ','.join(metrics)
+    import requests
+    resp = requests.get(api().ohlc_data(assets=pair, metrics=metrics, frequency=frequency, since=0))
+    rj = resp.json()
+    result = rj['data']
+    import time
+    if 'next_page_url' in rj:
+        while True:
+            time.sleep(0.5)
+            resp = requests.get(rj['next_page_url'])
+            rj = resp.json()
+            result += rj['data']
+            if 'next_page_url' not in rj:
+                break
+    return result
+
+
+def get_bootstrap_data(symbol, currency):
+    from . import bootstrap_index, load_transformer
+    # Kraken uses legacy codes for some assets (XBT for BTC, XDG for DOGE)
+    _convert_map = {
+        'btc': 'xbt',
+        'doge': 'xdg'
+    }
+    if symbol in _convert_map:
+        symbol = _convert_map[symbol]
+    if currency in _convert_map:
+        currency = _convert_map[currency]
+    try:
+        index = bootstrap_index('../data/bootstrap/index.yaml')
+        transformer = load_transformer('../data/bootstrap/' + index.kraken.transformer)
+        if index.kraken.groups:
+            raise ValueError('Groups are not supported for kraken Loader')
+        filename = index.kraken.name_format.format(symbol=symbol.upper(), currency=currency.upper()) + '.csv'
+        return transformer.get_df('../data/bootstrap/' + index.kraken.zipfile, filename)
+    except Exception as e:
+        print('Exception occurred! ' + str(e))
+        raise
+
+
+def ticks_to_ohlcv(ticks, interval):
+    resample = ticks.resample(interval)
+    ohlc = resample['price'].ohlc()
+    ohlc['volume'] = resample['amount'].sum()
+    return ohlc
\ No newline at end of file
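
For reference, `ticks_to_ohlcv` expects a DataFrame with a `DatetimeIndex` and `price`/`amount` columns, as produced by the bootstrap loader. A minimal sketch with synthetic ticks (the values are made up):

```python
# Minimal sketch of the tick -> OHLCV resampling on synthetic data.
import pandas as pd
from crawlers.kraken import ticks_to_ohlcv

ticks = pd.DataFrame(
    {'price': [100.0, 101.5, 99.8, 102.0], 'amount': [0.5, 1.2, 0.3, 2.0]},
    index=pd.to_datetime([
        '2021-01-01 09:00', '2021-01-01 15:00',
        '2021-01-02 10:00', '2021-01-02 18:00',
    ]),
)
ohlcv = ticks_to_ohlcv(ticks, '1D')
print(ohlcv)  # one row per day: open, high, low, close, volume
```
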
diff --git a/src/crawlers/kraken.yaml b/src/crawlers/kraken.yaml
new file mode 100644
index 0000000..e677ae6
--- /dev/null
+++ b/src/crawlers/kraken.yaml
@@ -0,0 +1,6 @@
+base_url: 'https://api.kraken.com'
+api_key: ''
+endpoints:
+  asset_info: '/0/public/Assets?asset={assets}' # https://docs.kraken.com/rest/#operation/getAssetInfo
+  # OHLC Returns: [int