diff --git a/README.md b/README.md
index 8b515c9..7239917 100644
--- a/README.md
+++ b/README.md
@@ -35,6 +35,22 @@ $ python savsnet/plot_ts_GP.py --help
 for further details.
 
+Spatial mapping of cases
+------------------------
+
+The `savsnet/logistic2D` module contains functions for spatial smoothing of case occurrence at point locations, using an inducing-point approximation to a logistic geostatistical model with a stationary mean and Matérn ($k=3/2$) covariance function. It is driven by the `logistic_krige.py` script, for example:
+```bash
+$ python logistic_krige.py -i 5000 -s '2020-03-04' -p gadm36_GBR.gpkg myData.csv
+```
+where `myData.csv` is a CSV file containing at least the columns `consult_date` (ISO date format), `person_easting` and `person_northing` (rectangular coordinates), and `gi_mpc_selected` (1 or 0, denoting positive or negative for the condition of interest). The script fits the logistic geostatistical model, writes the posterior to a Python pickle file, and writes the posterior mean surface to a GeoTIFF file.
+
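+For illustration, the first few rows of a suitably formatted input file might look like this (the values below are hypothetical):
+```
+consult_date,person_easting,person_northing,gi_mpc_selected
+2020-03-04,354500,421700,1
+2020-03-05,412300,388200,0
+```
+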
+See
+```bash
+$ python logistic_krige.py --help
+```
+for information on further arguments.
+
+
 License
 -------
diff --git a/logistic_krige.py b/logistic_krige.py
new file mode 100644
index 0000000..5a97612
--- /dev/null
+++ b/logistic_krige.py
@@ -0,0 +1,68 @@
+"""Performs logistic kriging."""
+
+import sys
+import argparse
+import pickle as pkl
+import numpy as np
+import pandas as pd
+import geopandas as gp
+import pymc3 as pm
+
+from savsnet.logistic2D import logistic2D
+from savsnet.gis_util import gen_raster, make_masked_raster, raster2coords
+
+
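+# Summary functions applied to the MCMC draws of the latent surface
+# (iterations x pixels) when rasterising: get_mean gives the posterior mean
+# surface; get_exceed flags pixels whose posterior probability of lying above
+# the stationary mean falls outside (0.05, 0.95), scaled to 255 for display.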
+def get_mean(x):
+    return np.mean(x, axis=0)
+
+
+def get_exceed(x):
+    p = np.sum(x > 0., axis=0) / x.shape[0]
+    return ((p < 0.05) | (p > 0.95)) * 255.
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Spatially smooth case occurrence data using logistic kriging')
+    parser.add_argument("data", nargs=1, type=str,
+                        help="Input data file with (at least) columns consult_date, gi_mpc_selected, person_easting, person_northing")
+    parser.add_argument("--iterations", "-i", type=int, default=[5000], nargs=1,
+                        help="Number of MCMC iterations")
+    parser.add_argument("--startdate", "-s", type=np.datetime64, nargs=1, help="Start date")
+    parser.add_argument("--period", "-d", type=int, default=[7], nargs=1, help="Period in days")
+    parser.add_argument("--polygon", "-p", type=str, nargs=1, help="Polygon file")
+    args = parser.parse_args()
+
+    data = pd.read_csv(args.data[0])
+    data['consult_date'] = pd.to_datetime(data['consult_date'])
+    period = pd.to_timedelta(args.period[0], unit='day')
+
+    start = args.startdate[0]
+    end = start + period
+
+    d = data[(start <= data['consult_date']) & (data['consult_date'] < end)]
+
+    d = d.groupby([d['person_easting'], d['person_northing']])
+    aggr = d['gi_mpc_selected'].agg([['case', lambda x: np.sum(x) > 0]])
+    aggr.reset_index(inplace=True)
+
+    y = np.array(aggr['case'])
+    coords = np.array(aggr[['person_easting', 'person_northing']], dtype='float32')
+    knots = pm.gp.util.kmeans_inducing_points(300, coords)
+
+    # GIS
+    poly = gp.read_file(args.polygon[0]).to_crs(epsg=27700)
+    r = make_masked_raster(poly, 5000., crs='+init=epsg:27700')
+    pred_points = raster2coords(r)
+
+    sampler = logistic2D(y, coords=coords/1000., knots=knots/1000., pred_coords=pred_points/1000.)
+    result = sampler(args.iterations[0], chains=1)
+    with open(f"posterior_week{start}.pkl", 'wb') as f:
+        pkl.dump(result, f)
+
+    gen_raster(result['posterior']['s_star'], poly, filename=f"raster_{start}.tiff",
+               summary=[['mean', get_mean], ['exceed', get_exceed]])
+    sys.exit(0)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..60fd778
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+numpy
+matplotlib
+rasterio
+geopandas
+pandas
+theano
+pymc3
diff --git a/savsnet/gis_util.py b/savsnet/gis_util.py
new file mode 100644
index 0000000..5696677
--- /dev/null
+++ b/savsnet/gis_util.py
@@ -0,0 +1,82 @@
+# Raster generation utilities
+import numpy as np
+import rasterio
+import rasterio.mask
+from rasterio.io import MemoryFile
+from rasterio.transform import from_origin
+from rasterio.plot import show
+import matplotlib.pyplot as plt
+
+
+def make_masked_raster(polygon, resolution, bands=1, all_touched=False,
+                       nodata=-9999., crs='+init=epsg:27700', filename=None):
+    """Generates a raster covering the bounds of `polygon`, with pixels of
+    dimension `resolution` x `resolution` and pixels outside `polygon` set
+    to `nodata`. The CRS is taken from `polygon`, falling back to `crs`."""
+    x = np.arange(polygon.bounds['minx'].values, polygon.bounds['maxx'].values, resolution)
+    y = np.arange(polygon.bounds['miny'].values, polygon.bounds['maxy'].values, resolution)
+    X, Y = np.meshgrid(x, y)
+    Z = np.full(X.shape, 1.)
+    transform = from_origin(x[0] - resolution/2, y[-1] + resolution/2, resolution, resolution)
+
+    raster_args = {'driver': 'GTiff',
+                   'height': Z.shape[0],
+                   'width': Z.shape[1],
+                   'count': bands,
+                   'dtype': Z.dtype,
+                   'crs': polygon.crs or crs,
+                   'transform': transform,
+                   'nodata': nodata}
+
+    if filename is None:
+        memfile = MemoryFile()
+        raster = memfile.open(**raster_args)
+    else:
+        raster = rasterio.open(filename, 'w+', **raster_args)
+
+    for i in range(bands):
+        raster.write(Z, i+1)
+
+    mask = rasterio.mask.mask(raster, polygon.geometry, crop=True, all_touched=all_touched, filled=True)
+    for i in range(bands):
+        raster.write(mask[0][i], i+1)
+
+    return raster
+
+
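+# raster2coords and fill_raster deliberately use the same transpose-then-ravel
+# pixel ordering, so that a vector of values predicted at the coordinates
+# returned by raster2coords can be written straight back with fill_raster.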
+def raster2coords(raster):
+    x, y = np.meshgrid(np.arange(raster.shape[0]), np.arange(raster.shape[1]))
+    coords = np.array([raster.xy(x, y) for x, y in zip(x.ravel(), y.ravel())])
+    return coords[raster.read(1).T.ravel() != raster.nodata, :]
+
+
+def fill_raster(data, raster, band=1):
+    r = raster.read(band).T.flatten()
+    r[r != raster.nodata] = data
+    r = r.reshape([raster.shape[1], raster.shape[0]]).T
+    raster.write(r, band)
+
+
+def gen_raster(posterior, poly, filename=None,
+               summary=[['s_star', lambda x: np.mean(x, axis=0)]]):
+    raster = make_masked_raster(polygon=poly, resolution=5000.,
+                                bands=len(summary), filename=filename)
+
+    for i, (name, f) in enumerate(summary):
+        fill_raster(f(posterior), raster, i+1)
+        raster.update_tags(i+1, surface=name)
+
+    raster.close()
+    return raster
+
+
+def plot_raster(ax, raster, transform, title=None, z_range=[None, None], log=False, alpha=1.0):
+    show(raster, transform=transform, vmin=z_range[0], vmax=z_range[1], ax=ax, cmap='coolwarm', alpha=alpha)
+    ax.set_title(title)
+    ax.axis('off')
+    cb = plt.colorbar(ax.images[-1], ax=ax, shrink=0.7)
+    if log is True:
+        ticks = cb.get_ticks()
+        cb.set_ticks(ticks)
+        cb.set_ticklabels(np.round(np.exp(ticks), 1))
+    return ax, cb
diff --git a/savsnet/logistic2D.py b/savsnet/logistic2D.py
new file mode 100644
index 0000000..62d62f3
--- /dev/null
+++ b/savsnet/logistic2D.py
@@ -0,0 +1,144 @@
+"""Functions to perform 2D logistic kriging"""
+
+import sys
+
+import geopandas as gp
+import numpy as np
+import pymc3 as pm
+import theano.tensor as tt
+from pymc3.gp.util import stabilize
+from theano.tensor.nlinalg import matrix_inverse
+
+from savsnet.gis_util import make_masked_raster, raster2coords
+
+
+class Data:
+    def __init__(self, y, coords_obs, coords_knots):
+        self.y = np.squeeze(np.array(y))
+        self.coords = np.array(coords_obs)
+        self.knots = np.array(coords_knots)
+        assert self.y.shape[0] == self.coords.shape[0]
+        assert self.coords.shape[1] == 2
+        assert self.knots.shape[1] == 2
+
+    def __len__(self):
+        return self.y.shape[0]
+
+
+def project(x_val, x_coords, x_star_coords, cov_fn):
+    """Projects a Gaussian process defined by `x_val` at `x_coords`
+    onto a set of coordinates `x_star_coords`.
+
+    :param x_val: values of the GP at `x_coords`
+    :param x_coords: a set of coordinates for each `x_val`
+    :param x_star_coords: a set of coordinates onto which to project the GP
+    :param cov_fn: a covariance function returning a covariance matrix given a set of coordinates
+    :returns: a vector of projected values at `x_star_coords`
+    """
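+    # Standard kriging projector for a zero-mean GP:
+    #   E[S(x*) | S(x) = s] = K(x*, x) K(x, x)^{-1} s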
+    kxx = cov_fn(x_coords)
+    kxxtx = matrix_inverse(stabilize(kxx))
+    kxxs = tt.dot(kxxtx, x_val)
+    knew = cov_fn(x_star_coords, x_coords)
+    return tt.dot(knew, kxxs)
+
+
+def logistic2D(y, coords, knots, pred_coords):
+    r"""Returns a sampling function for a logistic geostatistical model with
+    Matern32 covariance.
+
+    Let $y_i, i=1,\dots,n$ be a set of binary observations at locations $x_i, i=1,\dots,n$.
+    We model $y_i$ as
+    $$
+    y_i \sim \mbox{Bernoulli}(p_i)
+    $$
+    with
+    $$
+    \mbox{logit}(p_i) = \alpha + S(x_i).
+    $$
+
+    $S(x)$ is a latent Gaussian process defined as
+    $$
+    S(x) \sim \mbox{MultivariateNormal}(0, \Sigma)
+    $$
+    where
+    $$
+    \Sigma_{ij} = \sigma^2 \left(1 + \frac{\sqrt{3}\,\lVert x_i - x_j \rVert}{\ell}\right)
+    \exp\left(-\frac{\sqrt{3}\,\lVert x_i - x_j \rVert}{\ell}\right).
+    $$
+
+    The model evaluates $S(x_i)$ approximately using a set of inducing points $x^\star_i, i=1,\dots,m$ at $m$ auxiliary locations. See [Banerjee *et al.* (2009)](https://dx.doi.org/10.1111%2Fj.1467-9868.2008.00663.x) for further details.
+
+    :param y: a vector of binary outcomes {0, 1}
+    :param coords: a matrix of coordinates of `y` of shape `[n, d]` for `d` dimensions and `n` observations.
+    :param knots: a matrix of inducing point coordinates of shape `[m, d]`.
+    :param pred_coords: a matrix of coordinates at which predictions are required.
+    :returns: a function which runs `pm.sample(*args, **kwargs)` and returns a dictionary containing the PyMC3 `model` and the `posterior` PyMC3 `MultiTrace` object.
+    """
+    model = pm.Model()
+    with model:
+        alpha = pm.Normal('alpha', mu=0., sd=1.)
+        sigma_sq = pm.Gamma('sigma_sq', 1., 1.)
+        phi = pm.Gamma('phi', 2., 0.1)
+
+        spatial_cov = sigma_sq * pm.gp.cov.Matern32(2, phi)
+        spatial_gp = pm.gp.Latent(cov_func=spatial_cov)
+        s = spatial_gp.prior('s', X=knots)
+        s_star = pm.Deterministic('s_star', project(s, knots, pred_coords, spatial_cov))
+
+        eta = alpha + project(s, knots, coords, spatial_cov)
+        y_rv = pm.Bernoulli('y', p=pm.invlogit(eta), observed=y)
+
+    def sample_fn(*args, **kwargs):
+        with model:
+            trace = pm.sample(*args, **kwargs)
+        return {'model': model, 'posterior': trace}
+
+    return sample_fn
+
+
+if __name__ == '__main__':
+    import argparse
+    import pickle as pkl
+    import pandas as pd
+
+    parser = argparse.ArgumentParser(description='Spatially smooth case occurrence data using logistic kriging')
+    parser.add_argument("data", nargs=1, type=str,
+                        help="Input data file with (at least) columns consult_date, gi_mpc_selected, person_easting, person_northing")
+    parser.add_argument("--startdate", "-s", type=np.datetime64, nargs=1, help="Start date")
+    parser.add_argument("--period", "-p", type=int, default=[7], nargs=1, help="Period in days")
+    parser.add_argument("--iterations", "-i", type=int, default=[5000], nargs=1,
+                        help="Number of MCMC iterations")
+    args = parser.parse_args()
+
+    data = pd.read_csv(args.data[0])
+    data['consult_date'] = pd.to_datetime(data['consult_date'])
+    week = pd.to_timedelta(args.period[0], unit='day')
+
+    start = args.startdate[0]
+    end = start + week
+
+    d = data[(start <= data['consult_date']) & (data['consult_date'] < end)]
+
+    d = d.groupby([d['person_easting'], d['person_northing']])
+    aggr = d['gi_mpc_selected'].agg([['case', lambda x: np.sum(x) > 0]])
+    aggr.reset_index(inplace=True)
+
+    y = np.array(aggr['case'])
+    coords = np.array(aggr[['person_easting', 'person_northing']], dtype='float32')
+    knots = pm.gp.util.kmeans_inducing_points(300, coords)
+
+    # GIS
+    poly = gp.read_file('/home/jewellcp/Documents/GIS/gadm36_GBR.gpkg').to_crs(epsg=27700)
+    r = make_masked_raster(poly, 5000., crs='+init=epsg:27700')
+    pred_points = raster2coords(r)
+
+    sampler = logistic2D(y, coords=coords/1000., knots=knots/1000., pred_coords=pred_points/1000.)
+    result = sampler(args.iterations[0], chains=1)
+    with open(f"vom_dog_spatial_week{start}.pkl", 'wb') as f:
+        pkl.dump(result, f)
+
+    sys.exit(0)
diff --git a/savsnet/plot_ts_gp.py b/savsnet/plot_ts_gp.py
index 9d61c64..de677c2 100644
--- a/savsnet/plot_ts_gp.py
+++ b/savsnet/plot_ts_gp.py
@@ -5,12 +5,16 @@
 from matplotlib.ticker import FormatStrFormatter
 import pandas as pd
 import numpy as np
-import pymc3 as pm
 import pickle
-from prev_ts_GP import extractData
 
-def plotPrediction(ax,X,y,N,pred,mindate,lag=None,prev_mult=1,plot_gp=False):
+def clip_to_range(x, datetime_range):
+    mintime = np.min(datetime_range)
+    maxtime = np.max(datetime_range)
+    return x[(x.index >= mintime) & (x.index <= maxtime)]
+
+
+def plotPrediction(ax, data, prev_mult=1, plot_gp=False):
     """Predictive time series plot with (by default) the prediction summarised as
     [0.01,0.05,0.5,0.95,0.99] quantiles, and observations colour-coded by tail-probability.
@@ -32,23 +36,17 @@
     Nothing. Just modifies ax
     """
-    from pymc3.gp.util import plot_gp_dist
+    # Prediction quantiles
+    phat = data['pred'].transpose() * prev_mult  # TxM for T times and M MCMC iterations
+    pctiles = np.percentile(phat, [1, 5, 50, 95, 99], axis=1)
 
-    # Time slice
-    ts = slice(0,X.shape[0])
-    if lag is not None:
-        ts = slice(X.shape[0]-lag, X.shape[0])
-    # Data
-    x = np.array([mindate + pd.Timedelta(d,unit='D') for d in X[ts]])
-    pbar = np.array(y/N)[ts] * prev_mult
-
-    # Prediction quantiles
-    phat = pm.invlogit(pred[:,ts]).eval() * prev_mult
-    pctiles = np.percentile(phat, [1,5,50,95,99], axis=0)
+    pbar = data['data']['cases']/data['data']['N'] * prev_mult
+    pbar = clip_to_range(pbar, phat.index)
 
     # Tail probabilities for observed p
-    prp = np.sum(pbar > phat, axis=0)/phat.shape[0]
+    phat_interp = phat.apply(lambda s: np.interp(pbar.index.asi8, phat.index.asi8, s))  # interpolate draws onto observation dates
+    prp = np.sum(np.array(pbar)[:, None] > phat_interp, axis=1)/phat_interp.shape[1]
     prp[prp > .5] = 1. - prp[prp > .5]
 
     # Risk masks
@@ -58,17 +56,33 @@
     # Construct plot
     if plot_gp is True:
-        plot_gp_dist(ax,phat,x,plot_samples=False, palette="Blues")
+        from pymc3.gp.util import plot_gp_dist
+        plot_gp_dist(ax, np.array(phat).T, phat.index, plot_samples=False, palette="Blues")
     else:
-        ax.fill_between(x, pctiles[4,:], pctiles[0,:], color='lightgrey',alpha=.5,label="99% credible interval")
-        ax.fill_between(x, pctiles[3,:], pctiles[1,:], color='lightgrey',alpha=1,label='95% credible interval')
-        ax.plot(x, pctiles[2,:], c='grey', ls='-', label="Predicted prevalence")
-        ax.scatter(x[green],pbar[green],c='green',s=8,alpha=0.5,label='0.05
0 else [np.nan]
+    return np.concatenate([indexof(xx, y) for xx in x])
+
+
+def BinomGP(y, N, time, time_pred, mcmc_iter, start={}):
     """Fits a logistic Gaussian process regression timeseries model.
 
     Details
@@ -31,6 +41,7 @@
     y -- a vector of cases
     N -- a vector of number tested
-    X -- a vector of times at which y is observed
-    X_star -- a vector of times at which predictons are to be made
+    time -- a vector of times at which y is observed
+    time_pred -- a vector of times at which predictions are to be made
     start -- a dictionary of starting values for the MCMC.
 
@@ -38,46 +49,48 @@
     Returns
     =======
-    A tuple of (model,trace,pred) where model is a PyMC3 Model object, trace is a PyMC3 Multitrace object, and pred is a 5000 x X_star.shape[0] matrix of draws from the posterior predictive distribution of $\pi_t$.
+    A dict {'model': ..., 'trace': ..., 'pred': ...} where model is a PyMC3 Model object, trace is a PyMC3 MultiTrace object, and pred contains draws from the posterior predictive distribution of $\pi_t$ at the prediction times.
     """
-
-    X = np.array(X)[:,None] # Inputs must be arranged as column vector
-    X_star = X_star[:,None]
-
+    time = np.array(time)[:, None]  # Inputs must be arranged as column vector
+    offset = np.mean(time)
+    time = time - offset  # Centre time
     model = pm.Model()
     with model:
-
-        alpha = pm.Normal('alpha',0, 1000, testval=0.)
-        beta = pm.Normal('beta',0, 100, testval=0.)
-        sigmasq_s = pm.Gamma('sigmasq_s',.1,.1,testval=0.1)
-        phi_s = pm.Gamma('phi_s', 1., 1., testval=0.5)
-        tau2 = pm.Gamma('tau2',.1,.1,testval=0.1)
+        alpha = pm.Normal('alpha', 0, 1000, testval=0.)
+        beta = pm.Normal('beta', 0, 100, testval=0.)
+        sigmasq_s = pm.HalfNormal('sigmasq_s', 5., testval=0.1)
+        phi_s = 0.16  # pm.HalfNormal('phi_s', 5., testval=0.5)
+        tau2 = pm.Gamma('tau2', .1, .1, testval=0.1)
 
         # Construct GPs
-        cov_s = sigmasq_s * pm.gp.cov.Periodic(1,365.,phi_s)
-        mean_f = pm.gp.mean.Linear(coeffs=beta, intercept=alpha)
-        gp_s = pm.gp.Latent(mean_func=mean_f,cov_func=cov_s)
-
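+        # The latent log-odds combine a linear trend (the GP mean) with an
+        # annual periodic GP (365-day period) plus a white-noise nugget that
+        # absorbs week-to-week overdispersion.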
+        cov_t = sigmasq_s * pm.gp.cov.Periodic(1, 365., phi_s)
+        mean_t = pm.gp.mean.Linear(coeffs=beta, intercept=alpha)
+        gp_period = pm.gp.Latent(mean_func=mean_t, cov_func=cov_t)
+
         cov_nugget = pm.gp.cov.WhiteNoise(tau2)
-        nugget = pm.gp.Latent(cov_func=cov_nugget)
+        gp_nugget = pm.gp.Latent(cov_func=cov_nugget)
+
+        gp_t = gp_period + gp_nugget
+        s = gp_t.prior('gp_t', X=time)
 
-        gp = gp_s + nugget
-        model.gp = gp
-        s = gp.prior('s',X=X)
-
         Y_obs = pm.Binomial('y_obs', N, pm.invlogit(s), observed=y)
 
         # Sample
         trace = pm.sample(mcmc_iter,
                           chains=1,
-                          start=start)
-        # Predictions
-        s_star = gp.conditional('s_star', X_star)
-        pred = pm.sample_ppc(trace, vars=[s_star,Y_obs])
+                          start=start,
+                          tune=1000,
+                          adapt_step_size=True)
+
+        # Prediction
+        time_pred = np.array(time_pred)[:, None] - offset  # centre on the same offset as the data
+        s_star = gp_t.conditional('s_star', time_pred)
+        pi_star = pm.Deterministic('pi_star', pm.invlogit(s_star))
+        pred = pm.sample_posterior_predictive(trace, var_names=['y_obs', 'pi_star'])
 
-    return (model,trace,pred)
+    return {'model': model, 'trace': trace, 'pred': pred}
 
 
-def extractData(dat,species,condition):
+def extractData(dat, species, condition, date_range=None):
     """Extracts weekly records for 'condition' in 'species'.
 
     Parameters
     ==========
@@ -95,61 +108,60 @@
     N -- total number of cases seen weekly for species
     """
-    dat = dat[dat.Species==species]
-    dates = pd.DatetimeIndex(dat.Date)
-    minDate = np.min(dates)
-    week = (dates-minDate).days // 7
-
-    dat = dat.groupby(week)
-    aggTable = dat['Consult_reason'].agg([['cases',lambda x: np.sum(x==condition)],
-                                          ['N',len]])
-
-    aggTable['day'] = aggTable.index * 7
-    aggTable['date'] = [minDate + pd.Timedelta(dt*7, unit='D') for dt in aggTable.index]
-
-    return (aggTable)
+    dat['Date'] = pd.to_datetime(dat['Date'])
+    if date_range is None:
+        date_range = [np.min(dat['Date']), np.max(dat['Date'])]
+
+    dat = dat[dat['Species'] == species]
+    dat = dat[dat['Date'] >= date_range[0]]
+    dat = dat[dat['Date'] < date_range[1]]
+
+    # Turn date into days since __refdate, quantized into weeks
+    dat['day'] = (dat['Date'] - __refdate) // np.timedelta64(1, 'W') * 7.0  # Ensure this is float
+
+    # Now group by week
+    datgrp = dat.groupby(by='day')
+    aggTable = datgrp['Consult_reason'].agg([['cases', lambda x: np.sum(x == condition)],
+                                             ['N', len]])
+    aggTable['day'] = np.array(aggTable.index)
+    aggTable.index = __refdate + pd.to_timedelta(aggTable.index, unit='D')
+    return aggTable
 
 
-def predProb(y, ystar):
-    """Calculates predictive tail probability of y wrt ystar.
-
-    Parameters
-    ==========
-    y -- a vector of length n of observed values of y
-    ystar -- a m x n matrix containing draws from the joint distribution of ystar.
-
-    Returns
-    =======
-    A vector of tail probabilities for y wrt ystar.
-    """
-    q = np.sum(y > ystar, axis=0) / ystar.shape[0]
-    q[q > .5] = 1. - q[q > .5]
-    return q
-
-
-
-
-
-
-
-if __name__=='__main__':
+# def predProb(y, ystar):
+#     """Calculates predictive tail probability of y wrt ystar.
+#
+#     Parameters
+#     ==========
+#     y -- a vector of length n of observed values of y
+#     ystar -- a m x n matrix containing draws from the joint distribution of ystar.
+#
+#     Returns
+#     =======
+#     A vector of tail probabilities for y wrt ystar.
+#     """
+#     q = np.sum(y > ystar, axis=0) / ystar.shape[0]
+#     q[q > .5] = 1. - q[q > .5]
+#     return q
+
+
+if __name__ == '__main__':
     import argparse
     import pickle
 
     parser = argparse.ArgumentParser(description='Fit Binomial GP timeseries model')
-    parser.add_argument("data", nargs=1, type=str,
-                        help="Input data file with (at least) columns Date, Species, Consult_reason")
-    parser.add_argument("-o", "--prefix", dest="prefix",type=str,default=None,
+    parser.add_argument("data", nargs=1, type=str,
+                        help="Input data file with (at least) columns Date, Species, Consult_reason")
+    parser.add_argument("-o", "--prefix", dest="prefix", type=str, default=None,
                         help="Output file prefix [optional].")
-    parser.add_argument("-c", "--condition", dest="condition",nargs='+',type=str,
+    parser.add_argument("-c", "--condition", dest="condition", nargs='+', type=str,
                         required=True, help="One or more space-separated conditions to analyse")
-    parser.add_argument("-s", "--species", dest="species",nargs='+',type=str,
+    parser.add_argument("-s", "--species", dest="species", nargs='+', type=str,
                         required=True, help="One or more space-separated species to analyse")
-    parser.add_argument("-i", "--iterations", dest="iterations", type=int,
+    parser.add_argument("-i", "--iterations", dest="iterations", type=int, default=[5000], nargs=1,
                         help="Number of MCMC iterations (default 5000)")
     args = parser.parse_args()
 
@@ -158,14 +170,27 @@
     for species in args.species:
         for condition in args.condition:
-            print ("Running GP smoother for condition '%s' in species '%s'" % (condition,species))
-            d = extractData(data, species, condition)
-            res = BinomGP(np.array(d.cases),np.array(d.N),
-                          np.array(d.day), np.array(d.day),
+            print("Running GP smoother for condition '%s' in species '%s'" % (condition, species))
+            sys.stdout.write("Extracting data...")
+            sys.stdout.flush()
+            d = extractData(data, species, condition, date_range=['2016-02-02', '2019-12-01'])
+            sys.stdout.write("done\nCalculating...")
+            sys.stdout.flush()
+
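+            # Predict on a weekly grid expressed as float days since __refdate,
+            # matching the units of the observation times in d.day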
+            pred_range = pd.date_range(start='2018-02-14', end='2020-02-14', freq='W')
+            day_pred = (pred_range - __refdate) / np.timedelta64(1, 'D')
+
+            res = BinomGP(np.array(d.cases, dtype=float), np.array(d.N, dtype=float),
+                          np.array(d.day, dtype=float), np.array(day_pred, dtype=float),
                           mcmc_iter=args.iterations[0])
+            res['data'] = extractData(data, species, condition, date_range=['2018-02-14', '2020-02-14'])
+            res['pred'] = pd.DataFrame(res['pred']['pi_star'])
+            res['pred'].columns = pd.Index(pred_range)
+            sys.stdout.write("done\n")
+            sys.stdout.flush()
             filename = "%s_%s.pkl" % (species, condition)
             if args.prefix is not None:
-                filename = "%s_%s" % (args.prefix,filename)
-            print ("Saving '%s'" % filename)
+                filename = "%s%s" % (args.prefix, filename)
+            print("Saving '%s'" % filename)
             with open(filename, 'wb') as f:
-                pickle.dump(res,f,pickle.HIGHEST_PROTOCOL)
+                pickle.dump(res, f, pickle.HIGHEST_PROTOCOL)
diff --git a/savsnet/raster.py b/savsnet/raster.py
new file mode 100644
index 0000000..076c2a8
--- /dev/null
+++ b/savsnet/raster.py
@@ -0,0 +1,18 @@
+"""Generate rasters for logistic 2D"""
+
+import pickle as pkl
+import geopandas as gp
+from savsnet.gis_util import gen_raster
+
+
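+# Convenience wrapper around gen_raster: renders a pickled posterior surface
+# over the GB boundary polygon. NB the GADM polygon path below is
+# machine-specific and will need changing for other environments.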
+def make_raster(s_star, filename):
+    poly = gp.read_file('/home/jewellcp/Documents/GIS/gadm36_GBR.gpkg').to_crs(epsg=27700)
+    gen_raster(s_star, poly, filename=filename)
+
+
+if __name__ == '__main__':
+
+    with open('../../vom_dog_post.pkl', 'rb') as f:
+        trace = pkl.load(f)
+
+    make_raster(trace['posterior']['s_star'], 'vom_dog_2020-01-01.tiff')
diff --git a/tests/test_data_extraction.py b/tests/test_data_extraction.py
new file mode 100644
index 0000000..ab4d1bb
--- /dev/null
+++ b/tests/test_data_extraction.py
@@ -0,0 +1,19 @@
+# Data extraction tests
+
+import unittest
+import os
+import pandas as pd
+
+from savsnet.prev_ts_GP import extractData
+
+
+class TestDataExtraction(unittest.TestCase):
+
+    def setUp(self):
+        test_dir = os.path.dirname(os.path.realpath(__file__))
+        self.data_path = os.path.join(test_dir, "../data/savsnet_2019_08_06.csv")
+
+    def test_extract(self):
+        data_csv = pd.read_csv(self.data_path)
+        data = extractData(data_csv, 'cat', 'gastroenteric')
+        self.assertGreater(len(data), 0)
+        self.assertTrue({'cases', 'N', 'day'}.issubset(data.columns))
diff --git a/tests/test_prediction.py b/tests/test_prediction.py
new file mode 100644
index 0000000..f61c288
--- /dev/null
+++ b/tests/test_prediction.py
@@ -0,0 +1,3 @@
+"""Placeholder prediction tests: currently just checks that the model module imports."""
+
+import savsnet.prev_ts_GP  # noqa: F401