Skip to content

Commit 16a5f14

Browse files
Support parallel loading of multiple Predictor.df paths
* Support specifying multiple df file paths
* Support loading multiple df file paths with a `*` glob expression
* Support loading of GeoPackages
1 parent 6d07206 commit 16a5f14

File tree

1 file changed

+18
-1
lines changed

1 file changed

+18
-1
lines changed

src/utils.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
import os
22
import ast
3+
import glob
34
import math
45
import random
56
import logging
67
import uuid
78

89
import numpy as np
910
import pandas as pd
11+
import geopandas as gpd
1012
import matplotlib.pyplot as plt
13+
from joblib import Parallel, delayed
1114

1215
import dataset
1316
import geometry
@@ -202,6 +205,10 @@ def parse_int_list(l):
202205

203206

204207
def load_df(df_path):
208+
if '*' in df_path:
209+
files = glob.glob(df_path)
210+
return load_dfs(files)
211+
205212
if '.csv' in df_path:
206213
return pd.read_csv(df_path)
207214

@@ -211,4 +218,14 @@ def load_df(df_path):
211218
if '.parquet' in df_path:
212219
return pd.read_parquet(df_path)
213220

214-
raise Exception('File type not supported, please use .csv, .pkl, .parquet files.')
221+
if '.gpkg' in df_path:
222+
return gpd.read_file(df_path)
223+
224+
raise Exception('File type not supported, please use .csv, .pkl, .parquet, or .gpkg files.')
225+
226+
227+
def load_dfs(df_paths):
    """Load several dataframe files in parallel and concatenate them.

    Every path is handed to `load_df` as a separate joblib task
    (`n_jobs=-1`), and the loaded frames are concatenated into one frame
    with a freshly numbered index (`ignore_index=True`).

    Args:
        df_paths: Iterable of file paths, each one loadable by `load_df`.

    Returns:
        A single dataframe containing the rows of all loaded files.

    Raises:
        ValueError: If `df_paths` is empty, e.g. because a glob pattern
            matched no files.
    """
    # Materialise first: a generator cannot be checked for emptiness and
    # then iterated again.
    paths = list(df_paths)
    if not paths:
        # Without this guard pd.concat fails with the opaque
        # "No objects to concatenate"; same exception type, clearer message.
        raise ValueError(
            'No input files to load: df_paths is empty '
            '(did the glob pattern match any files?).'
        )

    dfs = Parallel(n_jobs=-1)(delayed(load_df)(path) for path in paths)
    return pd.concat(dfs, ignore_index=True)

0 commit comments

Comments
 (0)