
Commit 2fd9054

Added additional, upgraded numpy, and updated handlers to meet expected behavior (accepting properties via serving.properties or env variables, throwing an error when CSV files contain non-numeric data)
1 parent 1769295 commit 2fd9054

File tree: 8 files changed, +732 −336 lines changed


engines/python/setup/djl_python/encode_decode.py

Lines changed: 10 additions & 7 deletions
@@ -22,26 +22,29 @@
 
 
 def decode_csv(inputs: Input, require_headers=True):  # type: (str) -> np.array
-    csv_string = inputs.get_as_string()
+    csv_content = inputs.get_as_string()
 
     if require_headers:
-        if not any(header in csv_string.splitlines()[0].lower()
+        if not any(header in csv_content.splitlines()[0].lower()
                    for header in ["question", "context", "inputs"]):
             raise ValueError(
                 "You need to provide the correct CSV with Header columns to use it with the inference toolkit default handler.",
             )
-        stream = StringIO(csv_string)
+        stream = StringIO(csv_content)
         request_list = list(csv.DictReader(stream))
         if "inputs" in request_list[0].keys():
             return {"inputs": [entry["inputs"] for entry in request_list]}
         else:
             return {"inputs": request_list}
     else:
         # for predictive ML inputs
-        try:
-            return np.genfromtxt(StringIO(csv_string), delimiter=",")
-        except (ValueError, TypeError) as e:
-            raise ValueError(f"Failed to parse CSV data: {str(e)}")
+        result = np.genfromtxt(StringIO(csv_content), delimiter=",")
+        # Check for NaN values which indicate non-numeric data
+        if np.isnan(result).any():
+            raise ValueError(
+                "CSV contains non-numeric data. Please provide numeric data only."
+            )
+        return result
 
 
 def encode_csv(content):  # type: (str) -> np.array
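
Note: np.genfromtxt converts cells it cannot parse into NaN instead of raising, so the old try/except never surfaced non-numeric input; the new code checks for NaN explicitly. A minimal standalone sketch of that validation path, without the djl_python Input wrapper (parse_numeric_csv is an illustrative name):

from io import StringIO

import numpy as np


def parse_numeric_csv(csv_content: str) -> np.ndarray:
    # genfromtxt maps unparseable cells to NaN rather than raising,
    # so non-numeric data is detected with an explicit NaN check.
    result = np.genfromtxt(StringIO(csv_content), delimiter=",")
    if np.isnan(result).any():
        raise ValueError(
            "CSV contains non-numeric data. Please provide numeric data only.")
    return result


print(parse_numeric_csv("1,2,3\n4,5,6"))  # [[1. 2. 3.] [4. 5. 6.]]
# parse_numeric_csv("1,two,3")            # raises ValueError
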

engines/python/setup/djl_python/sklearn_handler.py

Lines changed: 3 additions & 3 deletions
@@ -36,8 +36,8 @@ def _get_trusted_types(self, properties: dict):
         trusted_types_str = properties.get("skops_trusted_types", "")
         if not trusted_types_str:
             raise ValueError(
-                "SKLEARN_SKOPS_TRUSTED_TYPES environment variable must be set to load skops models. "
-                "Example: SKLEARN_SKOPS_TRUSTED_TYPES='sklearn.ensemble._forest.RandomForestClassifier,numpy.ndarray'"
+                "option.skops_trusted_types must be set to load skops models. "
+                "Example: option.skops_trusted_types='sklearn.ensemble._forest.RandomForestClassifier,numpy.ndarray'"
             )
         trusted_types = [
             t.strip() for t in trusted_types_str.split(",") if t.strip()
@@ -75,7 +75,7 @@ def initialize(self, properties: dict):
         if properties.get("trust_insecure_model_files",
                           "false").lower() != "true":
             raise ValueError(
-                f"trust_insecure_model_files must be set to 'true' to use {model_format} format (only skops is secure by default)"
+                f"option.trust_insecure_model_files must be set to 'true' to use {model_format} format (only skops is secure by default)"
             )
 
         if model_format == "joblib":
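
Note: the error messages now point at the serving.properties keys (option.skops_trusted_types, option.trust_insecure_model_files) rather than an environment variable. As a rough standalone sketch of how such a comma-separated option value becomes the trusted-types list, mirroring _get_trusted_types above (parse_trusted_types is an illustrative name):

def parse_trusted_types(trusted_types_str: str) -> list:
    # Empty values are rejected, matching the handler's error above.
    if not trusted_types_str:
        raise ValueError(
            "option.skops_trusted_types must be set to load skops models.")
    # Split on commas, dropping surrounding whitespace and empty entries.
    return [t.strip() for t in trusted_types_str.split(",") if t.strip()]


print(parse_trusted_types(
    "sklearn.ensemble._forest.RandomForestClassifier, numpy.ndarray"))
# ['sklearn.ensemble._forest.RandomForestClassifier', 'numpy.ndarray']
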

engines/python/setup/djl_python/xgboost_handler.py

Lines changed: 7 additions & 4 deletions
@@ -31,7 +31,8 @@ def __init__(self):
 
     def initialize(self, properties: dict):
         model_dir = properties.get("model_dir")
-        model_format = properties.get("model_format", "json")
+        model_format = (properties.get("model_format")
+                        or os.environ.get("MODEL_FORMAT") or "json")
 
         format_extensions = {
             "json": ["json"],
@@ -56,10 +57,12 @@ def initialize(self, properties: dict):
             self.model = xgb.Booster()
             self.model.load_model(model_file)
         else:  # unsafe formats: pickle, xgb
-            if properties.get("trust_insecure_model_files",
-                              "false").lower() != "true":
+            trust_insecure = (properties.get("trust_insecure_model_files")
+                              or os.environ.get("TRUST_INSECURE_MODEL_FILES")
+                              or "false")
+            if trust_insecure.lower() != "true":
                 raise ValueError(
-                    "trust_insecure_model_files must be set to 'true' to use unsafe formats (only json/ubj are secure by default)"
+                    "option.trust_insecure_model_files must be set to 'true' to use unsafe formats (only json/ubj are secure by default)"
                 )
             if model_format == "pickle":
                 with open(model_file, 'rb') as f:
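
Note: the handler now falls back from serving.properties to an environment variable before applying the default ("json" for model_format, "false" for trust_insecure_model_files). A hedged sketch of that lookup order as a standalone helper (resolve_option is an illustrative name, not part of the handler):

import os


def resolve_option(properties: dict, key: str, env_var: str, default: str) -> str:
    # serving.properties wins, then the environment variable, then the default.
    return properties.get(key) or os.environ.get(env_var) or default


os.environ["MODEL_FORMAT"] = "ubj"
print(resolve_option({}, "model_format", "MODEL_FORMAT", "json"))  # ubj
print(resolve_option({"model_format": "json"}, "model_format",
                     "MODEL_FORMAT", "json"))                      # json
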

serving/docker/Dockerfile

Lines changed: 1 addition & 0 deletions
@@ -84,6 +84,7 @@ RUN scripts/install_python.sh ${python_version} && \
     scripts/patch_oss_dlc.sh python && \
     pip3 install torch=="${torch_version}" torchvision --extra-index-url https://download.pytorch.org/whl/cpu && \
     pip3 install scikit-learn=="${sklearn_version}" skops cloudpickle xgboost=="${xgboost_version}" pydantic=="${pydantic_version}" && \
+    pip3 install --upgrade numpy && \
     echo "${djl_serving_version} cpufull" > /opt/djl/bin/telemetry && \
     rm -rf /opt/djl/logs && \
     chown -R djl:djl /opt/djl && \

tests/integration/.gitignore

Lines changed: 1 addition & 0 deletions
@@ -2,3 +2,4 @@
 /logs
 /all_logs
 /models
+/client_logs

tests/integration/download_models.sh

Lines changed: 5 additions & 12 deletions
@@ -34,6 +34,7 @@ python_skl_models_urls=(
   "https://resources.djl.ai/test-models/python/sklearn/sklearn_multi_model_v2.zip"
   "https://resources.djl.ai/test-models/python/sklearn/sklearn_unsafe_model_v2.zip"
   "https://resources.djl.ai/test-models/python/sklearn/sklearn_custom_model_v2.zip"
+  "https://resources.djl.ai/test-models/python/sklearn/sklearn_skops_model_env_v2.zip"
 )
 
 python_xgb_models_urls=(
@@ -47,18 +48,10 @@ python_xgb_models_urls=(
 download() {
   urls=("$@")
   for url in "${urls[@]}"; do
-    if [[ "$url" == */ ]]; then
-      # Directory URL - use wget to download recursively
-      dirname=$(basename "${url%/}")
-      if ! [ -d "${dirname}" ]; then
-        wget -r -np -nH --cut-dirs=3 -R "index.html*" "$url"
-      fi
-    else
-      # File URL - use curl with cache-busting headers
-      filename=${url##*/}
-      if ! [ -f "${filename}" ]; then
-        curl -sf -H "Cache-Control: no-cache" -H "Pragma: no-cache" -O "$url"
-      fi
+    filename=${url##*/}
+    # does not download the file, if file already exists
+    if ! [ -f "${filename}" ]; then
+      curl -sf -O "$url"
     fi
   done
 }
