Skip to content

Commit 1d60071

Browse files
author
Nikhil Raverkar
committed
Updated MLIO version and added buildspec
1 parent 4a77d1d commit 1d60071

File tree

7 files changed

+101
-30
lines changed

7 files changed

+101
-30
lines changed

ci/buildspec.yml

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
version: 0.2
2+
3+
phases:
4+
install:
5+
runtime-versions:
6+
python: 3.8
7+
docker: 19
8+
pre_build:
9+
commands:
10+
- echo Pre-build started on `date`
11+
- echo Installing dependencies...
12+
# Download the Miniconda installer over HTTPS from the official Anaconda host;
# the script is executed in the next step, so it must not be fetched over plain HTTP.
- curl -LO https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
13+
- bash Miniconda3-latest-Linux-x86_64.sh -bfp /miniconda3
14+
- export PATH=/miniconda3/bin:${PATH}
15+
# -y answers the confirmation prompt explicitly (matching `conda update -y conda`)
# so the step never depends on how the CI shell handles stdin.
- conda install -y python=3.8
16+
- conda update -y conda
17+
- python3 -m pip install pip==20.1
18+
- python3 -m pip install .[test]
19+
build:
20+
commands:
21+
- echo Build started on `date`
22+
- echo Docker login...
23+
# Feed the password on stdin instead of -p so it does not appear in the process
# list or shell trace; the variables are quoted to survive special characters.
- printf '%s' "$dockerhub_password" | docker login -u "$dockerhub_username" --password-stdin
24+
- echo Building the Docker image...
25+
- docker build -t xgboost-container-base:$FRAMEWORK_VERSION-cpu-py3 -f docker/$FRAMEWORK_VERSION/base/Dockerfile.cpu .
26+
- python3 setup.py bdist_wheel --universal
27+
- docker build -t preprod-xgboost-container:$FRAMEWORK_VERSION-cpu-py3 -f docker/$FRAMEWORK_VERSION/final/Dockerfile.cpu .
28+
- printf "FROM preprod-xgboost-container:$FRAMEWORK_VERSION-cpu-py3\nADD . /app\nWORKDIR /app\nRUN python3 -m pip install .[test]" > Dockerfile.test
29+
- docker build -t test-xgboost-container -f Dockerfile.test .
30+
- echo Running tox...
31+
- docker run --rm -t test-xgboost-container sh -c 'tox -e ALL'
32+
- echo Running container tests...
33+
- pytest test/integration/local --docker-base-name preprod-xgboost-container --tag $FRAMEWORK_VERSION-cpu-py3 --py-version 3 --framework-version $FRAMEWORK_VERSION
34+
- docker tag preprod-xgboost-container:$FRAMEWORK_VERSION-cpu-py3 $SM_ALPHA.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:$FRAMEWORK_VERSION-cpu-py3
35+
- docker tag preprod-xgboost-container:$FRAMEWORK_VERSION-cpu-py3 $SM_ALPHA.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:$FRAMEWORK_VERSION
36+
post_build:
37+
commands:
38+
- echo Build completed on `date`
39+
- |
40+
case $CODEBUILD_WEBHOOK_EVENT in
41+
PULL_REQUEST_MERGED)
42+
echo Logging in to Amazon ECR...
43+
$(aws ecr get-login --no-include-email --region $AWS_DEFAULT_REGION)
44+
echo Pushing the Docker image...
45+
docker push $SM_ALPHA.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:$FRAMEWORK_VERSION-cpu-py3 | grep -v -E "[0-9]{12}.dkr.ecr.\S+.amazonaws.com"
46+
docker push $SM_ALPHA.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:$FRAMEWORK_VERSION | grep -v -E "[0-9]{12}.dkr.ecr.\S+.amazonaws.com"
47+
;;
48+
PULL_REQUEST_CREATED | PULL_REQUEST_UPDATED | PULL_REQUEST_REOPENED)
49+
echo Logging in to Amazon ECR...
50+
$(aws ecr get-login --no-include-email --region $AWS_DEFAULT_REGION)
51+
echo Pushing the Docker image...
52+
# pushes test tag for manual verification, requires cleanup in ECR every once in a while though
53+
TEST_TAG=$SM_ALPHA.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:${FRAMEWORK_VERSION}-cpu-py3-test
54+
docker tag preprod-xgboost-container:$FRAMEWORK_VERSION-cpu-py3 ${TEST_TAG}
55+
docker push ${TEST_TAG} | grep -v -E "[0-9]{12}.dkr.ecr.\S+.amazonaws.com"
56+
;;
57+
*)
58+
echo Undefined behavior for webhook event type $CODEBUILD_WEBHOOK_EVENT
59+
;;
60+
esac

docker/1.0-1/base/Dockerfile.cpu

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
FROM ubuntu:16.04
22

3+
ARG PYARROW_VERSION=0.16.0
4+
ARG MLIO_VERSION=0.6.0
5+
ARG PYTHON_VERSION=3.7.10
36
# Install python and other runtime dependencies
47
RUN apt-get update && \
58
apt-get -y install \
@@ -25,11 +28,11 @@ RUN echo 'installing miniconda' && \
2528

2629
ENV PATH=/miniconda3/bin:${PATH}
2730

28-
RUN conda install -c conda-forge python=3.6.13 && \
31+
RUN conda install -c conda-forge python=${PYTHON_VERSION} && \
2932
conda update -y conda && \
30-
conda install pip=20.1 && \
31-
conda install -c conda-forge pyarrow=0.14.1 && \
32-
conda install -c mlio -c conda-forge mlio-py=0.1
33+
python3 -m pip install --upgrade pip && \
34+
conda install -c conda-forge pyarrow=${PYARROW_VERSION} && \
35+
conda install -c mlio -c conda-forge mlio-py=${MLIO_VERSION}
3336

3437
# Python won’t try to write .pyc or .pyo files on the import of source modules
3538
# Force stdin, stdout and stderr to be totally unbuffered. Good for logging

requirements.txt

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,27 @@
11
Flask==1.1.1 # sagemaker-containers requires flask 1.1.1
22
PyYAML==5.4.1
3-
boto3==1.10.14
4-
botocore==1.13.14
5-
gunicorn<20.0.0
6-
cryptography==3.4.6
3+
Pillow==9.1.0
4+
boto3==1.17.52
5+
botocore==1.20.52
6+
cryptography==35.0.0
7+
gunicorn==19.10.0
78
matplotlib==3.3.2
89
multi-model-server==1.1.1
910
numpy==1.19.2
1011
pandas==1.1.3
12+
protobuf==3.20.1
1113
psutil==5.6.7 # sagemaker-containers requires psutil 5.6.7
1214
python-dateutil==2.8.0
13-
requests<2.21
15+
requests==2.25.1
1416
retrying==1.3.3
15-
sagemaker-containers>=2.8.3,<2.9
17+
sagemaker-containers==2.8.6.post2
1618
sagemaker-inference==1.2.0
1719
scikit-learn==0.23.2
18-
scipy==1.2.2
19-
smdebug==0.4.13
20-
urllib3==1.25.9
21-
wheel
20+
scipy==1.5.3
21+
smdebug==1.0.10
22+
urllib3==1.26.5
23+
wheel==0.35.1
24+
jinja2==3.0.3
25+
itsdangerous==2.0.1
26+
MarkupSafe==2.1.1
27+
Werkzeug==0.15.6

src/sagemaker_xgboost_container/data_utils.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -299,10 +299,11 @@ def _get_csv_dmatrix_pipe_mode(pipe_path, csv_weights):
299299
:return: xgb.DMatrix or None
300300
"""
301301
try:
302-
dataset = [mlio.SageMakerPipe(pipe_path)]
303-
reader = mlio.CsvReader(dataset=dataset,
304-
batch_size=BATCH_SIZE,
305-
header_row_index=None)
302+
pipes_path = pipe_path if isinstance(pipe_path, list) else [pipe_path]
303+
dataset = [mlio.SageMakerPipe(path) for path in pipes_path]
304+
reader_params = mlio.DataReaderParams(dataset=dataset, batch_size=BATCH_SIZE)
305+
csv_params = mlio.CsvParams(header_row_index=None)
306+
reader = mlio.CsvReader(reader_params, csv_params)
306307

307308
# Check if data is present in reader
308309
if reader.peek_example() is not None:
@@ -449,17 +450,15 @@ def get_recordio_protobuf_dmatrix(path, is_pipe=False):
449450
"""
450451
try:
451452
if is_pipe:
452-
dataset = [mlio.SageMakerPipe(path)]
453-
reader = mlio.RecordIOProtobufReader(dataset=dataset,
454-
batch_size=BATCH_SIZE)
453+
pipes_path = path if isinstance(path, list) else [path]
454+
dataset = [mlio.SageMakerPipe(pipe_path) for pipe_path in pipes_path]
455455
else:
456456
dataset = mlio.list_files(path)
457-
reader = mlio.RecordIOProtobufReader(dataset=dataset,
458-
batch_size=BATCH_SIZE)
459-
457+
reader_params = mlio.DataReaderParams(dataset=dataset, batch_size=BATCH_SIZE)
458+
reader = mlio.RecordIOProtobufReader(reader_params)
460459
if reader.peek_example() is not None:
461460
# recordio-protobuf tensor may be dense (use numpy) or sparse (use scipy)
462-
if type(reader.peek_example()['values']) is mlio.core.DenseTensor:
461+
if type(reader.peek_example()['values']) is mlio.DenseTensor:
463462
to_matrix = as_numpy
464463
vstack = np.vstack
465464
else:

src/sagemaker_xgboost_container/encoder.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,10 @@ def recordio_protobuf_to_dmatrix(string_like): # type: (bytes) -> xgb.DMatrix
8383
"""
8484
buf = bytes(string_like)
8585
dataset = [mlio.InMemoryStore(buf)]
86-
reader = mlio.RecordIOProtobufReader(dataset=dataset, batch_size=100)
86+
reader_params = mlio.DataReaderParams(dataset=dataset, batch_size=100)
87+
reader = mlio.RecordIOProtobufReader(reader_params)
8788

88-
if type(reader.peek_example()['values']) is mlio.core.DenseTensor:
89+
if type(reader.peek_example()['values']) is mlio.DenseTensor:
8990
to_matrix = as_numpy
9091
vstack = np.vstack
9192
else:

test-requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
Flask==1.1.1
12
coverage
23
docker-compose
34
flake8

tox.ini

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[tox]
2-
envlist = {py36}-xgboost{1.0},flake8
2+
envlist = {py37}-xgboost{1.0},flake8
33

44
[flake8]
55
max-line-length = 120
@@ -15,8 +15,9 @@ deps =
1515
-r{toxinidir}/requirements.txt
1616
-r{toxinidir}/test-requirements.txt
1717
conda_deps=
18-
pyarrow=0.14.1
19-
mlio-py=0.1
18+
pyarrow==0.16.0
19+
mlio-py==0.6.0
20+
tbb==2020.2
2021
conda_channels=
2122
conda-forge
2223
mlio

0 commit comments

Comments
 (0)