Skip to content

Commit

Permalink
add docker support
Browse files Browse the repository at this point in the history
  • Loading branch information
BigJohhn committed Dec 2, 2024
1 parent 3843e4c commit 94f4d1a
Show file tree
Hide file tree
Showing 7 changed files with 224 additions and 0 deletions.
7 changes: 7 additions & 0 deletions docker/10_nvidia.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"file_format_version" : "1.0.0",
"ICD" : {
"library_path" : "libEGL_nvidia.so.0"
}
}

75 changes: 75 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# First, build the zeroth-bot-sim:v0 base image using the commented-out recipe below:
# FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04

# COPY sources.list /etc/apt/sources.list

# RUN mkdir /root/.pip
# COPY pip.conf /root/.pip/pip.conf

# RUN apt install -y wget zlib1g-dev libssl-dev libncurses5-dev libsqlite3-dev libreadline-dev libtk8.6 libgdm-dev libdb4o-cil-dev libpcap-dev
# RUN wget https://mirrors.huaweicloud.com/python/3.8.19/Python-3.8.19.tar.xz && tar -xvf Python-3.8.19.tar.xz

# WORKDIR /root/Python-3.8.19
# RUN ./configure --prefix=/usr/local && make && make install

# WORKDIR /usr/local/bin/
# RUN ln -s pip3 pip

# WORKDIR /app/sim/
# RUN make install-dev

# RUN wget https://developer.nvidia.com/isaac-gym-preview-4
# # RUN tar -xvf
# RUN make install-third-party-external

# Base image: the locally prebuilt zeroth-bot-sim:v0 (see the commented-out
# recipe at the top of this file).
FROM zeroth-bot-sim:v0

WORKDIR /root

# NOTE(review): the original carried the hint
#   "on vnc client use <your docker ip>:5901"
# here, but nothing in this file installs or starts a VNC server - confirm
# whether the base image provides one.
#
# Install the X11 / Mesa / Vulkan / EGL libraries plus a few CLI tools.
#  - `apt-get update` and `apt-get install` share one layer so a cached, stale
#    package index can never be paired with a newer install step.
#  - `apt-get` is used instead of `apt`, whose CLI is not meant for scripts.
#  - The package index is removed in the same layer so it does not bloat the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        git \
        libegl1 \
        libxcursor-dev \
        libxi-dev \
        libxinerama-dev \
        libxrandr-dev \
        make \
        mesa-common-dev \
        mesa-vulkan-drivers \
        pigz \
        unzip \
        vulkan-utils \
        zip \
    && rm -rf /var/lib/apt/lists/*

# Workaround: libEGL_mesa.so.0 crashes in eglReleaseThread at shutdown, so make
# sure the Mesa EGL vendor library can never be detected or loaded.
# The libegl-mesa0 package cannot be removed directly (because of libegl1,
# which is needed), so the individual files are deleted instead.
RUN rm \
    /usr/lib/x86_64-linux-gnu/libEGL_mesa.so.0 \
    /usr/lib/x86_64-linux-gnu/libEGL_mesa.so.0.0.0 \
    /usr/share/glvnd/egl_vendor.d/50_mesa.json

# Install the NVIDIA manifests for the Vulkan ICD directory and for the
# GLVND EGL vendor directory.
COPY nvidia_icd.json /usr/share/vulkan/icd.d/nvidia_icd.json
COPY 10_nvidia.json /usr/share/glvnd/egl_vendor.d/10_nvidia.json


WORKDIR /app/sim

# Everything from here on runs as an unprivileged user with its own home directory.
RUN useradd --create-home gymuser
USER gymuser

# Copy the build context into /app/sim, owned by gymuser.
# NOTE(review): docker-compose.yml uses `context: .` (the directory that holds
# this Dockerfile) and bind-mounts a host checkout over /app at runtime, so the
# files copied here may be hidden in the running container - confirm intent.
COPY --chown=gymuser . .

# Put gymuser's ~/.local/bin on PATH (presumably for executables installed by
# user-level pip installs of the gym modules - confirm).
ENV PATH="/home/gymuser/.local/bin:$PATH"

# Model directory, relative to the working directory.
# This must be ENV: the original `RUN export MODEL_DIR=...` only set the
# variable inside that single RUN shell, so it was never visible in later
# build steps or in the running container.
ENV MODEL_DIR=sim/resources/stompymicro

# Settings read by the NVIDIA container runtime: expose all GPUs and all
# driver capabilities to the container.
ENV NVIDIA_VISIBLE_DEVICES=all NVIDIA_DRIVER_CAPABILITIES=all

# Keep the container alive so users can `docker exec` into it and start
# training manually.
CMD ["tail","-f", "/dev/null"]

# CMD ["python3", "sim/train.py", "--task=stompymicro", "--num_envs=4"]
# python3 sim/train.py --task=stompymicro --num_envs=4

104 changes: 104 additions & 0 deletions docker/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
Preliminaries
```
Ensure nvidia-smi works on your host machine.
```

There is a prebuilt Docker image, tested on an amd64 machine running Ubuntu 24.04 LTS.
```
docker pull ghcr.io/bigjohnn/zeroth-bot-sim:v1
```

If that does not work, you can build the image yourself.

Make the necessary changes to your Dockerfile and docker-compose.yml.
```
First, prebuild a Docker image that contains the external dependencies:
# 1stly, to build zeroth-bot-sim:v0
# FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04
# COPY sources.list /etc/apt/sources.list
# RUN mkdir /root/.pip
# COPY pip.conf /root/.pip/pip.conf
# RUN apt install -y wget zlib1g-dev libssl-dev libncurses5-dev libsqlite3-dev libreadline-dev libtk8.6 libgdm-dev libdb4o-cil-dev libpcap-dev
# RUN wget https://mirrors.huaweicloud.com/python/3.8.19/Python-3.8.19.tar.xz && tar -xvf Python-3.8.19.tar.xz
# WORKDIR /root/Python-3.8.19
# RUN ./configure --prefix=/usr/local && make && make install
# WORKDIR /usr/local/bin/
# RUN ln -s pip3 pip
# WORKDIR /app/sim/
# RUN make install-dev
# RUN wget https://developer.nvidia.com/isaac-gym-preview-4
# # RUN tar -xvf
# RUN make install-third-party-external
```

Then,

Terminal1:
```
docker-compose up --build
```

```
WARNING: Found orphan containers (a55a8ae7a762_docker_my_cuda_service_1) for this project. If you removed or renamed this service in your compose file, you can run this command with the --remove-orphans flag to clean it up.
Building my-service
[+] Building 1.6s (10/10) FINISHED docker:default
=> [internal] load build definition from Dockerfile 0.2s
=> => transferring dockerfile: 348B 0.0s
=> [internal] load metadata for docker.io/nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 0.0s
=> [internal] load .dockerignore 0.2s
=> => transferring context: 2B 0.0s
=> [1/5] FROM docker.io/nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 0.0s
=> [internal] load build context 0.2s
=> => transferring context: 34B 0.0s
=> CACHED [2/5] WORKDIR /app 0.0s
=> CACHED [3/5] COPY sources.list /etc/apt/sources.list 0.0s
=> CACHED [4/5] RUN apt update 0.0s
=> CACHED [5/5] WORKDIR /app/sim/ 0.0s
=> exporting to image 0.2s
=> => exporting layers 0.0s
=> => writing image sha256:8e9c02e6c8b50dcbf7d6d1962d51de926126f132b65b65952ad8dfc74634f8c6 0.0s
=> => naming to docker.io/library/docker_my-service 0.1s
WARNING: Image for service my-service was built because it did not already exist. To rebuild this image you must use `docker-compose build` or `docker-compose up --build`.
Creating docker_my-service_1 ... done
Attaching to docker_my-service_1
my-service_1 |
my-service_1 | ==========
my-service_1 | == CUDA ==
my-service_1 | ==========
my-service_1 |
my-service_1 | CUDA Version 12.1.0
my-service_1 |
my-service_1 | Container image Copyright (c) 2016-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
my-service_1 |
my-service_1 | This container image and its contents are governed by the NVIDIA Deep Learning Container License.
my-service_1 | By pulling and using the container, you accept the terms and conditions of this license:
my-service_1 | https://developer.nvidia.com/ngc/nvidia-deep-learning-container-license
my-service_1 |
my-service_1 | A copy of this license is made available in this container at /NGC-DL-CONTAINER-LICENSE for your convenience.
my-service_1 |
my-service_1 | *************************
my-service_1 | ** DEPRECATION NOTICE! **
my-service_1 | *************************
my-service_1 | THIS IMAGE IS DEPRECATED and is scheduled for DELETION.
my-service_1 | https://gitlab.com/nvidia/container-images/cuda/blob/master/doc/support-policy.md
my-service_1 |
```

Terminal2:

```
docker exec -it docker_zeroth-sim_1 /bin/bash
```

```
gymuser@06aac36e0751:/app/sim# python3 sim/train.py --task=stompymicro --num_envs=4
```
24 changes: 24 additions & 0 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# version: '3.8'

# Single service that builds the simulator image from the Dockerfile in this
# directory and runs it with GPU access and X11 forwarding.
services:
  zeroth-sim:
    build:
      context: .  # Assuming the Dockerfile is in the current directory
      dockerfile: Dockerfile
    volumes:
      # Host checkout mounted at /app; must be an absolute path.
      # Set ZEROTH_BOT_DIR to point at your own checkout; when it is unset the
      # original hard-coded location is used.
      - ${ZEROTH_BOT_DIR:-/home/hph/Codes/zeroth-bot}:/app
      # Share the host X11 socket so GUI windows can open on the host display.
      - /tmp/.X11-unix:/tmp/.X11-unix
    environment:
      - DISPLAY=$DISPLAY
    # NOTE(review): privileged mode already grants every capability, so the
    # cap_add entry below is redundant while `privileged: true` is set.
    privileged: true
    cap_add:
      - SYS_PTRACE
    shm_size: 6g
    tty: true
    stdin_open: true
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every available GPU for this service.
            - capabilities: [gpu]
              count: all
7 changes: 7 additions & 0 deletions docker/nvidia_icd.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"file_format_version" : "1.0.0",
"ICD": {
"library_path": "libGLX_nvidia.so.0",
"api_version" : "1.1.95"
}
}
3 changes: 3 additions & 0 deletions docker/pip.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# pip configuration used by the commented-out prebuild recipe in the
# Dockerfile (copied to /root/.pip/pip.conf).
[global]
# Use the Aliyun PyPI mirror over HTTPS so package downloads are protected in
# transit. The former `trusted-host=mirrors.aliyun.com` override was only
# needed to make pip accept the plain-HTTP index and is dropped with it.
index-url=https://mirrors.aliyun.com/pypi/simple/
4 changes: 4 additions & 0 deletions docker/sources.list
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# APT sources for Ubuntu "focal", served from the USTC mirror
# (mirrors.ustc.edu.cn). The commented-out prebuild recipe in the Dockerfile
# copies this file to /etc/apt/sources.list.
# One line per suite (release, updates, backports, security), each enabling the
# main, restricted, universe and multiverse components.
deb http://mirrors.ustc.edu.cn/ubuntu/ focal main restricted universe multiverse
deb http://mirrors.ustc.edu.cn/ubuntu/ focal-updates main restricted universe multiverse
deb http://mirrors.ustc.edu.cn/ubuntu/ focal-backports main restricted universe multiverse
deb http://mirrors.ustc.edu.cn/ubuntu/ focal-security main restricted universe multiverse

0 comments on commit 94f4d1a

Please sign in to comment.