Skip to content

Commit

Permalink
address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
xudong963 committed Jun 16, 2022
1 parent f98f909 commit 3a57d32
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 21 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,4 @@ __pycache__/
*.zip

# tpch data set
tpch/data
benchmark/tpch/data
1 change: 0 additions & 1 deletion .licenserc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ header:
- "website"
- "tests"
- "tools"
- "tpch"
# Ignore hidden files
- ".cargo"
- ".databend"
Expand Down
11 changes: 11 additions & 0 deletions benchmark/tpch/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Databend TPC-H Benchmark

### TPCH DataSet
Run the following command to generate the TPC-H dataset:
```shell
# scale_factor: scale of the database population. scale 1.0 represents ~1 GB of data
../../scripts/setup/dev_setup.sh -t <scale_factor>
```

### TPCH Benchmark
**TBD**
33 changes: 32 additions & 1 deletion scripts/setup/dev_setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,7 @@ function usage {
-d Install development tools
-p Install profile
-s Install codegen tools
-t Install tpch data set
-v Verbose mode
EOF
}
Expand All @@ -342,6 +343,7 @@ Build tools (since -b or no option was provided):
* protobuf-compiler
* thrift-compiler
* openjdk
* tpch dataset for benchmark
EOF
fi

Expand All @@ -368,6 +370,12 @@ Moreover, ~/.profile will be updated (since -p was provided).
EOF
fi

if [[ "$INSTALL_TPCH_DATA" == "true" ]]; then
cat <<EOF
Tpch dataset (since -t was provided):
EOF
fi

cat <<EOF
If you'd prefer to install these dependencies yourself, please exit this script
now with Ctrl-C.
Expand All @@ -380,9 +388,10 @@ INSTALL_BUILD_TOOLS=false
INSTALL_DEV_TOOLS=false
INSTALL_PROFILE=false
INSTALL_CODEGEN=false
INSTALL_TPCH_DATA=false

# parse args
while getopts "ybdpsv" arg; do
while getopts "ybdpstv" arg; do
case "$arg" in
y)
AUTO_APPROVE="true"
Expand All @@ -402,6 +411,10 @@ while getopts "ybdpsv" arg; do
v)
VERBOSE="true"
;;
t)
INSTALL_TPCH_DATA="true"
;;

*)
usage
exit 0
Expand All @@ -416,6 +429,7 @@ fi
# When no install target was requested on the command line, default to
# installing the build tools.
if [[ "$INSTALL_BUILD_TOOLS" == "false" && "$INSTALL_DEV_TOOLS" == "false" &&
	"$INSTALL_PROFILE" == "false" && "$INSTALL_TPCH_DATA" == "false" &&
	"$INSTALL_CODEGEN" == "false" ]]; then
	INSTALL_BUILD_TOOLS="true"
fi
Expand Down Expand Up @@ -548,6 +562,23 @@ if [[ "$INSTALL_CODEGEN" == "true" ]]; then
"${PRE_COMMAND[@]}" python3 -m pip install --quiet coscmd PyYAML
fi

if [[ "$INSTALL_TPCH_DATA" == "true" ]]; then
	# Build the docker image that generates the TPC-H data set.
	# The optional scale factor is read from the second positional argument
	# (e.g. `dev_setup.sh -t 10`) and forwarded as the `scale_factor` build
	# arg; when it is absent the dockerfile's own default is used.
	# NOTE(review): `$2` is only the scale factor when `-t` is the first
	# argument — confirm whether `-t` should take an option argument instead.
	tpch_build_args=(-f scripts/setup/tpchdata.dockerfile -t databend:latest)
	if [[ -n "${2:-}" ]]; then
		tpch_build_args+=(--build-arg "scale_factor=$2")
	fi
	docker build "${tpch_build_args[@]}" .

	# Generate the data only when it is not already present; customer.tbl is
	# used as the marker for an existing data set.
	tpch_data_dir="$(pwd)/benchmark/tpch/data"
	FILE=benchmark/tpch/data/customer.tbl
	if test -f "$FILE"; then
		echo "$FILE exists."
	else
		# -p creates missing parents and succeeds if the directory already
		# exists, so real failures (e.g. permissions) are no longer hidden.
		mkdir -p "$tpch_data_dir"
		# Quote the bind mount so a checkout path containing spaces works.
		docker run -v "$tpch_data_dir:/data" --rm databend:latest
	fi
fi

[[ "${AUTO_APPROVE}" == "false" ]] && cat <<EOF
Finished installing all dependencies.
Expand Down
2 changes: 1 addition & 1 deletion tpch/run-tpch-dbgen.sh → scripts/setup/run-tpch-dbgen.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Runs dbgen inside /tpch-dbgen and moves the resulting *.tbl files to /data.
# $1 is forwarded to dbgen's -s option; the image's entrypoint passes
# $scale_factor in that position.

cd /tpch-dbgen
# NOTE(review): this listing looks like a rendered diff, so the two dbgen lines
# below are presumably the old (`-s 1`) and new (`-s $1`) versions of a single
# command — confirm that only one of them exists in the real script.
./dbgen -vf -s 1
./dbgen -vf -s $1
# Move every generated table file to /data.
mv *.tbl /data
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
# Image that clones and compiles tpch-dbgen and, when run, executes
# run-tpch-dbgen.sh from /tpch-dbgen.
FROM ubuntu:22.04

# Build-time scale factor (default 1), overridable with
# `--build-arg scale_factor=<n>`. It is copied into ENV so the value is still
# set when the container starts and the entrypoint expands $scale_factor.
ARG scale_factor=1
ENV scale_factor=$scale_factor
# git and build-essential are installed for the clone and `make` step below.
RUN apt-get update && \
apt-get install -y git build-essential

# Use https://github.com/databricks/tpch-dbgen to generate data
RUN git clone https://github.com/databricks/tpch-dbgen.git && cd tpch-dbgen && make

WORKDIR /tpch-dbgen
# NOTE(review): this listing looks like a rendered diff, so the two ADD lines
# are presumably the old and new versions of one instruction. The second path
# is relative to a build context at the repository root — confirm that only it
# remains in the real file.
ADD run-tpch-dbgen.sh /tpch-dbgen/
ADD scripts/setup/run-tpch-dbgen.sh /tpch-dbgen/

# Presumably where the entrypoint script places the generated .tbl files;
# callers are expected to bind-mount a host directory here — verify against
# the script.
VOLUME /data

# NOTE(review): two ENTRYPOINT instructions appear here; Docker only honors the
# last one, and the exec-form line directly below is presumably the pre-change
# version from the diff.
ENTRYPOINT [ "bash", "./run-tpch-dbgen.sh" ]
# SHELL makes the shell-form instructions that follow run under `/bin/bash -c`.
SHELL ["/bin/bash", "-c"]
RUN chmod +x run-tpch-dbgen.sh
# Shell form is used so that $scale_factor is expanded when the container starts.
ENTRYPOINT ./run-tpch-dbgen.sh $scale_factor
14 changes: 0 additions & 14 deletions tpch/tpch-data.sh

This file was deleted.

0 comments on commit 3a57d32

Please sign in to comment.