diff --git a/hawq/hawq_tables/create_hawq_tables.sql b/hawq/hawq_tables/create_hawq_tables.sql index bbd88bb..aaee834 100644 --- a/hawq/hawq_tables/create_hawq_tables.sql +++ b/hawq/hawq_tables/create_hawq_tables.sql @@ -1,7 +1,5 @@ -DROP SCHEMA IF EXISTS retail_demo CASCADE; -CREATE SCHEMA retail_demo; - --- 1. HAWQ table; load via COPY +-- 1. HAWQ table +DROP TABLE IF EXISTS retail_demo.categories_dim_hawq; CREATE TABLE retail_demo.categories_dim_hawq ( category_id integer NOT NULL, @@ -9,7 +7,8 @@ CREATE TABLE retail_demo.categories_dim_hawq ) WITH (appendonly=true, compresstype=quicklz) DISTRIBUTED RANDOMLY; --- 2. HAWQ table; load via COPY +-- 2. HAWQ table +DROP TABLE IF EXISTS retail_demo.customers_dim_hawq; CREATE TABLE retail_demo.customers_dim_hawq ( customer_id TEXT, @@ -19,7 +18,8 @@ CREATE TABLE retail_demo.customers_dim_hawq ) WITH (appendonly=true, compresstype=quicklz) DISTRIBUTED RANDOMLY; --- 3. HAWQ table; load via COPY +-- 3. HAWQ table +DROP TABLE IF EXISTS retail_demo.order_lineitems_hawq; CREATE TABLE retail_demo.order_lineitems_hawq ( order_id TEXT, @@ -57,7 +57,8 @@ CREATE TABLE retail_demo.order_lineitems_hawq ) WITH (appendonly=true, compresstype=quicklz) DISTRIBUTED RANDOMLY; --- 4. HAWQ table; load via COPY +-- 4. HAWQ table +DROP TABLE IF EXISTS retail_demo.orders_hawq; CREATE TABLE retail_demo.orders_hawq ( order_id TEXT, @@ -94,7 +95,8 @@ CREATE TABLE retail_demo.orders_hawq ) WITH (appendonly=true, compresstype=quicklz) DISTRIBUTED RANDOMLY; --- 5. HAWQ table; load via COPY +-- 5. HAWQ table +DROP TABLE IF EXISTS retail_demo.customer_addresses_dim_hawq; CREATE TABLE retail_demo.customer_addresses_dim_hawq ( customer_address_id TEXT, @@ -113,7 +115,8 @@ CREATE TABLE retail_demo.customer_addresses_dim_hawq ) WITH (appendonly=true, compresstype=quicklz) DISTRIBUTED RANDOMLY; --- 6. HAWQ table; load via COPY +-- 6. HAWQ table +DROP TABLE IF EXISTS retail_demo.date_dim_hawq; CREATE TABLE retail_demo.date_dim_hawq ( calendar_day date, @@ -126,6 +129,7 @@ CREATE TABLE retail_demo.date_dim_hawq WITH (appendonly=true) DISTRIBUTED RANDOMLY; -- 7. HAWQ table +DROP TABLE IF EXISTS retail_demo.email_addresses_dim_hawq; CREATE TABLE retail_demo.email_addresses_dim_hawq ( customer_id TEXT, @@ -134,16 +138,17 @@ CREATE TABLE retail_demo.email_addresses_dim_hawq WITH (appendonly=true, compresstype=quicklz) DISTRIBUTED RANDOMLY; --- 8. HAWQ table; load via COPY +-- 8. HAWQ table +DROP TABLE IF EXISTS retail_demo.payment_methods_hawq; CREATE TABLE retail_demo.payment_methods_hawq ( payment_method_id smallint, payment_method_code character varying(20) ) WITH (appendonly=true, compresstype=quicklz) DISTRIBUTED RANDOMLY; -ALTER TABLE retail_demo.payment_methods_hawq OWNER TO gpadmin; --- 9. HAWQ table; load via COPY +-- 9. HAWQ table +DROP TABLE IF EXISTS retail_demo.products_dim_hawq; CREATE TABLE retail_demo.products_dim_hawq ( product_id TEXT, diff --git a/hawq/hawq_tables/load_data_to_HDFS.sh b/hawq/hawq_tables/load_data_to_HDFS.sh index 174db55..9f0b594 100755 --- a/hawq/hawq_tables/load_data_to_HDFS.sh +++ b/hawq/hawq_tables/load_data_to_HDFS.sh @@ -4,17 +4,9 @@ base_dir="/retail_demo" # Clean up any previous load echo "hadoop fs -rm -r -skipTrash $base_dir" -hadoop fs -rm -r -skipTrash $base_dir +hadoop fs -rm -r -skipTrash $base_dir -echo "hadoop fs -mkdir $base_dir" -hadoop fs -mkdir $base_dir - -for file in *.tsv.gz -do - dir=`echo $file | perl -ne 's/^(.+?)\..+$/$1/;print;'` - echo "hadoop fs -mkdir $base_dir/$dir" - hadoop fs -mkdir $base_dir/$dir - echo "hadoop fs -put $file $base_dir/$dir/" - hadoop fs -put $file $base_dir/$dir/ -done +# Copy the data directory, recursively, into HDFS root +echo "hadoop fs -put /retail_demo /" +hadoop fs -put /retail_demo / diff --git a/hawq/hawq_tables/load_hawq_tables_perl.sh b/hawq/hawq_tables/load_hawq_tables_perl.sh deleted file mode 100755 index 7f34129..0000000 --- a/hawq/hawq_tables/load_hawq_tables_perl.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -# Load the three small .tsv.gz files using COPY, on HAWQ master -# The files are TAB delimited, where NULL is just the empty string, '' - -# $Id: load_data_using_COPY.sh,v 1.1 2013/05/04 10:50:14 gpadmin Exp gpadmin $ - -schema=retail_demo - -for table in `ls *.gz | perl -ne 's/^(\w+).+$/$1/;print;'` -do - file="$table.tsv.gz" - zcat $file | psql -c "COPY $schema.$table FROM STDIN DELIMITER E'\t' NULL E'';" -done - -