diff --git a/.gitignore b/.gitignore index 23c2086..524f096 100644 --- a/.gitignore +++ b/.gitignore @@ -22,10 +22,3 @@ # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml hs_err_pid* replay_pid* - -# irrelevant files -*.pdf -*.html - -# data -/demo/neo4j2graphar/ \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..98c2647 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "demo/Neo4j2GraphAr/GraphAr"] + path = demo/Neo4j2GraphAr/GraphAr + url = https://github.com/alibaba/GraphAr.git \ No newline at end of file diff --git a/README.md b/README.md index 5d8c026..00548a1 100644 --- a/README.md +++ b/README.md @@ -1 +1,58 @@ # data-migration + +![导入流程](process.png) + +## 利用spark将数据从neo4j导出到GraphAr + +### 环境搭建 + +- 操作系统: `Linux`, 已在`centos7`和`ubuntu20`上进行测试 +- 安装`git`, `curl`, `java-11`, `maven`, 配置JAVA_HOME路径, maven配置文件加入mirror镜像 + +### 软件安装 + +此处将软件安装在`HOME`路径, 并添加临时环境变量, 具体可根据实际需要调整, 软件版本也可以按需调整 + +#### spark3.2.4(已有可忽略) + +```bash +curl https://mirrors.tuna.tsinghua.edu.cn/apache/spark/spark-3.2.4/spark-3.2.4-bin-hadoop3.2.tgz | tar -xz -C ${HOME}/ +export SPARK_HOME="${HOME}/spark-3.2.4-bin-hadoop3.2" +export PATH="${SPARK_HOME}/bin":"${PATH}" +``` + +#### neo4j-community-4.4.23(已有可忽略) + +```bash +curl https://dist.neo4j.org/neo4j-community-4.4.23-unix.tar.gz | tar -xz -C ${HOME}/ +export NEO4J_HOME="${HOME}/neo4j-community-4.4.23" +export PATH="${NEO4J_HOME}/bin":"${PATH}" +``` + +#### GraphAr spark + +```bash +git clone https://github.com/alibaba/GraphAr.git +cd GraphAr/spark +mvn clean package -DskipTests +``` + +### 使用说明 + +1. 参考`GraphAr/spark/import/neo4j.json`编写导出信息, 包含生成gar的配置, neo4j连接的配置, 导出schema的配置,按字段填写 + +2. `./neo4j.sh neo4j.json` + +## GraphAr数据导入tugraph-db + +### 导入说明 + +1. 使用lgraph_import工具, 设置配置文件为gar数据中的XXX.graph.yml, 注意为绝对路径, 设置gar=true即可 +2. 示例 + `./lgraph_import -c /root/movie-gar/movie.graph.yml --gar true` + +### 注意 + +1. 
GraphAr数据中的yml配置文件可以修改, 比如可以在graph.yml中删除指定的vertex或者edge, 也可以在对应的vertex.yml用prefix指定数据路径,数据存储路径可以与配置存储路径分离。 +2. GraphAr支持相同的edge_label可以具有不同的properties, 这类数据在tugraph-db中不支持, 导入过程中会报错指出对应edge_label, 可以根据报错信息在GraphAr数据中做修改 + diff --git a/demo/Neo4j2GraphAr/GraphAr b/demo/Neo4j2GraphAr/GraphAr new file mode 160000 index 0000000..39e1d6a --- /dev/null +++ b/demo/Neo4j2GraphAr/GraphAr @@ -0,0 +1 @@ +Subproject commit 39e1d6a11242edd8d1c50940c01010b8f19dce7d diff --git a/demo/Neo4j2GraphAr/README.md b/demo/Neo4j2GraphAr/README.md new file mode 100644 index 0000000..b8d64b8 --- /dev/null +++ b/demo/Neo4j2GraphAr/README.md @@ -0,0 +1,27 @@ +# 默认示例运行说明 + +GraphAr提供了一些shell脚本, 可以安装neo4j, spark, 以及运行导入程序 + +下面是一个运行示例 + +```bash +cd GraphAr/spark +scripts/get-spark-to-home.sh +export SPARK_HOME="${HOME}/spark-3.2.2-bin-hadoop3.2" +export PATH="${SPARK_HOME}/bin":"${PATH}" + +scripts/get-neo4j-to-home.sh +export NEO4J_HOME="${HOME}/neo4j-community-4.4.23" +export PATH="${NEO4J_HOME}/bin":"${PATH}" +neo4j-admin set-initial-password neo4j + +scripts/deploy-neo4j-movie-data.sh + +scripts/build.sh + +export NEO4J_USR="neo4j" +export NEO4J_PWD="neo4j" + +cd import +./neo4j.sh neo4j.json +``` diff --git a/demo/Neo4jToGraphAr/Neo4j2GraphAr.scala b/demo/Neo4jToGraphAr/Neo4j2GraphAr.scala deleted file mode 100644 index 3f940dd..0000000 --- a/demo/Neo4jToGraphAr/Neo4j2GraphAr.scala +++ /dev/null @@ -1,120 +0,0 @@ -/** Copyright 2022 Alibaba Group Holding Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.alibaba.graphar.example - -import com.alibaba.graphar.datasources._ -import com.alibaba.graphar.graph.GraphWriter - -import org.apache.spark.sql.{DataFrame, SparkSession} - -object Neo4j2GraphAr { - - def main(args: Array[String]): Unit = { - // connect to the Neo4j instance - val spark = SparkSession.builder() - .appName("Neo4j to GraphAr for Movie Graph") - .config("neo4j.url", "bolt://localhost:7687") - .config("neo4j.authentication.type", "basic") - .config("neo4j.authentication.basic.username", sys.env.get("NEO4J_USR").get) - .config("neo4j.authentication.basic.password", sys.env.get("NEO4J_PWD").get) - .config("spark.master", "local") - .getOrCreate() - - // initialize a graph writer - val writer: GraphWriter = new GraphWriter() - - // put movie graph data into writer - readAndPutDataIntoWriter(writer, spark) - - // output directory - val outputPath: String = args(0) - // vertex chunk size - val vertexChunkSize: Long = args(1).toLong - // edge chunk size - val edgeChunkSize: Long = args(2).toLong - // file type - val fileType: String = args(3) - - // write in graphar format - writer.write(outputPath, spark, "MovieGraph", vertexChunkSize, edgeChunkSize, fileType) - } - - // read data from Neo4j and put into writer - def readAndPutDataIntoWriter(writer: GraphWriter, spark: SparkSession): Unit = { - // read vertices with label "Person" from Neo4j as a DataFrame - val person_df = spark.read.format("org.neo4j.spark.DataSource") - .option("query", "MATCH (n:Person) RETURN n.name AS name, n.born as born") - .load() - // put into writer, vertex label is "Person" - writer.PutVertexData("Person", person_df) - - // read vertices with label "Movie" from Neo4j as a DataFrame - val movie_df = spark.read.format("org.neo4j.spark.DataSource") - .option("query", "MATCH (n:Movie) RETURN n.title AS title, n.tagline as tagline") - .load() - // put into 
writer, vertex label is "Movie" - writer.PutVertexData("Movie", movie_df) - - // 注意src和dst - // read edges with type "Person"->"PRODUCED"->"Movie" from Neo4j as a DataFrame - val produced_edge_df = spark.read.format("org.neo4j.spark.DataSource") - .option("query", "MATCH (a:Person)-[r:PRODUCED]->(b:Movie) return a.name as src, b.title as dst") - .load() - // put into writer, source vertex label is "Person", edge label is "PRODUCED" - // target vertex label is "Movie" - writer.PutEdgeData(("Person", "PRODUCED", "Movie"), produced_edge_df) - - // read edges with type "Person"->"ACTED_IN"->"Movie" from Neo4j as a DataFrame - val acted_in_edge_df = spark.read.format("org.neo4j.spark.DataSource") - .option("query", "MATCH (a:Person)-[r:ACTED_IN]->(b:Movie) return a.name as src, b.title as dst") - .load() - // put into writer, source vertex label is "Person", edge label is "ACTED_IN" - // target vertex label is "Movie" - writer.PutEdgeData(("Person", "ACTED_IN", "Movie"), acted_in_edge_df) - - // read edges with type "Person"->"DIRECTED"->"Movie" from Neo4j as a DataFrame - val directed_edge_df = spark.read.format("org.neo4j.spark.DataSource") - .option("query", "MATCH (a:Person)-[r:DIRECTED]->(b:Movie) return a.name as src, b.title as dst") - .load() - // put into writer, source vertex label is "Person", edge label is "DIRECTED" - // target vertex label is "Movie" - writer.PutEdgeData(("Person", "DIRECTED", "Movie"), directed_edge_df) - - // read edges with type "Person"->"FOLLOWS"->"Person" from Neo4j as a DataFrame - val follows_edge_df = spark.read.format("org.neo4j.spark.DataSource") - .option("query", "MATCH (a:Person)-[r:FOLLOWS]->(b:Person) return a.name as src, b.name as dst") - .load() - // put into writer, source vertex label is "Person", edge label is "FOLLOWS" - // target vertex label is "Person" - writer.PutEdgeData(("Person", "FOLLOWS", "Person"), follows_edge_df) - - // read edges with type "Person"->"REVIEWED"->"Movie" from Neo4j as a DataFrame - val 
reviewed_edge_df = spark.read.format("org.neo4j.spark.DataSource") - .option("query", "MATCH (a:Person)-[r:REVIEWED]->(b:Movie) return a.name as src, b.title as dst, r.rating as rating, r.summary as summary") - .load() - // put into writer, source vertex label is "Person", edge label is "REVIEWED" - // target vertex label is "Movie" - writer.PutEdgeData(("Person", "REVIEWED", "Movie"), reviewed_edge_df) - - // read edges with type "Person"->"WROTE"->"Movie" from Neo4j as a DataFrame - val wrote_edge_df = spark.read.format("org.neo4j.spark.DataSource") - .option("query", "MATCH (a:Person)-[r:WROTE]->(b:Movie) return a.name as src, b.title as dst") - .load() - // put into writer, source vertex label is "Person", edge label is "WROTE" - // target vertex label is "Movie" - writer.PutEdgeData(("Person", "WROTE", "Movie"), wrote_edge_df) - } -} diff --git a/demo/Neo4jToGraphAr/neo4j_export.md b/demo/Neo4jToGraphAr/neo4j_export.md deleted file mode 100644 index 676f413..0000000 --- a/demo/Neo4jToGraphAr/neo4j_export.md +++ /dev/null @@ -1,79 +0,0 @@ -# neo4j导出流程记录 - -## 利用spark将数据在neo4j和GraphAr之间导入导出 - -### 环境搭建 - -Linux 安装git curl java-11和maven, maven配置文件加入mirror镜像 - -### 项目安装 - -拉取GraphAr源代码,编译spark目录下的maven项目, 得到`graphar-0.1.0-SNAPSHOT-shaded.jar`文件 - -下载neo4j-4.4.x, spark-3.2.4, 添加环境变量 - -命令如下: - -```bash -git clone https://github.com/alibaba/GraphAr.git -cd GraphAr -git submodule update --init -cd spark -mvn clean package -DskipTests # 编译不包含测试的代码 确保maven已经有镜像 - -curl https://mirrors.tuna.tsinghua.edu.cn/apache/spark/spark-3.2.4/spark-3.2.4-bin-hadoop3.2.tgz | tar -xz -C ${HOME}/ -export SPARK_HOME="${HOME}/spark-3.2.4-bin-hadoop3.2" -export PATH="${SPARK_HOME}/bin":"${PATH}" - -curl https://dist.neo4j.org/neo4j-community-4.4.23-unix.tar.gz | tar -xz -C ${HOME}/ -export NEO4J_HOME="${HOME}/neo4j-community-4.4.23" -export PATH="${NEO4J_HOME}/bin":"${PATH}" - -# 环境变量也可以加入.bashrc中 -``` - -### 运行neo4j导出到GraphAr的示例 - -neo4j的连接信息已经被写在源代码中, 通过环境变量配置neo4j账号密码 - 
-加载开源数据集`movies-43.dump`文件, 启动neo4j -导入GraphAr的脚本`scripts/run-neo4j2graphar.sh`已经写好, 里面有一些配置信息, 包含**chunk_size**和**file_type**,用**spark_submit**工具运行 - -导入的文件存储在`/tmp/graphar/neo4j2graphar`中, 可以在脚本文件中修改导入文件地址 - -```bash -export NEO4J_USR="neo4j" -export NEO4J_PWD="neo4j" - -curl https://raw.githubusercontent.com/neo4j-graph-examples/movies/main/data/movies-43.dump -o ${NEO4J_HOME}/movies-43.dump -neo4j-admin load --from ${NEO4J_HOME}/movies-43.dump --database=neo4j -neo4j start - -scripts/run-neo4j2graphar.sh -``` - -### 运行GraphAr导出到neo4j的示例 - -用cypher_shell清空已有数据 - -运行导出文件脚本`scripts/run-graphar2neo4j.sh` - -```bash -echo "match (a) -[r] -> () delete a, r;match (a) delete a;" | cypher-shell -u ${NEO4J_USR} -p ${NEO4J_PWD} -d neo4j --format plain - -scripts/run-graphar2neo4j.sh -``` - -## 总结 - -### 拓展 - -可以修改和拓展的地方, 可以编写自己的Scala文件编译好给spark_submit运行, 设置class入口即可,也可以参考test进行测试 - -```bash -# 代码待补充 -``` - -### Next - -得到的文件有一个总的结构信息yaml文件,可以用spark和c++解析,下一步可以接着这个c++接口以及spark接口继续往下做 diff --git a/demo/Neo4jToGraphAr/run-neo4j2graphar.sh b/demo/Neo4jToGraphAr/run-neo4j2graphar.sh deleted file mode 100644 index 094e1f7..0000000 --- a/demo/Neo4jToGraphAr/run-neo4j2graphar.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -set -eu - -cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -jar_file="${cur_dir}/../target/graphar-0.1.0-SNAPSHOT-shaded.jar" - -vertex_chunk_size=100 -edge_chunk_size=1024 -file_type="parquet" -spark-submit --class com.alibaba.graphar.example.Neo4j2GraphAr ${jar_file} \ - "/home/jasin/neo4j2graphar" ${vertex_chunk_size} ${edge_chunk_size} ${file_type} diff --git a/demo/ShowGraphAr/.gitignore b/demo/ShowGraphAr/.gitignore new file mode 100644 index 0000000..f3d6549 --- /dev/null +++ b/demo/ShowGraphAr/.gitignore @@ -0,0 +1 @@ +/build/ \ No newline at end of file diff --git a/demo/ShowGraphAr/CMakeLists.txt b/demo/ShowGraphAr/CMakeLists.txt index 24183c6..668c3ec 100644 --- a/demo/ShowGraphAr/CMakeLists.txt +++ 
b/demo/ShowGraphAr/CMakeLists.txt @@ -1,14 +1,23 @@ cmake_minimum_required(VERSION 3.15) -set(GAR_MAJOR_VERSION 0) -set(GAR_MINOR_VERSION 1) -set(GAR_PATCH_VERSION 0) -set(GAR_VERSION ${GAR_MAJOR_VERSION}.${GAR_MINOR_VERSION}.${GAR_PATCH_VERSION}) - -project(graph-archive-demo LANGUAGES C CXX VERSION ${GAR_VERSION}) +project(graph-archive-demo) +find_package(Threads REQUIRED) add_definitions(-DGAR_NAMESPACE=GraphArchive) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -Wall") -add_executable(show show.cpp) -target_link_libraries(show PRIVATE gar stdc++fs) \ No newline at end of file +set(TARGET "show") + +find_package(Arrow REQUIRED) +find_package(ArrowDataset REQUIRED) +find_package(ArrowAcero REQUIRED) +find_package(Parquet REQUIRED) +find_package(gar REQUIRED) +find_package(Threads REQUIRED) + +add_executable(${TARGET} ${TARGET}.cpp) +target_link_libraries(${TARGET} PRIVATE gar stdc++fs Threads::Threads + Arrow::arrow_static + Parquet::parquet_static + ArrowDataset::arrow_dataset_static + ArrowAcero::arrow_acero_static) \ No newline at end of file diff --git a/demo/ShowGraphAr/readme.md b/demo/ShowGraphAr/readme.md index fee2bc2..aff99e5 100644 --- a/demo/ShowGraphAr/readme.md +++ b/demo/ShowGraphAr/readme.md @@ -2,33 +2,38 @@ ## 环境说明 -需要安装GraphAr +1. 需要安装GraphAr, 同时show程序需要安装arrow +2. 数据来自gar-test项目中 -**注**: GraphAr安装过程会使用apache arrow-10 本地如果有安装apache arrow 会同步本地版本号 但是编译时还是从源代码下载使用 下载过程很漫长 可以通过log文件查看进度 如果报连接错误 需要手动下载移动到指定文件夹 - -GraphAr源码安装示例(ubuntu) +## 运行说明 ``` bash - $ git clone https://github.com/alibaba/GraphAr.git - $ cd GraphAr - $ git submodule update --init - $ cd cpp +$ mkdir build && cd build +$ cmake .. +$ make +$ ./show +``` - $ mkdir build-release - $ cd build-release - $ cmake .. - $ make -j8 +1. show展示数据, 分别展示点, 边 +2. 注意源码中的path路径为绝对路径 - $ sudo make install # make install -``` +## 附录 -## 运行说明 +### arrow安装 -``` bash - $ mkdir build && cd build - $ cmake .. 
- $ make - $ ./show +注意编译选项 + +```bash +wget https://tugraph-web.oss-cn-beijing.aliyuncs.com/tugraph/deps/graphar/apache-arrow-13.0.0.tar.gz \ + && tar xf apache-arrow-13.0.0.tar.gz && cd apache-arrow-13.0.0/cpp && mkdir build && cd build \ + && cmake .. -DARROW_DATASET=ON -DARROW_PARQUET=ON -DARROW_ORC=ON -DARROW_CSV=ON && make \ + && make install ``` -注意修改源码中的path路径,以及graph.yml中的prefix路径信息 +### gar安装(依赖系统安装arrow) + +```bash +wget https://tugraph-web.oss-cn-beijing.aliyuncs.com/tugraph/deps/graphar/GraphAr-0.8.0.tar.gz \ + && tar xf GraphAr-0.8.0.tar.gz && cd GraphAr-0.8.0/cpp && mkdir build && cd build \ + && cmake .. && make && make install +``` diff --git a/demo/ShowGraphAr/show.cpp b/demo/ShowGraphAr/show.cpp index 9e14b69..edaf49e 100644 --- a/demo/ShowGraphAr/show.cpp +++ b/demo/ShowGraphAr/show.cpp @@ -1,35 +1,37 @@ #include #include -#include "gar/graph.h" -#include "gar/graph_info.h" -#include "gar/reader/arrow_chunk_reader.h" -#include "gar/writer/arrow_chunk_writer.h" +#include +#include -int main(int argc, char* argv[]) { - // path指向graph.yaml文件 - std::string path = std::filesystem::current_path().string() + "/../neo4j2graphar/MovieGraph.graph.yml"; - auto graph_info = GAR_NAMESPACE::GraphInfo::Load(path).value(); +int main(int argc, char *argv[]) +{ + // construct graph information from file + std::string demo_path = std::filesystem::current_path().parent_path().parent_path(); + std::string path = demo_path + "/ldbc_sample/parquet/ldbc_sample.graph.yml"; + auto graph_info = GraphArchive::GraphInfo::Load(path).value(); - // 打印节点信息 - auto vertices_collection = GAR_NAMESPACE::ConstructVerticesCollection(graph_info, "Person"); - auto vertices = vertices_collection.value(); + // get vertex information + auto vertices = GraphArchive::VerticesCollection::Make(graph_info, "person").value(); - for (auto it = vertices.begin(); it != vertices.end(); ++it) { - auto vertex = *it; - std::cout << "id=" << vertex.id() - << ", name=" << vertex.property("name").value() 
- << std::endl; - } + for (auto it = vertices->begin(); it != vertices->end(); ++it) + { + // get a vertex and access its data + auto vertex = *it; + std::cout << "id=" << vertex.property("id").value() << ", firstName=" << vertex.property("firstName").value() << std::endl; + } - // 打印边信息 - auto expect = GAR_NAMESPACE::ConstructEdgesCollection( - graph_info, "Person", "WROTE", "Movie", - GraphArchive::AdjListType::ordered_by_source) - .value(); - auto edges = std::get>(expect); - for (auto it = edges.begin(); it != edges.end(); ++it) { - auto edge = *it; - std::cout << "src=" << edge.source() << ", dst=" << edge.destination() << std::endl; - } + // get edge information + auto edge_info = graph_info->GetEdgeInfo("person", "knows", + "person"); + auto expect = GraphArchive::EdgesCollection::Make(graph_info, "person", "knows", + "person", + GraphArchive::AdjListType::ordered_by_source); + auto edges = expect.value(); + + for (auto it = edges->begin(); it != edges->end(); ++it) + { + // get an edge and access its data + auto edge = *it; + std::cout << "src=" << edge.source() << ", dst=" << edge.destination() << std::endl; + } } \ No newline at end of file diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part0/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part0/chunk0 new file mode 100644 index 0000000..62484d8 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part0/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part1/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part1/chunk0 new file mode 100644 index 0000000..7ea4c35 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part1/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part2/chunk0 
b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part2/chunk0 new file mode 100644 index 0000000..69ccff4 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part2/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part3/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part3/chunk0 new file mode 100644 index 0000000..1eb4b63 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part3/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part4/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part4/chunk0 new file mode 100644 index 0000000..2593256 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part4/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part5/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part5/chunk0 new file mode 100644 index 0000000..7e46046 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part5/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part6/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part6/chunk0 new file mode 100644 index 0000000..25b3ee0 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part6/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part7/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part7/chunk0 new file mode 100644 index 0000000..152f01f Binary files /dev/null and 
b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part7/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part8/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part8/chunk0 new file mode 100644 index 0000000..aaf76db Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part8/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part9/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part9/chunk0 new file mode 100644 index 0000000..cba193f Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/adj_list/part9/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part0/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part0/chunk0 new file mode 100644 index 0000000..5849e70 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part0/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part1/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part1/chunk0 new file mode 100644 index 0000000..d58d8ba Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part1/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part2/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part2/chunk0 new file mode 100644 index 0000000..4694675 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part2/chunk0 differ diff --git 
a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part3/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part3/chunk0 new file mode 100644 index 0000000..3b0095e Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part3/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part4/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part4/chunk0 new file mode 100644 index 0000000..36529f0 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part4/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part5/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part5/chunk0 new file mode 100644 index 0000000..c32b4a3 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part5/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part6/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part6/chunk0 new file mode 100644 index 0000000..ada5a85 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part6/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part7/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part7/chunk0 new file mode 100644 index 0000000..27702e3 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part7/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part8/chunk0 
b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part8/chunk0 new file mode 100644 index 0000000..7bda214 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part8/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part9/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part9/chunk0 new file mode 100644 index 0000000..4f305f5 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/creationDate/part9/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count0 new file mode 100644 index 0000000..a14b29a Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count1 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count1 new file mode 100644 index 0000000..d73511e Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count1 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count2 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count2 new file mode 100644 index 0000000..f7171e9 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count2 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count3 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count3 new file mode 100644 index 0000000..6f76d5c Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count3 differ diff --git 
a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count4 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count4 new file mode 100644 index 0000000..0eb221e Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count4 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count5 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count5 new file mode 100644 index 0000000..4555216 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count5 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count6 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count6 new file mode 100644 index 0000000..d0b8edd Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count6 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count7 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count7 new file mode 100644 index 0000000..8639fdc Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count7 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count8 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count8 new file mode 100644 index 0000000..e55ea4b Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count8 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count9 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count9 new file mode 100644 index 0000000..8372230 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/edge_count9 differ diff --git 
a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk0 new file mode 100644 index 0000000..9a8483c Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk1 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk1 new file mode 100644 index 0000000..6e0aa06 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk1 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk2 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk2 new file mode 100644 index 0000000..aae3b9b Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk2 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk3 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk3 new file mode 100644 index 0000000..60572fc Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk3 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk4 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk4 new file mode 100644 index 0000000..a12c210 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk4 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk5 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk5 new file mode 100644 index 0000000..155e3b8 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk5 differ 
diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk6 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk6 new file mode 100644 index 0000000..8cfbe00 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk6 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk7 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk7 new file mode 100644 index 0000000..f39f5c9 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk7 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk8 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk8 new file mode 100644 index 0000000..b661217 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk8 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk9 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk9 new file mode 100644 index 0000000..b816f2b Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/offset/chunk9 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/vertex_count b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/vertex_count new file mode 100644 index 0000000..9830fbe Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_dest/vertex_count differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part0/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part0/chunk0 new file mode 100644 index 0000000..2d11539 Binary files /dev/null and 
b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part0/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part1/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part1/chunk0 new file mode 100644 index 0000000..5f14014 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part1/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part2/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part2/chunk0 new file mode 100644 index 0000000..d0309a8 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part2/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part2/chunk1 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part2/chunk1 new file mode 100644 index 0000000..109b260 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part2/chunk1 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part3/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part3/chunk0 new file mode 100644 index 0000000..946ee95 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part3/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part4/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part4/chunk0 new file mode 100644 index 0000000..eb1dcf0 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part4/chunk0 differ diff --git 
a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part5/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part5/chunk0 new file mode 100644 index 0000000..dbc7776 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part5/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part6/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part6/chunk0 new file mode 100644 index 0000000..5aada1c Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part6/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part7/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part7/chunk0 new file mode 100644 index 0000000..2dc13a3 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part7/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part8/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part8/chunk0 new file mode 100644 index 0000000..f91307f Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part8/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part9/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part9/chunk0 new file mode 100644 index 0000000..ffa93f4 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/adj_list/part9/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part0/chunk0 
b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part0/chunk0 new file mode 100644 index 0000000..3ec532a Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part0/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part1/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part1/chunk0 new file mode 100644 index 0000000..05e5e10 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part1/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part2/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part2/chunk0 new file mode 100644 index 0000000..1b221d2 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part2/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part2/chunk1 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part2/chunk1 new file mode 100644 index 0000000..89141ff Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part2/chunk1 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part3/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part3/chunk0 new file mode 100644 index 0000000..1e21b86 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part3/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part4/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part4/chunk0 new file mode 
100644 index 0000000..8d8aafa Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part4/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part5/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part5/chunk0 new file mode 100644 index 0000000..4cdff17 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part5/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part6/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part6/chunk0 new file mode 100644 index 0000000..0e8c74b Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part6/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part7/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part7/chunk0 new file mode 100644 index 0000000..bb5745c Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part7/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part8/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part8/chunk0 new file mode 100644 index 0000000..4ff4c49 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part8/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part9/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part9/chunk0 new file mode 100644 index 0000000..3f6ecb5 Binary files /dev/null and 
b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/creationDate/part9/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count0 new file mode 100644 index 0000000..fd12e09 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count1 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count1 new file mode 100644 index 0000000..a384772 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count1 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count2 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count2 new file mode 100644 index 0000000..255c67e Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count2 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count3 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count3 new file mode 100644 index 0000000..a6f3cd5 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count3 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count4 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count4 new file mode 100644 index 0000000..0aa991b Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count4 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count5 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count5 new file mode 100644 index 0000000..3bf0211 
Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count5 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count6 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count6 new file mode 100644 index 0000000..ba21895 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count6 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count7 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count7 new file mode 100644 index 0000000..118a695 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count7 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count8 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count8 new file mode 100644 index 0000000..fbb8a16 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count8 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count9 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count9 new file mode 100644 index 0000000..f1dba9d Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/edge_count9 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk0 new file mode 100644 index 0000000..3895554 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk1 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk1 new file mode 100644 
index 0000000..c30a201 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk1 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk2 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk2 new file mode 100644 index 0000000..6e5006e Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk2 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk3 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk3 new file mode 100644 index 0000000..726a0f5 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk3 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk4 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk4 new file mode 100644 index 0000000..04b0b64 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk4 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk5 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk5 new file mode 100644 index 0000000..1d794ef Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk5 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk6 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk6 new file mode 100644 index 0000000..0e3b499 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk6 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk7 
b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk7 new file mode 100644 index 0000000..b140d83 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk7 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk8 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk8 new file mode 100644 index 0000000..f52149c Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk8 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk9 b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk9 new file mode 100644 index 0000000..52e6c0a Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/offset/chunk9 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/vertex_count b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/vertex_count new file mode 100644 index 0000000..9830fbe Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/ordered_by_source/vertex_count differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part0/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part0/chunk0 new file mode 100644 index 0000000..2d11539 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part0/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part1/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part1/chunk0 new file mode 100644 index 0000000..5f14014 Binary files /dev/null and 
b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part1/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part2/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part2/chunk0 new file mode 100644 index 0000000..d0309a8 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part2/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part2/chunk1 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part2/chunk1 new file mode 100644 index 0000000..109b260 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part2/chunk1 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part3/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part3/chunk0 new file mode 100644 index 0000000..946ee95 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part3/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part4/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part4/chunk0 new file mode 100644 index 0000000..eb1dcf0 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part4/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part5/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part5/chunk0 new file mode 100644 index 0000000..dbc7776 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part5/chunk0 differ diff --git 
a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part6/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part6/chunk0 new file mode 100644 index 0000000..5aada1c Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part6/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part7/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part7/chunk0 new file mode 100644 index 0000000..2dc13a3 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part7/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part8/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part8/chunk0 new file mode 100644 index 0000000..f91307f Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part8/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part9/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part9/chunk0 new file mode 100644 index 0000000..ffa93f4 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/adj_list/part9/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part0/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part0/chunk0 new file mode 100644 index 0000000..3ec532a Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part0/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part1/chunk0 
b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part1/chunk0 new file mode 100644 index 0000000..05e5e10 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part1/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part2/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part2/chunk0 new file mode 100644 index 0000000..1b221d2 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part2/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part2/chunk1 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part2/chunk1 new file mode 100644 index 0000000..89141ff Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part2/chunk1 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part3/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part3/chunk0 new file mode 100644 index 0000000..1e21b86 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part3/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part4/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part4/chunk0 new file mode 100644 index 0000000..8d8aafa Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part4/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part5/chunk0 
b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part5/chunk0 new file mode 100644 index 0000000..4cdff17 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part5/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part6/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part6/chunk0 new file mode 100644 index 0000000..0e8c74b Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part6/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part7/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part7/chunk0 new file mode 100644 index 0000000..bb5745c Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part7/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part8/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part8/chunk0 new file mode 100644 index 0000000..4ff4c49 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part8/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part9/chunk0 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part9/chunk0 new file mode 100644 index 0000000..3f6ecb5 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/creationDate/part9/chunk0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count0 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count0 new file mode 
100644 index 0000000..fd12e09 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count0 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count1 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count1 new file mode 100644 index 0000000..a384772 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count1 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count2 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count2 new file mode 100644 index 0000000..255c67e Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count2 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count3 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count3 new file mode 100644 index 0000000..a6f3cd5 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count3 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count4 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count4 new file mode 100644 index 0000000..0aa991b Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count4 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count5 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count5 new file mode 100644 index 0000000..3bf0211 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count5 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count6 
b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count6 new file mode 100644 index 0000000..ba21895 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count6 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count7 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count7 new file mode 100644 index 0000000..118a695 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count7 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count8 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count8 new file mode 100644 index 0000000..fbb8a16 Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count8 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count9 b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count9 new file mode 100644 index 0000000..f1dba9d Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/edge_count9 differ diff --git a/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/vertex_count b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/vertex_count new file mode 100644 index 0000000..9830fbe Binary files /dev/null and b/demo/ldbc_sample/parquet/edge/person_knows_person/unordered_by_source/vertex_count differ diff --git a/demo/ldbc_sample/parquet/ldbc_sample.graph.yml b/demo/ldbc_sample/parquet/ldbc_sample.graph.yml new file mode 100644 index 0000000..aeb8691 --- /dev/null +++ b/demo/ldbc_sample/parquet/ldbc_sample.graph.yml @@ -0,0 +1,6 @@ +name: ldbc_sample +vertices: + - person.vertex.yml +edges: + - person_knows_person.edge.yml +version: gar/v1 diff --git 
a/demo/ldbc_sample/parquet/person.vertex.yml b/demo/ldbc_sample/parquet/person.vertex.yml new file mode 100644 index 0000000..4c4a277 --- /dev/null +++ b/demo/ldbc_sample/parquet/person.vertex.yml @@ -0,0 +1,21 @@ +label: person +chunk_size: 100 +prefix: vertex/person/ +property_groups: + - properties: + - name: id + data_type: int64 + is_primary: true + file_type: parquet + - properties: + - name: firstName + data_type: string + is_primary: false + - name: lastName + data_type: string + is_primary: false + - name: gender + data_type: string + is_primary: false + file_type: parquet +version: gar/v1 diff --git a/demo/ldbc_sample/parquet/person_knows_person.edge.yml b/demo/ldbc_sample/parquet/person_knows_person.edge.yml new file mode 100644 index 0000000..90f224e --- /dev/null +++ b/demo/ldbc_sample/parquet/person_knows_person.edge.yml @@ -0,0 +1,37 @@ +src_label: person +edge_label: knows +dst_label: person +chunk_size: 1024 +src_chunk_size: 100 +dst_chunk_size: 100 +directed: false +prefix: edge/person_knows_person/ +adj_lists: + - ordered: false + aligned_by: src + file_type: parquet + property_groups: + - file_type: parquet + properties: + - name: creationDate + data_type: string + is_primary: false + - ordered: true + aligned_by: src + file_type: parquet + property_groups: + - file_type: parquet + properties: + - name: creationDate + data_type: string + is_primary: false + - ordered: true + aligned_by: dst + file_type: parquet + property_groups: + - file_type: parquet + properties: + - name: creationDate + data_type: string + is_primary: false +version: gar/v1 diff --git a/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk0 b/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk0 new file mode 100644 index 0000000..985e3fa Binary files /dev/null and b/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk0 differ diff --git a/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk1 
b/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk1 new file mode 100644 index 0000000..03a74e3 Binary files /dev/null and b/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk1 differ diff --git a/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk2 b/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk2 new file mode 100644 index 0000000..45d40c5 Binary files /dev/null and b/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk2 differ diff --git a/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk3 b/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk3 new file mode 100644 index 0000000..fe70911 Binary files /dev/null and b/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk3 differ diff --git a/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk4 b/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk4 new file mode 100644 index 0000000..ba0d861 Binary files /dev/null and b/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk4 differ diff --git a/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk5 b/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk5 new file mode 100644 index 0000000..e1a1f90 Binary files /dev/null and b/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk5 differ diff --git a/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk6 b/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk6 new file mode 100644 index 0000000..9411f59 Binary files /dev/null and b/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk6 differ diff --git a/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk7 b/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk7 new file mode 100644 index 0000000..e96c254 Binary files 
/dev/null and b/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk7 differ diff --git a/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk8 b/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk8 new file mode 100644 index 0000000..ed93f6e Binary files /dev/null and b/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk8 differ diff --git a/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk9 b/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk9 new file mode 100644 index 0000000..d08125d Binary files /dev/null and b/demo/ldbc_sample/parquet/vertex/person/firstName_lastName_gender/chunk9 differ diff --git a/demo/ldbc_sample/parquet/vertex/person/id/chunk0 b/demo/ldbc_sample/parquet/vertex/person/id/chunk0 new file mode 100644 index 0000000..3f73643 Binary files /dev/null and b/demo/ldbc_sample/parquet/vertex/person/id/chunk0 differ diff --git a/demo/ldbc_sample/parquet/vertex/person/id/chunk1 b/demo/ldbc_sample/parquet/vertex/person/id/chunk1 new file mode 100644 index 0000000..da7ed50 Binary files /dev/null and b/demo/ldbc_sample/parquet/vertex/person/id/chunk1 differ diff --git a/demo/ldbc_sample/parquet/vertex/person/id/chunk2 b/demo/ldbc_sample/parquet/vertex/person/id/chunk2 new file mode 100644 index 0000000..e5955d1 Binary files /dev/null and b/demo/ldbc_sample/parquet/vertex/person/id/chunk2 differ diff --git a/demo/ldbc_sample/parquet/vertex/person/id/chunk3 b/demo/ldbc_sample/parquet/vertex/person/id/chunk3 new file mode 100644 index 0000000..e9c5630 Binary files /dev/null and b/demo/ldbc_sample/parquet/vertex/person/id/chunk3 differ diff --git a/demo/ldbc_sample/parquet/vertex/person/id/chunk4 b/demo/ldbc_sample/parquet/vertex/person/id/chunk4 new file mode 100644 index 0000000..bfa0b97 Binary files /dev/null and b/demo/ldbc_sample/parquet/vertex/person/id/chunk4 differ diff --git a/demo/ldbc_sample/parquet/vertex/person/id/chunk5 
b/demo/ldbc_sample/parquet/vertex/person/id/chunk5 new file mode 100644 index 0000000..d047db5 Binary files /dev/null and b/demo/ldbc_sample/parquet/vertex/person/id/chunk5 differ diff --git a/demo/ldbc_sample/parquet/vertex/person/id/chunk6 b/demo/ldbc_sample/parquet/vertex/person/id/chunk6 new file mode 100644 index 0000000..8f7deba Binary files /dev/null and b/demo/ldbc_sample/parquet/vertex/person/id/chunk6 differ diff --git a/demo/ldbc_sample/parquet/vertex/person/id/chunk7 b/demo/ldbc_sample/parquet/vertex/person/id/chunk7 new file mode 100644 index 0000000..be91fdc Binary files /dev/null and b/demo/ldbc_sample/parquet/vertex/person/id/chunk7 differ diff --git a/demo/ldbc_sample/parquet/vertex/person/id/chunk8 b/demo/ldbc_sample/parquet/vertex/person/id/chunk8 new file mode 100644 index 0000000..d679faf Binary files /dev/null and b/demo/ldbc_sample/parquet/vertex/person/id/chunk8 differ diff --git a/demo/ldbc_sample/parquet/vertex/person/id/chunk9 b/demo/ldbc_sample/parquet/vertex/person/id/chunk9 new file mode 100644 index 0000000..36ba2d7 Binary files /dev/null and b/demo/ldbc_sample/parquet/vertex/person/id/chunk9 differ diff --git a/demo/ldbc_sample/parquet/vertex/person/vertex_count b/demo/ldbc_sample/parquet/vertex/person/vertex_count new file mode 100644 index 0000000..9830fbe Binary files /dev/null and b/demo/ldbc_sample/parquet/vertex/person/vertex_count differ diff --git a/process.png b/process.png new file mode 100644 index 0000000..ec30ca2 Binary files /dev/null and b/process.png differ diff --git a/proposal.md b/proposal.md new file mode 100644 index 0000000..56bbf55 --- /dev/null +++ b/proposal.md @@ -0,0 +1,54 @@ +# TuGraph数据迁移项目提案 + +## 方案调研 + +### 其他图数据库导入导出工具调研 + +**目的**:了解较完善的图数据库对其他图数据库数据导入导出的相关工具,参考相关项目构思TuGraph可以采取的方案 + +**结果**:总结表格如下 + +| 图数据库 | 图数据库导入导出or转换迁移工具 | connector支持 | +| ----------- | ------------------------------ | -------------- | +| neo4j | 未发现 | spark | +| NebulaGraph | Exchange(Spark应用) | spark、flink | +| OrientDB | 
orientdb-neo4j-importer | 未发现 | +| JanusGraph | 未发现 | Hadoop-Gremlin | +| ArangoDB | 未发现 | spark | + +**思考**: + +1. 对于NebulaGraph来说,Exchange工具的Reader和Writer工具对图数据库导入这一块非常方便,可以统一各种数据库的导入,同时具有更好的导入流程,可以作为一个feature +2. 大部分图数据库都支持spark的连接,有一定的工具解析成DataFrame对象,TuGraph可以考虑参考也做一个spark connector, 接入大数据生态 + +### GraphAr调研 + +**目的**:GraphAr目的就是作为图数据结构数据中间存储格式, 参考GraphAr的例子和接口将gar数据导入到tugraph中 + +**结果**:编译GraphAr项目,运行相关例子,打通了neo4j图数据库的导入导出流程,了解了neo4j spark connector工具,以及GraphAr对neo4j数据的解析流程,可以参考其中的c++接口解析数据导入到TuGraph + +### 实现方案调研 + +**目的**:目前已有方案三种,可以调研实现难度和大致情况 + +1. 基于GraphAr项目,根据相关c++接口,做相关数据导入TuGraph的工具,类似lgraph_import +2. 参考其他图数据库实现方式,实现一个TuGraph的Spark Connector +3. 基于DataX项目,做相关工具支持图数据库数据通过DataX导入TuGraph + +**结果**:方案调研如下, 最终选择基于GraphAr进行开发 + +| 方案 | GraphAr | Spark Connector | DataX | +| ---- | ------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------- | ------------------------------------------------------------ | +| 语言 | C++ | Scala/Java | Java | +| 优点 | GraphAr已经部分支持neo4j导入, 并且提供了c++接口, TuGraph可以直接利用相关工具,开发周期较快 | 丰富TuGraph工具集,可以统一到大数据平台,开发更多功能 | 已有TuGraph reader实现,DataX支持neo4j导出,插件开发文档详细 | +| 缺点 | GraphAr生态还不够完善, 对gar数据的解析存在一定的overhead | connector工具较大,需要从零开始实现,开发周期可能较长,可能涉及到技术选型问题,未知性较大 | DataX对图数据库支持不够完善,作为一个导入导出工具可能比较重 | + +## 导入工具实现设计 + +### 改进importer + +外部图数据源统一到GraphAr数据中, 同时拓展lgraph_import工具的GraphAr parser功能, 实现lgraph_import可以直接导入gar数据, 并且实现以下功能 + +- 自动解析GraphAr配置生成schema +- 根据主键作为端点构建边 +- 错误数据和配置报错提示 diff --git a/proposal3.md b/proposal3.md deleted file mode 100644 index 5e3272f..0000000 --- a/proposal3.md +++ /dev/null @@ -1,87 +0,0 @@ -# TuGraph数据迁移项目提案 - -**摘要** 项目提案根据任务细分进行拆解,生成粒度较细的解决方案和执行计划,并给出里程碑式小任务完成日期表 - -## 任务前提 - -- [x] 在系统上搭建开发环境 -- [x] 安装软件,运行项目,运行demo,熟悉TuGraph基本使用 -- [x] fork项目git,打通贡献流程 - -## 任务一 支持图数据库导入导出 - -### 展开GraphAr调研 - -**目的**:熟悉GraphAr对图数据库neo4j导入导出的工作流程,参考GraphAr的例子来设计导入导出工具
-**结果**:编译GraphAr项目,运行相关例子,打通了neo4j图数据库的导入导出流程,了解了neo4j spark connector工具,以及GraphAr对neo4j数据的解析流程,总结成文档,可以参考其中的c++接口解析数据导入到TuGraph - -### 其他图数据库导入导出工具调研 - -**目的**:了解较完善的图数据库对其他图数据库数据导入导出的相关工具,参考相关项目构思TuGraph可以采取的方案 -**结果**:总结表格如下 - -| 图数据库 | 图数据库导入导出or转换迁移工具 | connector支持 | 备注 | -| ----------- | ------------------------------ | -------------- | ------ | -| neo4j | 未发现 | spark | 已实践 | -| NebulaGraph | Exchange(Spark应用) | spark、flink | 未实践 | -| OrientDB | orientdb-neo4j-importer | 未发现 | 未实践 | -| JanusGraph | 未发现 | Hadoop-Gremlin | 未实践 | -| ArangoDB | 未发现 | spark | 未实践 | - -**思考**: - -1. 对于NebulaGraph来说,Exchange工具的Reader和Writer工具对图数据库导入这一块非常方便,可以统一各种数据库的导入,同时可以更好地导入流程,可以作为一个feature -2. 大部分图数据库都支持spark的连接,有一定的工具解析成DataFrame对象,TuGraph可以考虑参考也做一个spark connector - -### 实现方案调研 - -**目的**:目前已有方案三种,可以调研实现难度和大致情况,然后根据自己能力进行选择 - -1. 基于GraphAr项目,根据相关c++接口,做相关数据导入Tugraph的工具,类似lgraph_import -2. 参考其他图数据库实现方式,实现一个TuGraph的Spark Connector -3. 基于DataX项目,做相关工具支持图数据库数据通过DataX导入TuGraph - -**结果**:方案调研如下 - -| 方案 | GraphAr | Spark Connector | DataX | -| ---- | --------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------- | ------------------------------------------------------------ | -| 语言 | C++ | Scala/Java | Java | -| 优点 | 已有部分相关经验,相关问题可以答疑,TuGraph可以直接利用相关工具,开发周期较快 | 丰富TuGraph工具集,可以统一到大数据平台,开发更多功能 | 已有TuGraph reader实现,DataX支持neo4j导出,插件开发文档详细 | -| 缺点 | CMAKE,gtest不熟,大型c++项目经验不足,需要花时间学习,开发过程可能会抛出较多问题 | connector工具较大,需要从零开始实现,开发周期可能较长,可能设计到技术选型问题,未知性较大 | DataX对图数据库支持不够完善,作为一个导入导出工具可能比较重 | - -**思考**:三种方案对于我来说,最难的都是开发环境的搭建,方案选取应该能够尽快启动,搭好写代码架子,其中的语言逻辑和代码的编写应该没有问题。经过综合评估,优先选择GraphAr方案。 - -### GraphAr方案讨论 - -**目的**: 根据自己技术栈和时间, 制定GraphAr导出到TuGraph的实现方案 -**前置条件**: - -1. 补充学习cmake, 可以阅读CmakeLists.txt, 会复用以及编写cmake语句 -2. 跑通GraphAr c++ example, 能编写自己的demo程序 -3. 阅读TuGraph importer源码, 总结自己的理解和问题 - -**讨论**: - -1. demo展示 + importer理解 -2. 自己遇到的问题 -3. 
实现方案讨论 - -**结果**:阅读v3源码,参考v3的csv parser和jsonline parser制作一个graphar的parser - -## TimeLine - -时间线以及任务总结 - -| 时间 | 内容 | 里程碑 | -| ----------- | -------------------------------- | ------------------------------------------------------ | -| 6.28-7.7 | 研究数据库相关文档, 制定基础方案 | ✔️完成[任务前提](#任务前提) | -| 7.7 双周会 | 评估方案, 详细讲解任务 | 调整任务顺序 | -| 7.7-7.12 | 使用GraphAr, 总结问题 | ✔️完成[GraphAr调研](#展开graphar调研) | -| 7.12 交流会 | 探讨GraphAr问题 | 建立交流群 | -| 7.12-7.21 | 调研相关工具 | ✔️完成[其他图数据库调研](#其他图数据库导入导出工具调研) | -| 7.21 双周会 | 细化方案, 整合proposal | | -| 7.21-7.25 | 调研详细方案 | ✔️完成[细化方案调研](#实现方案调研) | -| 7.25-8.4 | 转战c++, 学习cmake | | -| 8.4 双周会 | 评估进度, 请教阅读源码方式 | | -| 8.4-8.7 | 优化demo, 读懂impoter, 准备讨论 | | -| 8.7 | 讨论方案 | ✔️完成[GraphAr方案讨论](#graphar方案讨论) |