Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,3 @@
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
replay_pid*

# irrelevant files
*.pdf
*.html

# data
/demo/neo4j2graphar/
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "demo/Neo4j2GraphAr/GraphAr"]
path = demo/Neo4j2GraphAr/GraphAr
url = https://github.com/alibaba/GraphAr.git
57 changes: 57 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1 +1,58 @@
# data-migration

![导入流程](process.png)

## 利用spark将数据从neo4j导出到GraphAr

### 环境搭建

- 操作系统: `Linux`, 已在`centos7`和`ubuntu20`上进行测试
- 安装`git`, `curl`, `java-11`, `maven`, 配置`JAVA_HOME`环境变量, 并在maven配置文件中加入mirror镜像

### 软件安装

此处将软件安装在`HOME`路径, 并添加临时环境变量, 具体可根据实际需要调整, 软件版本也可以按需调整

#### spark3.2.4(已有可忽略)

```bash
curl https://mirrors.tuna.tsinghua.edu.cn/apache/spark/spark-3.2.4/spark-3.2.4-bin-hadoop3.2.tgz | tar -xz -C ${HOME}/
export SPARK_HOME="${HOME}/spark-3.2.4-bin-hadoop3.2"
export PATH="${SPARK_HOME}/bin":"${PATH}"
```

#### neo4j-community-4.4.23(已有可忽略)

```bash
curl https://dist.neo4j.org/neo4j-community-4.4.23-unix.tar.gz | tar -xz -C ${HOME}/
export NEO4J_HOME="${HOME}/neo4j-community-4.4.23"
export PATH="${NEO4J_HOME}/bin":"${PATH}"
```

#### GraphAr spark

```bash
git clone https://github.com/alibaba/GraphAr.git
cd GraphAr/spark
mvn clean package -DskipTests
```

### 使用说明

1. 参考`GraphAr/spark/import/neo4j.json`编写导出配置文件, 其中包含生成gar的配置、neo4j连接的配置和导出schema的配置, 按字段填写即可

2. `./neo4j.sh neo4j.json`

## GraphAr数据导入tugraph-db

### 导入说明

1. 使用lgraph_import工具, 通过`-c`指定配置文件为gar数据中的XXX.graph.yml(注意必须为绝对路径), 并添加参数`--gar true`即可
2. 示例
`./lgraph_import -c /root/movie-gar/movie.graph.yml --gar true`

### 注意

1. GraphAr数据中的yml配置文件可以修改: 例如可以在graph.yml中删除指定的vertex或edge; 也可以在对应的vertex.yml中用prefix指定数据路径, 使数据存储路径与配置存储路径分离。
2. GraphAr允许相同的edge_label具有不同的properties, 但tugraph-db不支持这类数据; 导入过程中会报错并指出对应的edge_label, 可以根据报错信息修改GraphAr数据。

1 change: 1 addition & 0 deletions demo/Neo4j2GraphAr/GraphAr
Submodule GraphAr added at 39e1d6
27 changes: 27 additions & 0 deletions demo/Neo4j2GraphAr/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# 默认示例运行说明

GraphAr提供了一些shell脚本, 可以安装neo4j, spark, 以及运行导入程序

下面是一个运行示例

```bash
cd GraphAr/spark
scripts/get-spark-to-home.sh
export SPARK_HOME="${HOME}/spark-3.2.2-bin-hadoop3.2"
export PATH="${SPARK_HOME}/bin":"${PATH}"

scripts/get-neo4j-to-home.sh
export NEO4J_HOME="${HOME}/neo4j-community-4.4.23"
export PATH="${NEO4J_HOME}/bin":"${PATH}"
neo4j-admin set-initial-password neo4j

scripts/deploy-neo4j-movie-data.sh

scripts/build.sh

export NEO4J_USR="neo4j"
export NEO4J_PWD="neo4j"

cd import
./neo4j.sh neo4j.json
```
120 changes: 0 additions & 120 deletions demo/Neo4jToGraphAr/Neo4j2GraphAr.scala

This file was deleted.

79 changes: 0 additions & 79 deletions demo/Neo4jToGraphAr/neo4j_export.md

This file was deleted.

12 changes: 0 additions & 12 deletions demo/Neo4jToGraphAr/run-neo4j2graphar.sh

This file was deleted.

1 change: 1 addition & 0 deletions demo/ShowGraphAr/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/build/
25 changes: 17 additions & 8 deletions demo/ShowGraphAr/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,23 @@
# Build script for the `show` demo executable (built from show.cpp).
# NOTE(review): this text appears to come from a rendered diff whose +/-
# markers were lost, so removed and added lines seem to be interleaved
# (two project() calls, two add_executable() calls for "show", two
# find_package(Threads) calls). Confirm against the real CMakeLists.txt
# before relying on any single line below.
cmake_minimum_required(VERSION 3.15)

# Version components, joined into GAR_VERSION as MAJOR.MINOR.PATCH (0.1.0).
set(GAR_MAJOR_VERSION 0)
set(GAR_MINOR_VERSION 1)
set(GAR_PATCH_VERSION 0)
set(GAR_VERSION ${GAR_MAJOR_VERSION}.${GAR_MINOR_VERSION}.${GAR_PATCH_VERSION})

# Two project() declarations with the same name: the first enables C and CXX
# and carries GAR_VERSION, the second passes only the project name.
# NOTE(review): presumably one replaces the other in the diff -- TODO confirm.
project(graph-archive-demo LANGUAGES C CXX VERSION ${GAR_VERSION})
project(graph-archive-demo)

find_package(Threads REQUIRED)
# Directory-scoped settings: define GAR_NAMESPACE=GraphArchive and append
# -std=c++17 -Wall to the C++ compiler flags.
add_definitions(-DGAR_NAMESPACE=GraphArchive)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -Wall")

# `show` executable linked against gar and stdc++fs only.
add_executable(show show.cpp)
target_link_libraries(show PRIVATE gar stdc++fs)
# TARGET holds the executable name; the source file is derived from it below
# as ${TARGET}.cpp.
set(TARGET "show")

# Packages that must be found at configure time; their imported targets
# (Arrow::, ArrowDataset::, ArrowAcero::, Parquet::, Threads::) are linked below.
find_package(Arrow REQUIRED)
find_package(ArrowDataset REQUIRED)
find_package(ArrowAcero REQUIRED)
find_package(Parquet REQUIRED)
find_package(gar REQUIRED)
find_package(Threads REQUIRED)

# Executable named by TARGET, linked against gar, stdc++fs, Threads::Threads
# and the *_static Arrow/Parquet imported targets.
add_executable(${TARGET} ${TARGET}.cpp)
target_link_libraries(${TARGET} PRIVATE gar stdc++fs Threads::Threads
Arrow::arrow_static
Parquet::parquet_static
ArrowDataset::arrow_dataset_static
ArrowAcero::arrow_acero_static)
Loading