This project aims to use Flink to operate JanusGraph(Hbase Backend) simply and efficiently.
Using Map to store properties will be more flexible than the current
connector.
-- register a vertex table in FlinkSQL
CREATE TABLE t_jg_vertex (
`id` ROW <`label` STRING, `ident` STRING>, -- identify a unique vertex
`label` STRING, -- graph's label
`value` STRING, -- graph's value property
`create_at` TIMESTAMP_LTZ(3), -- graph's create_at property
`update_at` TIMESTAMP_LTZ(3) -- graph's update_at property
) WITH (
'connector' = 'janusgraph',
'hosts' = '0.0.0.0,0.0.0.0',
'port' = '2181',
'backend-type' = 'hbase',
'table-name' = 'jg_table',
'table-type' = 'vertex',
'sink.flush-interval' = '10s',
'sink.batch-size' = '2000',
'sink.non-update-columns' = 'create_at,..',
'sink.parallelism' = '2',
...
);
-- register a edge table in FlinkSQL
CREATE TABLE t_jg_edge (
`id` ROW<label STRING,..>, -- id + in_v + out_v can identify a unique edge
`in_v` ROW<label STRING,..>,
`out_v` ROW<label STRING,..>,
`label` STRING,
`value` STRING,
) WITH (
'connector' = 'janusgraph',
'hosts' = '0.0.0.0,0.0.0.0',
'port' = '2181',
'backend-type' = 'hbase',
'table-name' = 'jg_table',
'table-type' = 'edge',
...
)
-- 1. id, label, in_v, out_v are internally defined columns.
-- 2. Type of id, in_v, out_v can be BIGINT or ROW.| Option | Required | Default | Type | Description |
|---|---|---|---|---|
| connector | true | none | String | Specify which connector to use, here should be 'janusgraph'. |
| factory | false | org.janusgraph.core.JanusGraphFactory | String | The Factory for creating a JanusGraph instance, equal to JanusGraph config gremlin.graph |
| hosts | true | none | String | A comma-separated list of storage backend servers, equal to JanusGraph config storage.hostname. |
| port | true | none | Integer | The port on which to connect to storage backend servers, equal to JanusGraph config storage.port. |
| backend-type | false | hbase | String | Type of storage backend, currently only supports hbase. |
| table-name | true | none | String | The JanusGraph table name, currently equal to JanusGraph config storage.hbase.table. |
| table-type | true | none | String | The type of current table, available: vertex, edge. |
| username | false | none | String | Username to authenticate against storage backend, equal to JanusGraph config storage.username. |
| password | false | none | String | Password to authenticate against storage backend, equal to JanusGraph config storage.password. |
| sink.parallelism | false | none | Integer | Defines a custom parallelism for the sink. |
| sink.batch-size | false | 1000 | Integer | The max flush size, over this number of records, will flush data. |
| sink.flush-interval | false | 1s | Integer | The flush interval mills, over this time, asynchronous threads will flush data. |
| sink.max-retries | false | 3 | Integer | The max retry times if writing records to JanusGraph failed. |
| sink.non-update-columns | false | none | String | A comma-separated list of columns that are not allowed to be updated. |
| properties.* | optional | none | String | This can set and pass JanusGraph configurations. |
| Flink Type | JanusGraph Type |
|---|---|
| CHAR / VARCHAR / STRING | String |
| BOOLEAN | Boolean |
| BINARY / VARBINARY / BYTES | byte[] |
| TINYINT | Byte |
| SMALLINT | Short |
| INTEGER / INTERVAL_YEAR_MONTH | Integer |
| BIGINT / INTERVAL_DAY_TIME | Long |
| FLOAT | Float |
| DOUBLE | Double |
| DATE / TIME / TIMESTAMP / TIMESTAMP_LTZ | Date |
| ARRAY | Array |
| MAP | Map |
| ROW | Supported by columns of id, in_v, out_v |
| DECIMAL | Not supported |
| MULTISET | Not supported |
| RAW | Not supported |