From 1a384e92b34ab62e1febf1dd92c27f7de5387bae Mon Sep 17 00:00:00 2001 From: Stephen Lawrence Date: Thu, 13 Mar 2025 20:13:26 +0000 Subject: [PATCH] experimental: try out an A2B Apache IoTDB streaming sync Set up a docker environment for a streaming sync between two Apache IoTDB instances, one representing an in-vehicle HPC VSS data store, the other a cloud VSS data store. The latter could also be an in-vehicle telematics unit that uses AMQP or MQTT for the cloud connection. Signed-off-by: Stephen Lawrence --- .../data-store-sync-iotdb/docker-compose.yml | 44 ++++++ .../down-sample-and-sync.md | 134 ++++++++++++++++++ 2 files changed, 178 insertions(+) create mode 100644 examples/data-store-sync-iotdb/docker-compose.yml create mode 100644 examples/data-store-sync-iotdb/down-sample-and-sync.md diff --git a/examples/data-store-sync-iotdb/docker-compose.yml b/examples/data-store-sync-iotdb/docker-compose.yml new file mode 100644 index 0000000..fdc9b2f --- /dev/null +++ b/examples/data-store-sync-iotdb/docker-compose.yml @@ -0,0 +1,44 @@ +name: cdsp-data-store-sync-iotdb +services: + + vehicle-hpc-iotdb-service: + extends: + file: ../../docker/docker-compose-cdsp.yml + service: iotdb-service + hostname: hpc-iotdb-service + container_name: hpc-iotdb-service + build: + args: + SERVICE_HOSTNAME: hpc-iotdb-service + environment: + - cn_internal_address=hpc-iotdb-service + - cn_seed_config_node=hpc-iotdb-service:10710 + - dn_rpc_address=hpc-iotdb-service + - dn_internal_address=hpc-iotdb-service + - dn_seed_config_node=hpc-iotdb-service:10710 + volumes: + - ./hpc-iotdb-data:/iotdb/data + - ./hpc-iotdb-logs:/iotdb/logs +# - ./hpc-iotdb-conf:/iotdb/conf + + cloud-iotdb-service: + extends: + file: ../../docker/docker-compose-cdsp.yml + service: iotdb-service + hostname: cloud-iotdb-service + container_name: cloud-iotdb-service + build: + args: + SERVICE_HOSTNAME: cloud-iotdb-service + ports: !override + - "6666:6667" + environment: + - cn_internal_address=cloud-iotdb-service 
+ - cn_seed_config_node=cloud-iotdb-service:10710 + - dn_rpc_address=cloud-iotdb-service + - dn_internal_address=cloud-iotdb-service + - dn_seed_config_node=cloud-iotdb-service:10710 + volumes: + - ./cloud-iotdb-data:/iotdb/data + - ./cloud-iotdb-logs:/iotdb/logs +# - ./cloud-iotdb-conf:/iotdb/conf diff --git a/examples/data-store-sync-iotdb/down-sample-and-sync.md b/examples/data-store-sync-iotdb/down-sample-and-sync.md new file mode 100644 index 0000000..bc593f4 --- /dev/null +++ b/examples/data-store-sync-iotdb/down-sample-and-sync.md @@ -0,0 +1,134 @@ + +# Automatically streaming/synchronising VSS data between two data stores +These are basic notes demonstrating how to use the example docker compose file to sync data between two Apache IoTDB instances. + +System context: ++ Data store 1 is the Vehicle HPC DB. ++ Data store 2 is the Cloud DB. + +Apache IoTDB v1.3.3 Data Sync documentation can be found [here](https://iotdb.apache.org/UserGuide/V1.3.x/User-Manual/Data-Sync_apache.html) + +## Setup +1. Start the Playground docker environment + +Start two docker services `vehicle-hpc-iotdb-service` and `cloud-iotdb-service`, each of which contains a (standalone) Apache IoTDB instance. +``` +docker compose up -d +``` + +2. Connect to the IoTDB client in the HPC container + +Open a new terminal, then: +``` +docker exec -ti hpc-iotdb-service /iotdb/sbin/start-cli.sh -h hpc-iotdb-service +``` + +3. Connect to the IoTDB client in the Cloud container + +Open a new terminal, then: +``` +docker exec -ti cloud-iotdb-service /iotdb/sbin/start-cli.sh -h cloud-iotdb-service +``` + +## Create some data to sync in HPC DB +Here I reuse data from the CDSP down-sample example, but you can use your own. + +1. Create a database in the HPC DB + +In the IoTDB client in the HPC container: +```sql +create database root.test2 +``` +2. Copy the dataset into the HPC DB volume for importing + +From the host terminal: +``` +sudo cp ../vehicle-speed-downsample-iotdb/vehicle_speed_rl_dataset.csv hpc-iotdb-data/ +``` +3. 
Import data into HPC + +From the host terminal: +``` +docker exec -ti hpc-iotdb-service /iotdb/tools/import-data.sh -h hpc-iotdb-service -p 6667 -u root -pw root -s /iotdb/data/vehicle_speed_rl_dataset.csv +``` +Data will be in the timeseries `` root.test2.vin123test.`Vehicle.Speed` `` + +4. (Optional) Downsample the data in HPC DB into new timeseries + +**Setup**: The down-sample tutorial uses the DB function `sample` from the optional IoTDB Data Quality library. To be able to call it we must do a one-time registration of the library functions. This can be done from the host terminal by executing the supplied script (details in the tutorial [here](https://github.com/COVESA/cdsp/tree/main/examples/vehicle-speed-downsample-iotdb#data-quality-library-setup)): +``` +docker exec -ti hpc-iotdb-service /iotdb/sbin/register-UDF.sh +``` + +**Down-sample**: In the HPC client, down-sample the `Vehicle.Speed` data into a new timeseries `root.test2.vin123test.speed_upload` as if we were doing data reduction: +```sql +select sample(`Vehicle.Speed`,'method'='triangle','k'='100') into root.test2.vin123test(speed_upload) from root.test2.vin123test +``` + +## HPC to Cloud sync + +### Sync everything +1. Create pipe on HPC side towards Cloud DB + +In the HPC client: +```sql +create pipe V2C +WITH SOURCE ( + 'source'= 'iotdb-source', + 'realtime.mode' = 'stream' +) +with SINK ( + 'sink'='iotdb-thrift-async-sink', + 'node-urls' = 'cloud-iotdb-service:6667', +) +``` +The pipe will start executing immediately. + +In the Cloud DB CLI, confirm that the timeseries from the HPC DB have arrived: +``` +show timeseries +``` + + +### Sync only relevant data +Emulate syncing only relevant data by first processing in-vehicle and then syncing only the result. This also emulates the concept of data reduction for reduced transmission costs and network traffic. + +Which timeseries are synced is controlled using the `path` variable of the `source` plugin in the `pipe`. See the IoTDB documentation for details, e.g. wildcards. + +1. 
Create pipe on HPC DB to sync only the data prepared for upload in the timeseries +`root.test2.vin123test.speed_upload` + +In the HPC DB Client: +```sql +create pipe V2CPartial +WITH SOURCE ( + 'source'= 'iotdb-source', + 'realtime.mode' = 'stream', + 'path'='root.test2.vin123test.speed_upload' +) +with SINK ( + 'sink'='iotdb-thrift-async-sink', + 'node-urls' = 'cloud-iotdb-service:6667', +) +``` + +Confirm the timeseries arrived in the Cloud DB client: +```sql +select count(speed_upload) from root.** +``` +### Pipe admin + +Stop a pipe (append the pipe name, e.g. `stop pipe V2C`): +``` +stop pipe +``` + +Start a pipe (append the pipe name, e.g. `start pipe V2C`): +``` +start pipe +``` + +Show pipes: +``` +show pipes +``` \ No newline at end of file