diff --git a/src/.vuepress/sidebar/V1.3.x/en.ts b/src/.vuepress/sidebar/V1.3.x/en.ts index fbb377dde..37b7f5ce4 100644 --- a/src/.vuepress/sidebar/V1.3.x/en.ts +++ b/src/.vuepress/sidebar/V1.3.x/en.ts @@ -98,7 +98,8 @@ export const enSidebar = { { text: 'Command Line Interface (CLI)', link: 'CLI' }, { text: 'Monitor Tool', link: 'Monitor-Tool_apache' }, { text: 'Benchmark Tool', link: 'Benchmark' }, - { text: 'Maintenance Tool', link: 'Maintenance-Tool_apache' }, + { text: 'Maintenance Tool', link: 'Maintenance-Tool_apache' }, + { text: 'Data Import', link: 'Data-Import-Tool' }, { text: 'Data Export', link: 'Data-Export-Tool' }, ], }, diff --git a/src/.vuepress/sidebar/V1.3.x/zh.ts b/src/.vuepress/sidebar/V1.3.x/zh.ts index 919dc80c7..76e5fe9b3 100644 --- a/src/.vuepress/sidebar/V1.3.x/zh.ts +++ b/src/.vuepress/sidebar/V1.3.x/zh.ts @@ -99,7 +99,9 @@ export const zhSidebar = { { text: '监控工具', link: 'Monitor-Tool_apache' }, { text: '测试工具', link: 'Benchmark' }, { text: '运维工具', link: 'Maintenance-Tool_apache' }, + { text: '数据导入', link: 'Data-Import-Tool' }, { text: '数据导出', link: 'Data-Export-Tool' }, + ], }, { diff --git a/src/.vuepress/sidebar_timecho/V1.3.x/en.ts b/src/.vuepress/sidebar_timecho/V1.3.x/en.ts index d887f6ef5..1e7398721 100644 --- a/src/.vuepress/sidebar_timecho/V1.3.x/en.ts +++ b/src/.vuepress/sidebar_timecho/V1.3.x/en.ts @@ -108,6 +108,7 @@ export const enSidebar = { { text: 'Monitor Tool', link: 'Monitor-Tool_timecho' }, { text: 'Benchmark Tool', link: 'Benchmark' }, { text: 'Maintenance Tool', link: 'Maintenance-Tool_timecho' }, + { text: 'Data Import', link: 'Data-Import-Tool' }, { text: 'Data Export', link: 'Data-Export-Tool' }, ], }, diff --git a/src/.vuepress/sidebar_timecho/V1.3.x/zh.ts b/src/.vuepress/sidebar_timecho/V1.3.x/zh.ts index 0eca851e2..cfda0f27f 100644 --- a/src/.vuepress/sidebar_timecho/V1.3.x/zh.ts +++ b/src/.vuepress/sidebar_timecho/V1.3.x/zh.ts @@ -108,6 +108,7 @@ export const zhSidebar = { { text: '监控工具', link: 
'Monitor-Tool_timecho' }, { text: '测试工具', link: 'Benchmark' }, { text: '运维工具', link: 'Maintenance-Tool_timecho' }, + { text: '数据导入', link: 'Data-Import-Tool' }, { text: '数据导出', link: 'Data-Export-Tool' }, ], }, diff --git a/src/UserGuide/Master/Reference/Common-Config-Manual.md b/src/UserGuide/Master/Reference/Common-Config-Manual.md index 97a9414fe..4a4f4ff33 100644 --- a/src/UserGuide/Master/Reference/Common-Config-Manual.md +++ b/src/UserGuide/Master/Reference/Common-Config-Manual.md @@ -2162,3 +2162,50 @@ Different configuration parameters take effect in the following three ways: | Effective | hot-load | +#### TsFile Active Listening&Loading Function Configuration + +* load\_active\_listening\_enable + +|Name| load\_active\_listening\_enable | +|:---:|:---| +|Description| Whether to enable the DataNode's active listening and loading of tsfile functionality (default is enabled). | +|Type| Boolean | +|Default| true | +|Effective| hot-load | + +* load\_active\_listening\_dirs + +|Name| load\_active\_listening\_dirs | +|:---:|:---| +|Description| The directories to be listened to (automatically includes subdirectories of the directory), if there are multiple, separate with “,”. The default directory is ext/load/pending (supports hot loading). | +|Type| String | +|Default| ext/load/pending | +|Effective|hot-load| + +* load\_active\_listening\_fail\_dir + +|Name| load\_active\_listening\_fail\_dir | +|:---:|:---| +|Description| The directory to which files are transferred after the execution of loading tsfile files fails, only one directory can be configured. | +|Type| String | +|Default| ext/load/failed | +|Effective|hot-load| + +* load\_active\_listening\_max\_thread\_num + +|Name| load\_active\_listening\_max\_thread\_num | +|:---:|:---| +|Description| The maximum number of threads to perform loading tsfile tasks simultaneously. The default value when the parameter is commented out is max(1, CPU core count / 2). 
When the user sets a value not in the range [1, CPU core count / 2], it will be set to the default value (1, CPU core count / 2). | +|Type| Long | +|Default| max(1, CPU core count / 2) | +|Effective|Effective after restart| + + +* load\_active\_listening\_check\_interval\_seconds + +|Name| load\_active\_listening\_check\_interval\_seconds | +|:---:|:---| +|Description| Active listening polling interval in seconds. The function of actively listening to tsfile is achieved by polling the folder. This configuration specifies the time interval between two checks of load_active_listening_dirs, and the next check will be executed after load_active_listening_check_interval_seconds seconds of each check. When the user sets the polling interval to less than 1, it will be set to the default value of 5 seconds. | +|Type| Long | +|Default| 5| +|Effective|Effective after restart| \ No newline at end of file diff --git a/src/UserGuide/Master/Tools-System/Data-Import-Tool.md b/src/UserGuide/Master/Tools-System/Data-Import-Tool.md new file mode 100644 index 000000000..f03c3a5b1 --- /dev/null +++ b/src/UserGuide/Master/Tools-System/Data-Import-Tool.md @@ -0,0 +1,217 @@ +# Data Import + +## 1. IoTDB Data Import + +IoTDB currently supports importing data in CSV, SQL, and TsFile (IoTDB's underlying open-time series file format) into the database. The specific functionalities are as follows: + + + + + + + + + + + + + + + + + + + + + + + + + + +
File FormatIoTDB ToolDescription
CSVimport-data.sh/batCan be used for single or batch import of CSV files into IoTDB
SQLimport-data.sh/batCan be used for single or batch import of SQL files into IoTDB
TsFileload-tsfile.sh/batCan be used for single or batch import of TsFile files into IoTDB
TsFile Active Listening & Loading FeatureAccording to user configuration, it listens for changes in TsFile files in the specified path and loads newly added TsFile files into IoTDB
+ +## 2. import-data Scripts + +- Supported formats: CSV、SQL + +### 2.1 Command + +```Bash +# Unix/OS X +>tools/import-data.sh -h -p -u -pw -s [-fd <./failedDirectory> -aligned -batch -tp -typeInfer -lpf ] + +# Windows +>tools\import-data.bat -h -p -u -pw -s [-fd <./failedDirectory> -aligned -batch -tp -typeInfer -lpf ] +``` + +### 2.2 Parameter Introduction + + +| **Parameter** | **Definition** | **Required** | **Default** | +| --------- | ------------------------------------------------------------ | ------------ | ------------------------ | +| -h | Hostname | No | 127.0.0.1 | +| -p | Port | No | 6667 | +| -u | Username | No | root | +| -pw | Password | No | root | +| -s | Specify the data to be imported, here you can specify files or folders. If a folder is specified, all files with suffixes of csv or sql in the folder will be batch imported (In V1.3.2, the parameter is `-f`) | Yes | | +| -fd | Specify the directory for storing failed SQL files. If this parameter is not specified, failed files will be saved in the source data directory. Note: For unsupported SQL, illegal SQL, and failed SQL, they will be put into the failed directory under the failed file (default is the file name with `.failed` suffix) | No |The source filename with `.failed` suffix | +| -aligned | Specify whether to use the `aligned` interface, options are true or false. Note: This parameter is only effective when importing csv files. | No | false | +| -batch | Used to specify the number of data points per batch (minimum value is 1, maximum value is Integer.*MAX_VALUE*). If the program reports the error `org.apache.thrift.transport.TTransportException: Frame size larger than protect max size`, you can appropriately reduce this parameter. 
| No | 100000 | +| -tp | Specify the time precision, options include `ms` (milliseconds), `ns` (nanoseconds), `us` (microseconds) | No | ms | +| -lpf | Specify the number of data lines written per failed file (In V1.3.2, the parameter is `-linesPerFailedFile`) | No | 10000 | +| -typeInfer | Used to specify type inference rules, such as . Note: Used to specify type inference rules. `srcTsDataType` includes `boolean`, `int`, `long`, `float`, `double`, `NaN`. `dstTsDataType` includes `boolean`, `int`, `long`, `float`, `double`, `text`. When `srcTsDataType` is `boolean`, `dstTsDataType` can only be `boolean` or `text`. When `srcTsDataType` is `NaN`, `dstTsDataType` can only be `float`, `double`, or `text`. When `srcTsDataType` is a numerical type, the precision of `dstTsDataType` needs to be higher than `srcTsDataType`. For example: `-typeInfer boolean=text,float=double` | No | | + + +### 2.3 Running Example + + +- Import the `dump0_0.sql` data in the current `data` directory to the local IoTDB database. + +```Bash +# Unix/OS X +>tools/import-data.sh -s ./data/dump0_0.sql +# Windows +>tools/import-data.bat -s ./data/dump0_0.sql +``` + +- Import all data in the current `data` directory in an aligned manner to the local IoTDB database. + +```Bash +# Unix/OS X +>tools/import-data.sh -s ./data/ -fd ./failed/ -aligned true +# Windows +>tools/import-data.bat -s ./data/ -fd ./failed/ -aligned true +``` + +- Import the `dump0_0.csv` data in the current `data` directory to the local IoTDB database. + +```Bash +# Unix/OS X +>tools/import-data.sh -s ./data/dump0_0.csv -fd ./failed/ +# Windows +>tools/import-data.bat -s ./data/dump0_0.csv -fd ./failed/ +``` + +- Import the `dump0_0.csv` data in the current `data` directory in an aligned manner, batch import 100000 lines to the IoTDB database on the host with IP `192.168.100.1`, record failures in the current `failed` directory, with a maximum of 1000 lines per file. 
+ + +```Bash +# Unix/OS X +>tools/import-data.sh -h 192.168.100.1 -p 6667 -u root -pw root -s ./data/dump0_0.csv -fd ./failed/ -aligned true -batch 100000 -tp ms -typeInfer boolean=text,float=double -lpf 1000 +# Windows +>tools/import-data.bat -h 192.168.100.1 -p 6667 -u root -pw root -s ./data/dump0_0.csv -fd ./failed/ -aligned true -batch 100000 -tp ms -typeInfer boolean=text,float=double -lpf 1000 +``` + + +## 3. load-tsfile Script + +- Supported formats: TsFile + +### 3.1 Command + +```Bash +# Unix/OS X +>tools/load-tsfile.sh -h -p -u -pw -s -os [-sd ] -of [-fd ] [-tn ] + +# Windows +>tools\load-tsfile.bat -h -p -u -pw -s -os [-sd ] -of [-fd ] [-tn ] +``` + +### 3.2 Parameter Introduction + + +| **Parameter** | **Description** | **Required** | **Default** | +| -------- | ------------------------------------------------------------ | ----------------------------------- | ------------------- | +| -h | Hostname | No | 127.0.0.1 | +| -p | Port | No | 6667 | +| -u | Username | No | root | +| -pw | Password | No | root | +| -s | The local directory path of the script file (folder) to be loaded | Yes | | +| -os | none: Do not delete
mv: Move successful files to the target folder
cp: Hard link (copy) successful files to the target folder
delete: Delete | Yes | | +| -sd | The target folder for mv or cp when --on_success is mv or cp. The new file name is the flattened source folder path concatenated with the original file name. | Required when --on_success is mv or cp | ${EXEC_DIR}/success | +| -of | none: Skip
mv: Move failed files to the target folder
cp: Hard link (copy) failed files to the target folder
delete: Delete | Yes | | +| -fd | The target folder for mv or cp when --on_fail is mv or cp. The new file name is the flattened source folder path concatenated with the original file name. | Required when --on_fail is mv or cp | ${EXEC_DIR}/fail | +| -tn | Maximum number of parallel threads | No | 8 | + + + +### 3.3 Running Examples + + +```Bash +# Unix/OS X +> tools/load-tsfile.sh -h 127.0.0.1 -p 6667 -u root -pw root -s /path/sql -os delete -of delete -tn 8 +> tools/load-tsfile.sh -h 127.0.0.1 -p 6667 -u root -pw root -s /path/sql -os mv -of cp -sd /path/success/dir -fd /path/failure/dir -tn 8 + +# Windows +> tools\load-tsfile.bat -h 127.0.0.1 -p 6667 -u root -pw root -s /path/sql -os mv -of cp -sd /path/success/dir -fd /path/failure/dir -tn 8 +> tools\load-tsfile.bat -h 127.0.0.1 -p 6667 -u root -pw root -s /path/sql -os delete -of delete -tn 8 +``` + +## 4. TsFile Active Listening & Loading Feature + +The TsFile Active Listening & Loading Feature can actively monitor TsFile file changes in the specified target path (configured by the user) and automatically synchronize TsFile files from the target path to the specified reception path (configured by the user). Through this feature, IoTDB can automatically detect and load these files without the need for any additional manual loading operations. This automated process not only simplifies the user's operational steps but also reduces potential errors that may occur during the operation, effectively reducing the complexity for users during the usage process. + +![](https://alioss.timecho.com/docs/img/Data-import2.png) + + +### 4.1 Configuration Parameters + +You can enable the TsFile Active Listening & Loading Feature by finding the following parameters in the configuration file template `iotdb-system.properties.template` and adding them to the IoTDB configuration file `iotdb-system.properties`. 
The complete configuration is as follows: + + +| **Configuration Parameter** | **Description** | **Value Range** | **Required** | **Default Value** | **Loading Method** | +| -------------------------------------------- | ------------------------------------------------------------ | -------------------------- | ------------ | ---------------------- | ---------------- | +| load_active_listening_enable | Whether to enable the DataNode's active listening and loading of tsfile functionality (default is enabled). | Boolean: true,false | Optional | true | Hot Loading | +| load_active_listening_dirs | The directories to be listened to (automatically includes subdirectories of the directory), if there are multiple, separate with “,”. The default directory is ext/load/pending (supports hot loading). | String: one or more file directories | Optional | ext/load/pending | Hot Loading | +| load_active_listening_fail_dir | The directory to which files are transferred after the execution of loading tsfile files fails, only one directory can be configured. | String: one file directory | Optional | ext/load/failed | Hot Loading | +| load_active_listening_max_thread_num | The maximum number of threads to perform loading tsfile tasks simultaneously. The default value when the parameter is commented out is max(1, CPU core count / 2). When the user sets a value not in the range [1, CPU core count / 2], it will be set to the default value max(1, CPU core count / 2). | Long: [1, Long.MAX_VALUE] | Optional | max(1, CPU core count / 2) | Effective after restart | +| load_active_listening_check_interval_seconds | Active listening polling interval in seconds. The function of actively listening to tsfile is achieved by polling the folder. This configuration specifies the time interval between two checks of load_active_listening_dirs: the next check starts load_active_listening_check_interval_seconds seconds after the previous check completes. 
When the user sets the polling interval to less than 1, it will be set to the default value of 5 seconds. | Long: [1, Long.MAX_VALUE] | Optional | 5 | Effective after restart | + + +### 4.2 Precautions + +1. If there is a mods file in the files to be loaded, the mods file should be moved to the listening directory first, and then the tsfile files should be moved, with the mods file and the corresponding tsfile file in the same directory. This prevents the loading of tsfile files without the corresponding mods files. + + +```SQL +FUNCTION moveFilesToListeningDirectory(sourceDirectory, listeningDirectory) + // Move mods files + modsFiles = searchFiles(sourceDirectory, "*mods*") + IF modsFiles IS NOT EMPTY + FOR EACH file IN modsFiles + MOVE(file, listeningDirectory) + END FOR + END IF + + // Move tsfile files + tsfileFiles = searchFiles(sourceDirectory, "*tsfile*") + IF tsfileFiles IS NOT EMPTY + FOR EACH file IN tsfileFiles + MOVE(file, listeningDirectory) + END FOR + END IF +END FUNCTION + +FUNCTION searchFiles(directory, pattern) + matchedFiles = [] + FOR EACH file IN directory.files + IF file.name MATCHES pattern + APPEND file TO matchedFiles + END IF + END FOR + RETURN matchedFiles +END FUNCTION + +FUNCTION MOVE(sourceFile, targetDirectory) + // Implement the logic of moving files from sourceFile to targetDirectory +END FUNCTION +``` + +2. Prohibit setting the receiver directory of Pipe, the data directory for storing data, etc., as the listening directory. + +3. Prohibit `load_active_listening_fail_dir` from having the same directory as `load_active_listening_dirs`, or each other's nesting. + +4. Ensure that the `load_active_listening_dirs` directory has sufficient permissions. After the load is successful, the files will be deleted. If there is no delete permission, it will lead to repeated loading. 
+ diff --git a/src/UserGuide/latest/Reference/Common-Config-Manual.md b/src/UserGuide/latest/Reference/Common-Config-Manual.md index 97a9414fe..2f7fe3405 100644 --- a/src/UserGuide/latest/Reference/Common-Config-Manual.md +++ b/src/UserGuide/latest/Reference/Common-Config-Manual.md @@ -2162,3 +2162,52 @@ Different configuration parameters take effect in the following three ways: | Effective | hot-load | + + +#### TsFile Active Listening&Loading Function Configuration + +* load\_active\_listening\_enable + +|Name| load\_active\_listening\_enable | +|:---:|:---| +|Description| Whether to enable the DataNode's active listening and loading of tsfile functionality (default is enabled). | +|Type| Boolean | +|Default| true | +|Effective| hot-load | + +* load\_active\_listening\_dirs + +|Name| load\_active\_listening\_dirs | +|:---:|:---| +|Description| The directories to be listened to (automatically includes subdirectories of the directory), if there are multiple, separate with “,”. The default directory is ext/load/pending (supports hot loading). | +|Type| String | +|Default| ext/load/pending | +|Effective|hot-load| + +* load\_active\_listening\_fail\_dir + +|Name| load\_active\_listening\_fail\_dir | +|:---:|:---| +|Description| The directory to which files are transferred after the execution of loading tsfile files fails, only one directory can be configured. | +|Type| String | +|Default| ext/load/failed | +|Effective|hot-load| + +* load\_active\_listening\_max\_thread\_num + +|Name| load\_active\_listening\_max\_thread\_num | +|:---:|:---| +|Description| The maximum number of threads to perform loading tsfile tasks simultaneously. The default value when the parameter is commented out is max(1, CPU core count / 2). When the user sets a value not in the range [1, CPU core count / 2], it will be set to the default value (1, CPU core count / 2). 
| +|Type| Long | +|Default| max(1, CPU core count / 2) | +|Effective|Effective after restart| + + +* load\_active\_listening\_check\_interval\_seconds + +|Name| load\_active\_listening\_check\_interval\_seconds | +|:---:|:---| +|Description| Active listening polling interval in seconds. The function of actively listening to tsfile is achieved by polling the folder. This configuration specifies the time interval between two checks of load_active_listening_dirs, and the next check will be executed after load_active_listening_check_interval_seconds seconds of each check. When the user sets the polling interval to less than 1, it will be set to the default value of 5 seconds. | +|Type| Long | +|Default| 5| +|Effective|Effective after restart| \ No newline at end of file diff --git a/src/UserGuide/latest/Tools-System/Data-Import-Tool.md b/src/UserGuide/latest/Tools-System/Data-Import-Tool.md new file mode 100644 index 000000000..f03c3a5b1 --- /dev/null +++ b/src/UserGuide/latest/Tools-System/Data-Import-Tool.md @@ -0,0 +1,217 @@ +# Data Import + +## 1. IoTDB Data Import + +IoTDB currently supports importing data in CSV, SQL, and TsFile (IoTDB's underlying open-time series file format) into the database. The specific functionalities are as follows: + + + + + + + + + + + + + + + + + + + + + + + + + + +
File FormatIoTDB ToolDescription
CSVimport-data.sh/batCan be used for single or batch import of CSV files into IoTDB
SQLimport-data.sh/batCan be used for single or batch import of SQL files into IoTDB
TsFileload-tsfile.sh/batCan be used for single or batch import of TsFile files into IoTDB
TsFile Active Listening & Loading FeatureAccording to user configuration, it listens for changes in TsFile files in the specified path and loads newly added TsFile files into IoTDB
+ +## 2. import-data Scripts + +- Supported formats: CSV、SQL + +### 2.1 Command + +```Bash +# Unix/OS X +>tools/import-data.sh -h -p -u -pw -s [-fd <./failedDirectory> -aligned -batch -tp -typeInfer -lpf ] + +# Windows +>tools\import-data.bat -h -p -u -pw -s [-fd <./failedDirectory> -aligned -batch -tp -typeInfer -lpf ] +``` + +### 2.2 Parameter Introduction + + +| **Parameter** | **Definition** | **Required** | **Default** | +| --------- | ------------------------------------------------------------ | ------------ | ------------------------ | +| -h | Hostname | No | 127.0.0.1 | +| -p | Port | No | 6667 | +| -u | Username | No | root | +| -pw | Password | No | root | +| -s | Specify the data to be imported, here you can specify files or folders. If a folder is specified, all files with suffixes of csv or sql in the folder will be batch imported (In V1.3.2, the parameter is `-f`) | Yes | | +| -fd | Specify the directory for storing failed SQL files. If this parameter is not specified, failed files will be saved in the source data directory. Note: For unsupported SQL, illegal SQL, and failed SQL, they will be put into the failed directory under the failed file (default is the file name with `.failed` suffix) | No |The source filename with `.failed` suffix | +| -aligned | Specify whether to use the `aligned` interface, options are true or false. Note: This parameter is only effective when importing csv files. | No | false | +| -batch | Used to specify the number of data points per batch (minimum value is 1, maximum value is Integer.*MAX_VALUE*). If the program reports the error `org.apache.thrift.transport.TTransportException: Frame size larger than protect max size`, you can appropriately reduce this parameter. 
| No | 100000 | +| -tp | Specify the time precision, options include `ms` (milliseconds), `ns` (nanoseconds), `us` (microseconds) | No | ms | +| -lpf | Specify the number of data lines written per failed file (In V1.3.2, the parameter is `-linesPerFailedFile`) | No | 10000 | +| -typeInfer | Used to specify type inference rules, such as . Note: Used to specify type inference rules. `srcTsDataType` includes `boolean`, `int`, `long`, `float`, `double`, `NaN`. `dstTsDataType` includes `boolean`, `int`, `long`, `float`, `double`, `text`. When `srcTsDataType` is `boolean`, `dstTsDataType` can only be `boolean` or `text`. When `srcTsDataType` is `NaN`, `dstTsDataType` can only be `float`, `double`, or `text`. When `srcTsDataType` is a numerical type, the precision of `dstTsDataType` needs to be higher than `srcTsDataType`. For example: `-typeInfer boolean=text,float=double` | No | | + + +### 2.3 Running Example + + +- Import the `dump0_0.sql` data in the current `data` directory to the local IoTDB database. + +```Bash +# Unix/OS X +>tools/import-data.sh -s ./data/dump0_0.sql +# Windows +>tools/import-data.bat -s ./data/dump0_0.sql +``` + +- Import all data in the current `data` directory in an aligned manner to the local IoTDB database. + +```Bash +# Unix/OS X +>tools/import-data.sh -s ./data/ -fd ./failed/ -aligned true +# Windows +>tools/import-data.bat -s ./data/ -fd ./failed/ -aligned true +``` + +- Import the `dump0_0.csv` data in the current `data` directory to the local IoTDB database. + +```Bash +# Unix/OS X +>tools/import-data.sh -s ./data/dump0_0.csv -fd ./failed/ +# Windows +>tools/import-data.bat -s ./data/dump0_0.csv -fd ./failed/ +``` + +- Import the `dump0_0.csv` data in the current `data` directory in an aligned manner, batch import 100000 lines to the IoTDB database on the host with IP `192.168.100.1`, record failures in the current `failed` directory, with a maximum of 1000 lines per file. 
+ + +```Bash +# Unix/OS X +>tools/import-data.sh -h 192.168.100.1 -p 6667 -u root -pw root -s ./data/dump0_0.csv -fd ./failed/ -aligned true -batch 100000 -tp ms -typeInfer boolean=text,float=double -lpf 1000 +# Windows +>tools/import-data.bat -h 192.168.100.1 -p 6667 -u root -pw root -s ./data/dump0_0.csv -fd ./failed/ -aligned true -batch 100000 -tp ms -typeInfer boolean=text,float=double -lpf 1000 +``` + + +## 3. load-tsfile Script + +- Supported formats: TsFile + +### 3.1 Command + +```Bash +# Unix/OS X +>tools/load-tsfile.sh -h -p -u -pw -s -os [-sd ] -of [-fd ] [-tn ] + +# Windows +>tools\load-tsfile.bat -h -p -u -pw -s -os [-sd ] -of [-fd ] [-tn ] +``` + +### 3.2 Parameter Introduction + + +| **Parameter** | **Description** | **Required** | **Default** | +| -------- | ------------------------------------------------------------ | ----------------------------------- | ------------------- | +| -h | Hostname | No | 127.0.0.1 | +| -p | Port | No | 6667 | +| -u | Username | No | root | +| -pw | Password | No | root | +| -s | The local directory path of the script file (folder) to be loaded | Yes | | +| -os | none: Do not delete
mv: Move successful files to the target folder
cp: Hard link (copy) successful files to the target folder
delete: Delete | Yes | | +| -sd | The target folder for mv or cp when --on_success is mv or cp. The new file name is the flattened source folder path concatenated with the original file name. | Required when --on_success is mv or cp | ${EXEC_DIR}/success | +| -of | none: Skip
mv: Move failed files to the target folder
cp: Hard link (copy) failed files to the target folder
delete: Delete | Yes | | +| -fd | The target folder for mv or cp when --on_fail is mv or cp. The new file name is the flattened source folder path concatenated with the original file name. | Required when --on_fail is mv or cp | ${EXEC_DIR}/fail | +| -tn | Maximum number of parallel threads | No | 8 | + + + +### 3.3 Running Examples + + +```Bash +# Unix/OS X +> tools/load-tsfile.sh -h 127.0.0.1 -p 6667 -u root -pw root -s /path/sql -os delete -of delete -tn 8 +> tools/load-tsfile.sh -h 127.0.0.1 -p 6667 -u root -pw root -s /path/sql -os mv -of cp -sd /path/success/dir -fd /path/failure/dir -tn 8 + +# Windows +> tools\load-tsfile.bat -h 127.0.0.1 -p 6667 -u root -pw root -s /path/sql -os mv -of cp -sd /path/success/dir -fd /path/failure/dir -tn 8 +> tools\load-tsfile.bat -h 127.0.0.1 -p 6667 -u root -pw root -s /path/sql -os delete -of delete -tn 8 +``` + +## 4. TsFile Active Listening & Loading Feature + +The TsFile Active Listening & Loading Feature can actively monitor TsFile file changes in the specified target path (configured by the user) and automatically synchronize TsFile files from the target path to the specified reception path (configured by the user). Through this feature, IoTDB can automatically detect and load these files without the need for any additional manual loading operations. This automated process not only simplifies the user's operational steps but also reduces potential errors that may occur during the operation, effectively reducing the complexity for users during the usage process. + +![](https://alioss.timecho.com/docs/img/Data-import2.png) + + +### 4.1 Configuration Parameters + +You can enable the TsFile Active Listening & Loading Feature by finding the following parameters in the configuration file template `iotdb-system.properties.template` and adding them to the IoTDB configuration file `iotdb-system.properties`. 
The complete configuration is as follows: + + +| **Configuration Parameter** | **Description** | **Value Range** | **Required** | **Default Value** | **Loading Method** | +| -------------------------------------------- | ------------------------------------------------------------ | -------------------------- | ------------ | ---------------------- | ---------------- | +| load_active_listening_enable | Whether to enable the DataNode's active listening and loading of tsfile functionality (default is enabled). | Boolean: true,false | Optional | true | Hot Loading | +| load_active_listening_dirs | The directories to be listened to (automatically includes subdirectories of the directory), if there are multiple, separate with “,”. The default directory is ext/load/pending (supports hot loading). | String: one or more file directories | Optional | ext/load/pending | Hot Loading | +| load_active_listening_fail_dir | The directory to which files are transferred after the execution of loading tsfile files fails, only one directory can be configured. | String: one file directory | Optional | ext/load/failed | Hot Loading | +| load_active_listening_max_thread_num | The maximum number of threads to perform loading tsfile tasks simultaneously. The default value when the parameter is commented out is max(1, CPU core count / 2). When the user sets a value not in the range [1, CPU core count / 2], it will be set to the default value max(1, CPU core count / 2). | Long: [1, Long.MAX_VALUE] | Optional | max(1, CPU core count / 2) | Effective after restart | +| load_active_listening_check_interval_seconds | Active listening polling interval in seconds. The function of actively listening to tsfile is achieved by polling the folder. This configuration specifies the time interval between two checks of load_active_listening_dirs: the next check starts load_active_listening_check_interval_seconds seconds after the previous check completes. 
When the user sets the polling interval to less than 1, it will be set to the default value of 5 seconds. | Long: [1, Long.MAX_VALUE] | Optional | 5 | Effective after restart | + + +### 4.2 Precautions + +1. If there is a mods file in the files to be loaded, the mods file should be moved to the listening directory first, and then the tsfile files should be moved, with the mods file and the corresponding tsfile file in the same directory. This prevents the loading of tsfile files without the corresponding mods files. + + +```SQL +FUNCTION moveFilesToListeningDirectory(sourceDirectory, listeningDirectory) + // Move mods files + modsFiles = searchFiles(sourceDirectory, "*mods*") + IF modsFiles IS NOT EMPTY + FOR EACH file IN modsFiles + MOVE(file, listeningDirectory) + END FOR + END IF + + // Move tsfile files + tsfileFiles = searchFiles(sourceDirectory, "*tsfile*") + IF tsfileFiles IS NOT EMPTY + FOR EACH file IN tsfileFiles + MOVE(file, listeningDirectory) + END FOR + END IF +END FUNCTION + +FUNCTION searchFiles(directory, pattern) + matchedFiles = [] + FOR EACH file IN directory.files + IF file.name MATCHES pattern + APPEND file TO matchedFiles + END IF + END FOR + RETURN matchedFiles +END FUNCTION + +FUNCTION MOVE(sourceFile, targetDirectory) + // Implement the logic of moving files from sourceFile to targetDirectory +END FUNCTION +``` + +2. Prohibit setting the receiver directory of Pipe, the data directory for storing data, etc., as the listening directory. + +3. Prohibit `load_active_listening_fail_dir` from having the same directory as `load_active_listening_dirs`, or each other's nesting. + +4. Ensure that the `load_active_listening_dirs` directory has sufficient permissions. After the load is successful, the files will be deleted. If there is no delete permission, it will lead to repeated loading. 
+ diff --git a/src/zh/UserGuide/Master/Reference/Common-Config-Manual.md b/src/zh/UserGuide/Master/Reference/Common-Config-Manual.md index fd76cb4e3..8f84741f9 100644 --- a/src/zh/UserGuide/Master/Reference/Common-Config-Manual.md +++ b/src/zh/UserGuide/Master/Reference/Common-Config-Manual.md @@ -2160,3 +2160,51 @@ IoTDB ConfigNode 和 DataNode 的公共配置参数位于 `conf` 目录下。 |类型| int32 | |默认值| 1048576 | |改后生效方式|热加载| + +#### TsFile 主动监听&加载功能配置 + +* load\_active\_listening\_enable + +|名字| load\_active\_listening\_enable | +|:---:|:---| +|描述| 是否开启 DataNode 主动监听并且加载 tsfile 的功能(默认开启)。 | +|类型| Boolean | +|默认值| true | +|改后生效方式|热加载| + +* load\_active\_listening\_dirs + +|名字| load\_active\_listening\_dirs | +|:---:|:---| +|描述| 需要监听的目录(自动包括目录中的子目录),如有多个使用 “,“ 隔开默认的目录为 ext/load/pending(支持热装载)。 | +|类型| String | +|默认值| ext/load/pending | +|改后生效方式|热加载| + +* load\_active\_listening\_fail\_dir + +|名字| load\_active\_listening\_fail\_dir | +|:---:|:---| +|描述| 执行加载 tsfile 文件失败后将文件转存的目录,只能配置一个。 | +|类型| String | +|默认值| ext/load/failed | +|改后生效方式|热加载| + +* load\_active\_listening\_max\_thread\_num + +|名字| load\_active\_listening\_max\_thread\_num | +|:---:|:---| +|描述| 同时执行加载 tsfile 任务的最大线程数,参数被注释掉时的默值为 max(1, CPU 核心数 / 2),当用户设置的值不在这个区间[1, CPU核心数 /2]内时,会设置为默认值 (1, CPU 核心数 / 2)。 | +|类型| Long | +|默认值| max(1, CPU 核心数 / 2) | +|改后生效方式|重启后生效| + + +* load\_active\_listening\_check\_interval\_seconds + +|名字| load\_active\_listening\_check\_interval\_seconds | +|:---:|:---| +|描述| 主动监听轮询间隔,单位秒。主动监听 tsfile 的功能是通过轮询检查文件夹实现的。该配置指定了两次检查 load_active_listening_dirs 的时间间隔,每次检查完成 load_active_listening_check_interval_seconds 秒后,会执行下一次检查。当用户设置的轮询间隔小于 1 时,会被设置为默认值 5 秒。 | +|类型| Long | +|默认值| 5| +|改后生效方式|重启后生效| \ No newline at end of file diff --git a/src/zh/UserGuide/Master/Tools-System/Data-Import-Tool.md b/src/zh/UserGuide/Master/Tools-System/Data-Import-Tool.md new file mode 100644 index 000000000..08659116a --- /dev/null +++ b/src/zh/UserGuide/Master/Tools-System/Data-Import-Tool.md @@ -0,0 +1,206 @@ +# 数据导入 + +## 
1. IoTDB 数据导入 + +IoTDB 目前支持将 CSV、SQL、及TsFile(IoTDB底层开放式时间序列文件格式)格式的数据导入数据库。具体功能如下: + + + + + + + + + + + + + + + + + + + + + + + + + + +
文件格式IoTDB工具具体介绍
CSVimport-data.sh/bat可用于单个或一个目录的 CSV 文件批量导入 IoTDB
SQLimport-data.sh/bat可用于单个或一个目录的 SQL 文件批量导入 IoTDB
TsFileload-tsfile.sh/bat可用于单个或一个目录的 TsFile 文件批量导入 IoTDB
TsFile 主动监听&加载功能根据用户配置,监听指定路径下TsFile文件的变化,将新增加的TsFile文件加载入IoTDB
+ +## 2. import-data 脚本 + +- 支持格式:CSV、SQL + +### 2.1 运行命令: + +```Bash +# Unix/OS X +>tools/import-data.sh -h -p -u -pw -s [-fd <./failedDirectory> -aligned -batch -tp -typeInfer -lpf ] + +# Windows +>tools\import-data.bat -h -p -u -pw -s [-fd <./failedDirectory> -aligned -batch -tp -typeInfer -lpf ] +``` + +### 2.2 参数介绍: + +| **参数** | **定义** | **是否必填** | **默认** | +| ---------- | ------------------------------------------------------------ | ------------ | ------------------------- | +| -h | 数据库IP地址 | 否 | 127.0.0.1 | +| -p | 数据库端口 | 否 | 6667 | +| -u | 数据库连接用户名 | 否 | root | +| -pw | 数据库连接密码 | 否 | root | +| -s | 指定想要导入的数据,这里可以指定文件或者文件夹。如果指定的是文件夹,将会把文件夹中所有的后缀为 csv 或者 sql 的文件进行批量导入(V1.3.2版本参数是`-f`) | 是 | | +| -fd | 指定存放失败 SQL 文件的目录,如果未指定这个参数,失败的文件将会被保存到源数据的目录中。 说明:对于不支持的 SQL ,不合法的 SQL ,执行失败的 SQL 都会放到失败目录下的失败文件里(默认为 文件名.failed) | 否 | 源文件名加上`.failed`后缀 | +| -aligned | 指定是否使用`aligned`接口,选项为 true 或者 false 说明:这个参数只在导入文件为csv文件时生效 | 否 | false | +| -batch | 用于指定每一批插入的数据的点数(最小值为1,最大值为 Integer.*MAX_VALUE*)。如果程序报了`org.apache.thrift.transport.TTransportException: Frame size larger than protect max size`这个错的话,就可以适当的调低这个参数。 | 否 | 100000 | +| -tp | 指定时间精度,可选值包括`ms`(毫秒),`ns`(纳秒),`us`(微秒) | 否 | ms | +| -lpf | 指定每个导入失败文件写入数据的行数(V1.3.2版本参数是`-linesPerFailedFile`) | 否 | 10000 | +| -typeInfer | 用于指定类型推断规则,如。 说明:用于指定类型推断规则.`srcTsDataType` 包括 `boolean`,`int`,`long`,`float`,`double`,`NaN`.`dstTsDataType` 包括 `boolean`,`int`,`long`,`float`,`double`,`text`.当`srcTsDataType`为`boolean`, `dstTsDataType`只能为`boolean`或`text`.当`srcTsDataType`为`NaN`, `dstTsDataType`只能为`float`, `double`或`text`.当`srcTsDataType`为数值类型, `dstTsDataType`的精度需要高于`srcTsDataType`.例如:`-typeInfer boolean=text,float=double` | 否 | | + +### 2.3 运行示例: + +- 导入当前`data`目录下的`dump0_0.sql`数据到本机 IoTDB 数据库中。 + +```Bash +# Unix/OS X +>tools/import-data.sh -s ./data/dump0_0.sql +# Windows +>tools/import-data.bat -s ./data/dump0_0.sql +``` + +- 将当前`data`目录下的所有数据以对齐的方式导入到本机 IoTDB 数据库中。 + +```Bash +# Unix/OS X +>tools/import-data.sh -s ./data/ -fd 
./failed/ -aligned true +# Windows +>tools/import-data.bat -s ./data/ -fd ./failed/ -aligned true +``` + +- 导入当前`data`目录下的`dump0_0.csv`数据到本机 IoTDB 数据库中。 + +```Bash +# Unix/OS X +>tools/import-data.sh -s ./data/dump0_0.csv -fd ./failed/ +# Windows +>tools/import-data.bat -s ./data/dump0_0.csv -fd ./failed/ +``` + +- 将当前`data`目录下的`dump0_0.csv`数据以对齐的方式,一批导入100000条导入到`192.168.100.1`IP所在主机的 IoTDB 数据库中,失败的记录记在当前`failed`目录下,每个文件最多记1000条。 + +```Bash +# Unix/OS X +>tools/import-data.sh -h 192.168.100.1 -p 6667 -u root -pw root -s ./data/dump0_0.csv -fd ./failed/ -aligned true -batch 100000 -tp ms -typeInfer boolean=text,float=double -lpf 1000 +# Windows +>tools/import-data.bat -h 192.168.100.1 -p 6667 -u root -pw root -s ./data/dump0_0.csv -fd ./failed/ -aligned true -batch 100000 -tp ms -typeInfer boolean=text,float=double -lpf 1000 +``` + +## 3. load-tsfile 脚本 + +- 支持格式:TsFile + +### 3.1 运行命令 + +```Bash +# Unix/OS X +>tools/load-tsfile.sh -h -p -u -pw -s -os [-sd ] -of [-fd ] [-tn ] + +# Windows +>tools\load-tsfile.bat -h -p -u -pw -s -os [-sd ] -of [-fd ] [-tn ] +``` + +### 3.2 参数介绍 + +| **参数** | **定义** | **是否必填** | **默认** | +| -------- | ------------------------------------------------------------ | ----------------------------------- | ------------------- | +| -h | 主机名 | 否 | 127.0.0.1 | +| -p | 端口号 | 否 | 6667 | +| -u | 用户名 | 否 | root | +| -pw | 密码 | 否 | root | +| -s | 待加载的脚本文件(夹)的本地目录路径 | 是 | | +| -os | none:不删除
mv:移动成功的文件到目标文件夹
cp:硬链接(拷贝)成功的文件到目标文件夹
delete:删除 | 是 | | +| -sd | 当--on_success为mv或cp时,mv或cp的目标文件夹。文件的文件名变为文件夹打平后拼接原有文件名 | 当--on_success为mv或cp时需要填写 | ${EXEC_DIR}/success | +| -of | none:跳过
mv:移动失败的文件到目标文件夹
cp:硬链接(拷贝)失败的文件到目标文件夹
delete:删除 | 是 | | +| -fd | 当--on_fail指定为mv或cp时,mv或cp的目标文件夹。文件的文件名变为文件夹打平后拼接原有文件名 | 当--on_fail指定为mv或cp时需要填写 | ${EXEC_DIR}/fail | +| -tn | 最大并行线程数 | 否 | 8 | + +### 3.3 运行示例: + +```Bash +# Unix/OS X +> tools/load-tsfile.sh -h 127.0.0.1 -p 6667 -u root -pw root -s /path/sql -os delete -of delete -tn 8 +> tools/load-tsfile.sh -h 127.0.0.1 -p 6667 -u root -pw root -s /path/sql -os mv -of cp -sd /path/success/dir -fd /path/failure/dir -tn 8 + +# Windows +> tools\load-tsfile.bat -h 127.0.0.1 -p 6667 -u root -pw root -s /path/sql -os mv -of cp -sd /path/success/dir -fd /path/failure/dir -tn 8 +> tools\load-tsfile.bat -h 127.0.0.1 -p 6667 -u root -pw root -s /path/sql -os delete -of delete -tn 8 +``` + +## 4. TsFile 主动监听&加载功能 + +TsFile 主动监听&加载功能能够主动监听指定目标路径(用户配置)下TsFile的文件变化,并将目标路径下的TsFile文件自动同步至指定接收路径(用户配置)。通过此功能,IoTDB 能自动检测并加载这些文件,无需手动执行任何额外的加载操作。这种自动化流程不仅简化了用户的操作步骤,还减少了操作过程中可能出现的错误,有效降低了用户在使用过程中的复杂性。 + +![](https://alioss.timecho.com/docs/img/Data-import1.png) + + +### 4.1 配置参数 + +可通过从配置文件模版 `iotdb-system.properties.template` 中找到下列参数,添加到 IoTDB 配置文件 `iotdb-system.properties` 中开启TsFile 主动监听&加载功能。完整配置如下: + + +| **配置参数** | **参数说明** | **value 取值范围** | **是否必填** | **默认值** | **加载方式** | +| -------------------------------------------- | ------------------------------------------------------------ | -------------------------- | ------------ | ---------------------- | ---------------- | +| load_active_listening_enable | 是否开启 DataNode 主动监听并且加载 tsfile 的功能(默认开启)。 | Boolean: true,false | 选填 | true | 热加载 | +| load_active_listening_dirs | 需要监听的目录(自动包括目录中的子目录),如有多个使用 “,” 隔开。默认的目录为 ext/load/pending(支持热加载) | String: 一个或多个文件目录 | 选填 | ext/load/pending | 热加载 | +| load_active_listening_fail_dir | 执行加载 tsfile 文件失败后将文件转存的目录,只能配置一个 | String: 一个文件目录 | 选填 | ext/load/failed | 热加载 | +| load_active_listening_max_thread_num | 同时执行加载 tsfile 任务的最大线程数,参数被注释掉时的默认值为 max(1, CPU 核心数 / 2),当用户设置的值不在区间 [1, CPU 核心数 / 2] 内时,会设置为默认值 max(1, CPU 核心数 / 2) | Long: [1, Long.MAX_VALUE] | 选填 | max(1, CPU 核心数 / 2) | 重启后生效 | +| 
load_active_listening_check_interval_seconds | 主动监听轮询间隔,单位秒。主动监听 tsfile 的功能是通过轮询检查文件夹实现的。该配置指定了两次检查 load_active_listening_dirs 的时间间隔,每次检查完成 load_active_listening_check_interval_seconds 秒后,会执行下一次检查。当用户设置的轮询间隔小于 1 时,会被设置为默认值 5 秒 | Long: [1, Long.MAX_VALUE] | 选填 | 5 | 重启后生效 | + +### 4.2 注意事项 + +1. 如果待加载的文件中,存在 mods 文件,应优先将 mods 文件移动到监听目录下面,然后再移动 tsfile 文件,且 mods 文件应和对应的 tsfile 文件处于同一目录。防止加载到 tsfile 文件时,加载不到对应的 mods 文件 + +```SQL +FUNCTION moveFilesToListeningDirectory(sourceDirectory, listeningDirectory) + // 移动 mods 文件 + modsFiles = searchFiles(sourceDirectory, "*mods*") + IF modsFiles IS NOT EMPTY + FOR EACH file IN modsFiles + MOVE(file, listeningDirectory) + END FOR + END IF + + // 移动 tsfile 文件 + tsfileFiles = searchFiles(sourceDirectory, "*tsfile*") + IF tsfileFiles IS NOT EMPTY + FOR EACH file IN tsfileFiles + MOVE(file, listeningDirectory) + END FOR + END IF +END FUNCTION + +FUNCTION searchFiles(directory, pattern) + matchedFiles = [] + FOR EACH file IN directory.files + IF file.name MATCHES pattern + APPEND file TO matchedFiles + END IF + END FOR + RETURN matchedFiles +END FUNCTION + +FUNCTION MOVE(sourceFile, targetDirectory) + // 实现文件从 sourceFile 移动到 targetDirectory 的逻辑 +END FUNCTION +``` + +2. 禁止设置 Pipe 的 receiver 目录、存放数据的 data 目录等作为监听目录 + +3. 禁止 `load_active_listening_fail_dir` 与 `load_active_listening_dirs` 存在相同的目录,或者互相嵌套 + +4. 
保证 `load_active_listening_dirs` 目录有足够的权限,在加载成功之后,文件将会被删除,如果没有删除权限,则会重复加载 + diff --git a/src/zh/UserGuide/latest/Reference/Common-Config-Manual.md b/src/zh/UserGuide/latest/Reference/Common-Config-Manual.md index fd76cb4e3..8f84741f9 100644 --- a/src/zh/UserGuide/latest/Reference/Common-Config-Manual.md +++ b/src/zh/UserGuide/latest/Reference/Common-Config-Manual.md @@ -2160,3 +2160,51 @@ IoTDB ConfigNode 和 DataNode 的公共配置参数位于 `conf` 目录下。 |类型| int32 | |默认值| 1048576 | |改后生效方式|热加载| + +#### TsFile 主动监听&加载功能配置 + +* load\_active\_listening\_enable + +|名字| load\_active\_listening\_enable | +|:---:|:---| +|描述| 是否开启 DataNode 主动监听并且加载 tsfile 的功能(默认开启)。 | +|类型| Boolean | +|默认值| true | +|改后生效方式|热加载| + +* load\_active\_listening\_dirs + +|名字| load\_active\_listening\_dirs | +|:---:|:---| +|描述| 需要监听的目录(自动包括目录中的子目录),如有多个使用 “,” 隔开。默认的目录为 ext/load/pending(支持热加载)。 | +|类型| String | +|默认值| ext/load/pending | +|改后生效方式|热加载| + +* load\_active\_listening\_fail\_dir + +|名字| load\_active\_listening\_fail\_dir | +|:---:|:---| +|描述| 执行加载 tsfile 文件失败后将文件转存的目录,只能配置一个。 | +|类型| String | +|默认值| ext/load/failed | +|改后生效方式|热加载| + +* load\_active\_listening\_max\_thread\_num + +|名字| load\_active\_listening\_max\_thread\_num | +|:---:|:---| +|描述| 同时执行加载 tsfile 任务的最大线程数,参数被注释掉时的默认值为 max(1, CPU 核心数 / 2),当用户设置的值不在区间 [1, CPU 核心数 / 2] 内时,会设置为默认值 max(1, CPU 核心数 / 2)。 | +|类型| Long | +|默认值| max(1, CPU 核心数 / 2) | +|改后生效方式|重启后生效| + + +* load\_active\_listening\_check\_interval\_seconds + +|名字| load\_active\_listening\_check\_interval\_seconds | +|:---:|:---| +|描述| 主动监听轮询间隔,单位秒。主动监听 tsfile 的功能是通过轮询检查文件夹实现的。该配置指定了两次检查 load_active_listening_dirs 的时间间隔,每次检查完成 load_active_listening_check_interval_seconds 秒后,会执行下一次检查。当用户设置的轮询间隔小于 1 时,会被设置为默认值 5 秒。 | +|类型| Long | +|默认值| 5 | +|改后生效方式|重启后生效| \ No newline at end of file diff --git a/src/zh/UserGuide/latest/Tools-System/Data-Import-Tool.md b/src/zh/UserGuide/latest/Tools-System/Data-Import-Tool.md new file mode 100644 index 000000000..08659116a --- /dev/null +++ 
b/src/zh/UserGuide/latest/Tools-System/Data-Import-Tool.md @@ -0,0 +1,206 @@ +# 数据导入 + +## 1. IoTDB 数据导入 + +IoTDB 目前支持将 CSV、SQL、及TsFile(IoTDB底层开放式时间序列文件格式)格式的数据导入数据库。具体功能如下: + + + + + + + + + + + + + + + + + + + + + + + + + + +
文件格式IoTDB工具具体介绍
CSVimport-data.sh/bat可用于单个或一个目录的 CSV 文件批量导入 IoTDB
SQLimport-data.sh/bat可用于单个或一个目录的 SQL 文件批量导入 IoTDB
TsFileload-tsfile.sh/bat可用于单个或一个目录的 TsFile 文件批量导入 IoTDB
TsFile 主动监听&加载功能根据用户配置,监听指定路径下TsFile文件的变化,将新增加的TsFile文件加载入IoTDB
+ +## 2. import-data 脚本 + +- 支持格式:CSV、SQL + +### 2.1 运行命令: + +```Bash +# Unix/OS X +>tools/import-data.sh -h -p -u -pw -s [-fd <./failedDirectory> -aligned -batch -tp -typeInfer -lpf ] + +# Windows +>tools\import-data.bat -h -p -u -pw -s [-fd <./failedDirectory> -aligned -batch -tp -typeInfer -lpf ] +``` + +### 2.2 参数介绍: + +| **参数** | **定义** | **是否必填** | **默认** | +| ---------- | ------------------------------------------------------------ | ------------ | ------------------------- | +| -h | 数据库IP地址 | 否 | 127.0.0.1 | +| -p | 数据库端口 | 否 | 6667 | +| -u | 数据库连接用户名 | 否 | root | +| -pw | 数据库连接密码 | 否 | root | +| -s | 指定想要导入的数据,这里可以指定文件或者文件夹。如果指定的是文件夹,将会把文件夹中所有的后缀为 csv 或者 sql 的文件进行批量导入(V1.3.2版本参数是`-f`) | 是 | | +| -fd | 指定存放失败 SQL 文件的目录,如果未指定这个参数,失败的文件将会被保存到源数据的目录中。 说明:对于不支持的 SQL ,不合法的 SQL ,执行失败的 SQL 都会放到失败目录下的失败文件里(默认为 文件名.failed) | 否 | 源文件名加上`.failed`后缀 | +| -aligned | 指定是否使用`aligned`接口,选项为 true 或者 false 说明:这个参数只在导入文件为csv文件时生效 | 否 | false | +| -batch | 用于指定每一批插入的数据的点数(最小值为1,最大值为 Integer.*MAX_VALUE*)。如果程序报了`org.apache.thrift.transport.TTransportException: Frame size larger than protect max size`这个错的话,就可以适当的调低这个参数。 | 否 | 100000 | +| -tp | 指定时间精度,可选值包括`ms`(毫秒),`ns`(纳秒),`us`(微秒) | 否 | ms | +| -lpf | 指定每个导入失败文件写入数据的行数(V1.3.2版本参数是`-linesPerFailedFile`) | 否 | 10000 | +| -typeInfer | 用于指定类型推断规则,如。 说明:用于指定类型推断规则.`srcTsDataType` 包括 `boolean`,`int`,`long`,`float`,`double`,`NaN`.`dstTsDataType` 包括 `boolean`,`int`,`long`,`float`,`double`,`text`.当`srcTsDataType`为`boolean`, `dstTsDataType`只能为`boolean`或`text`.当`srcTsDataType`为`NaN`, `dstTsDataType`只能为`float`, `double`或`text`.当`srcTsDataType`为数值类型, `dstTsDataType`的精度需要高于`srcTsDataType`.例如:`-typeInfer boolean=text,float=double` | 否 | | + +### 2.3 运行示例: + +- 导入当前`data`目录下的`dump0_0.sql`数据到本机 IoTDB 数据库中。 + +```Bash +# Unix/OS X +>tools/import-data.sh -s ./data/dump0_0.sql +# Windows +>tools/import-data.bat -s ./data/dump0_0.sql +``` + +- 将当前`data`目录下的所有数据以对齐的方式导入到本机 IoTDB 数据库中。 + +```Bash +# Unix/OS X +>tools/import-data.sh -s ./data/ -fd 
./failed/ -aligned true +# Windows +>tools/import-data.bat -s ./data/ -fd ./failed/ -aligned true +``` + +- 导入当前`data`目录下的`dump0_0.csv`数据到本机 IoTDB 数据库中。 + +```Bash +# Unix/OS X +>tools/import-data.sh -s ./data/dump0_0.csv -fd ./failed/ +# Windows +>tools/import-data.bat -s ./data/dump0_0.csv -fd ./failed/ +``` + +- 将当前`data`目录下的`dump0_0.csv`数据以对齐的方式,一批导入100000条导入到`192.168.100.1`IP所在主机的 IoTDB 数据库中,失败的记录记在当前`failed`目录下,每个文件最多记1000条。 + +```Bash +# Unix/OS X +>tools/import-data.sh -h 192.168.100.1 -p 6667 -u root -pw root -s ./data/dump0_0.csv -fd ./failed/ -aligned true -batch 100000 -tp ms -typeInfer boolean=text,float=double -lpf 1000 +# Windows +>tools/import-data.bat -h 192.168.100.1 -p 6667 -u root -pw root -s ./data/dump0_0.csv -fd ./failed/ -aligned true -batch 100000 -tp ms -typeInfer boolean=text,float=double -lpf 1000 +``` + +## 3. load-tsfile 脚本 + +- 支持格式:TsFile + +### 3.1 运行命令 + +```Bash +# Unix/OS X +>tools/load-tsfile.sh -h -p -u -pw -s -os [-sd ] -of [-fd ] [-tn ] + +# Windows +>tools\load-tsfile.bat -h -p -u -pw -s -os [-sd ] -of [-fd ] [-tn ] +``` + +### 3.2 参数介绍 + +| **参数** | **定义** | **是否必填** | **默认** | +| -------- | ------------------------------------------------------------ | ----------------------------------- | ------------------- | +| -h | 主机名 | 否 | 127.0.0.1 | +| -p | 端口号 | 否 | 6667 | +| -u | 用户名 | 否 | root | +| -pw | 密码 | 否 | root | +| -s | 待加载的脚本文件(夹)的本地目录路径 | 是 | | +| -os | none:不删除
mv:移动成功的文件到目标文件夹
cp:硬链接(拷贝)成功的文件到目标文件夹
delete:删除 | 是 | | +| -sd | 当--on_success为mv或cp时,mv或cp的目标文件夹。文件的文件名变为文件夹打平后拼接原有文件名 | 当--on_success为mv或cp时需要填写 | ${EXEC_DIR}/success | +| -of | none:跳过
mv:移动失败的文件到目标文件夹
cp:硬链接(拷贝)失败的文件到目标文件夹
delete:删除 | 是 | | +| -fd | 当--on_fail指定为mv或cp时,mv或cp的目标文件夹。文件的文件名变为文件夹打平后拼接原有文件名 | 当--on_fail指定为mv或cp时需要填写 | ${EXEC_DIR}/fail | +| -tn | 最大并行线程数 | 否 | 8 | + +### 3.3 运行示例: + +```Bash +# Unix/OS X +> tools/load-tsfile.sh -h 127.0.0.1 -p 6667 -u root -pw root -s /path/sql -os delete -of delete -tn 8 +> tools/load-tsfile.sh -h 127.0.0.1 -p 6667 -u root -pw root -s /path/sql -os mv -of cp -sd /path/success/dir -fd /path/failure/dir -tn 8 + +# Windows +> tools\load-tsfile.bat -h 127.0.0.1 -p 6667 -u root -pw root -s /path/sql -os mv -of cp -sd /path/success/dir -fd /path/failure/dir -tn 8 +> tools\load-tsfile.bat -h 127.0.0.1 -p 6667 -u root -pw root -s /path/sql -os delete -of delete -tn 8 +``` + +## 4. TsFile 主动监听&加载功能 + +TsFile 主动监听&加载功能能够主动监听指定目标路径(用户配置)下TsFile的文件变化,并将目标路径下的TsFile文件自动同步至指定接收路径(用户配置)。通过此功能,IoTDB 能自动检测并加载这些文件,无需手动执行任何额外的加载操作。这种自动化流程不仅简化了用户的操作步骤,还减少了操作过程中可能出现的错误,有效降低了用户在使用过程中的复杂性。 + +![](https://alioss.timecho.com/docs/img/Data-import1.png) + + +### 4.1 配置参数 + +可通过从配置文件模版 `iotdb-system.properties.template` 中找到下列参数,添加到 IoTDB 配置文件 `iotdb-system.properties` 中开启TsFile 主动监听&加载功能。完整配置如下: + + +| **配置参数** | **参数说明** | **value 取值范围** | **是否必填** | **默认值** | **加载方式** | +| -------------------------------------------- | ------------------------------------------------------------ | -------------------------- | ------------ | ---------------------- | ---------------- | +| load_active_listening_enable | 是否开启 DataNode 主动监听并且加载 tsfile 的功能(默认开启)。 | Boolean: true,false | 选填 | true | 热加载 | +| load_active_listening_dirs | 需要监听的目录(自动包括目录中的子目录),如有多个使用 “,” 隔开。默认的目录为 ext/load/pending(支持热加载) | String: 一个或多个文件目录 | 选填 | ext/load/pending | 热加载 | +| load_active_listening_fail_dir | 执行加载 tsfile 文件失败后将文件转存的目录,只能配置一个 | String: 一个文件目录 | 选填 | ext/load/failed | 热加载 | +| load_active_listening_max_thread_num | 同时执行加载 tsfile 任务的最大线程数,参数被注释掉时的默认值为 max(1, CPU 核心数 / 2),当用户设置的值不在区间 [1, CPU 核心数 / 2] 内时,会设置为默认值 max(1, CPU 核心数 / 2) | Long: [1, Long.MAX_VALUE] | 选填 | max(1, CPU 核心数 / 2) | 重启后生效 | +| 
load_active_listening_check_interval_seconds | 主动监听轮询间隔,单位秒。主动监听 tsfile 的功能是通过轮询检查文件夹实现的。该配置指定了两次检查 load_active_listening_dirs 的时间间隔,每次检查完成 load_active_listening_check_interval_seconds 秒后,会执行下一次检查。当用户设置的轮询间隔小于 1 时,会被设置为默认值 5 秒 | Long: [1, Long.MAX_VALUE] | 选填 | 5 | 重启后生效 | + +### 4.2 注意事项 + +1. 如果待加载的文件中,存在 mods 文件,应优先将 mods 文件移动到监听目录下面,然后再移动 tsfile 文件,且 mods 文件应和对应的 tsfile 文件处于同一目录。防止加载到 tsfile 文件时,加载不到对应的 mods 文件 + +```SQL +FUNCTION moveFilesToListeningDirectory(sourceDirectory, listeningDirectory) + // 移动 mods 文件 + modsFiles = searchFiles(sourceDirectory, "*mods*") + IF modsFiles IS NOT EMPTY + FOR EACH file IN modsFiles + MOVE(file, listeningDirectory) + END FOR + END IF + + // 移动 tsfile 文件 + tsfileFiles = searchFiles(sourceDirectory, "*tsfile*") + IF tsfileFiles IS NOT EMPTY + FOR EACH file IN tsfileFiles + MOVE(file, listeningDirectory) + END FOR + END IF +END FUNCTION + +FUNCTION searchFiles(directory, pattern) + matchedFiles = [] + FOR EACH file IN directory.files + IF file.name MATCHES pattern + APPEND file TO matchedFiles + END IF + END FOR + RETURN matchedFiles +END FUNCTION + +FUNCTION MOVE(sourceFile, targetDirectory) + // 实现文件从 sourceFile 移动到 targetDirectory 的逻辑 +END FUNCTION +``` + +2. 禁止设置 Pipe 的 receiver 目录、存放数据的 data 目录等作为监听目录 + +3. 禁止 `load_active_listening_fail_dir` 与 `load_active_listening_dirs` 存在相同的目录,或者互相嵌套 + +4. 保证 `load_active_listening_dirs` 目录有足够的权限,在加载成功之后,文件将会被删除,如果没有删除权限,则会重复加载 +