Skip to content

Commit 742584a

Browse files
committed
Adding XLS UI elements for ftp source
1 parent f36a716 commit 742584a

File tree

3 files changed

+156
-6
lines changed

3 files changed

+156
-6
lines changed

docs/FTPSource-batchsource.md

+16-3
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,28 @@ Properties
2828
**Password:** Password to use for authentication.
2929

3030
**Format:** Format of the data to read.
31-
The format must be one of 'blob', 'csv', 'delimited', 'json', 'text', 'tsv', or the
31+
The format must be one of 'blob', 'csv', 'delimited', 'json', 'text', 'tsv', 'xls', or the
3232
name of any format plugin that you have deployed to your environment. Note that FTP does
3333
not support seeking in a file, so formats like avro and parquet cannot be used.
3434
If the format is a macro, only the formats listed above can be used.
3535
If the format is 'blob', every input file will be read into a separate record.
3636
The 'blob' format also requires a schema that contains a field named 'body' of type 'bytes'.
3737
If the format is 'text', the schema must contain a field named 'body' of type 'string'.
3838

39-
**Get Schema:** Auto-detects schema from file. Supported formats are: csv, delimited, tsv, blob and text.
39+
**Sample Size:** The maximum number of rows that will get investigated for automatic data type detection.
40+
The default value is 1000. This is only used when the format is 'xls'.
41+
42+
**Override:** A list of columns with the corresponding data types for which the automatic data type detection gets
43+
skipped. This is only used when the format is 'xls'.
44+
45+
**Terminate If Empty Row:** Specify whether to stop reading after encountering the first empty row. Defaults to false. When false, the reader will read all rows in the sheet. This is only used when the format is 'xls'.
46+
47+
**Select Sheet Using:** Select the sheet by name or number. Default is 'Sheet Number'. This is only used when the format is 'xls'.
48+
49+
**Sheet Value:** The name/number of the sheet to read from. If not specified, the first sheet will be read.
50+
Sheet numbers are 0-based, i.e. the first sheet is 0. This is only used when the format is 'xls'.
51+
52+
**Get Schema:** Auto-detects schema from file. Supported formats are: csv, delimited, tsv, xls, blob and text.
4053

4154
Blob - is set by default as field named 'body' of type bytes.
4255

@@ -47,7 +60,7 @@ JSON - is not supported. You must manually provide the output schema.
4760
**Delimiter:** Delimiter to use when the format is 'delimited'. This will be ignored for other formats.
4861

4962
**Use First Row as Header:** Whether to use the first line of each file as the column headers. Supported formats are '
50-
text', 'csv', 'tsv', and 'delimited'.
63+
text', 'csv', 'tsv', 'xls', and 'delimited'.
5164

5265
**Enable Quoted Values:** Whether to treat content between quotes as a value. This value will only be used if the format
5366
is 'csv', 'tsv' or 'delimited'. For example, if this is set to true, a line that looks like `1, "a, b, c"` will output

src/main/java/io/cdap/plugin/batch/source/ftp/FTPConfig.java

+36-2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import com.google.gson.Gson;
2323
import io.cdap.cdap.api.annotation.Description;
2424
import io.cdap.cdap.api.annotation.Macro;
25+
import io.cdap.cdap.api.annotation.Name;
2526
import io.cdap.cdap.api.data.schema.Schema;
2627
import io.cdap.cdap.api.plugin.PluginConfig;
2728
import io.cdap.cdap.etl.api.FailureCollector;
@@ -58,6 +59,9 @@ public class FTPConfig extends PluginConfig implements FileSourceProperties {
5859
private static final Type MAP_STRING_STRING_TYPE = new TypeToken<Map<String, String>>() {
5960
}.getType();
6061
private static final List<String> LOCATION_PROPERTIES = Arrays.asList("type", "host", "path", "user", "password");
62+
private static final String NAME_SHEET = "sheet";
63+
private static final String NAME_SHEET_VALUE = "sheetValue";
64+
private static final String NAME_TERMINATE_IF_EMPTY_ROW = "terminateIfEmptyRow";
6165

6266
@Macro
6367
@Nullable
@@ -112,13 +116,24 @@ public class FTPConfig extends PluginConfig implements FileSourceProperties {
112116

113117
@Macro
114118
@Nullable
115-
@Description("Whether to use first row as header. Supported formats are 'text', 'csv', 'tsv', " +
119+
@Description("The maximum number of rows that will get investigated for automatic data type detection.")
120+
private Long sampleSize;
121+
122+
@Macro
123+
@Nullable
124+
@Description("A list of columns with the corresponding data types for which the automatic data type detection gets" +
125+
" skipped.")
126+
private String override;
127+
128+
@Macro
129+
@Nullable
130+
@Description("Whether to use first row as header. Supported formats are 'text', 'csv', 'tsv', 'xls', " +
116131
"'delimited'. Default value is false.")
117132
private final Boolean skipHeader;
118133

119134
@Macro
120135
@Description("Format of the data to read. Supported formats are 'avro', 'blob', 'csv', 'delimited', 'json', "
121-
+ "'parquet', 'text', or 'tsv'. If no format is given, it will default to 'text'.")
136+
+ "'parquet', 'text', 'tsv', or 'xls'. If no format is given, it will default to 'text'.")
122137
private final String format;
123138

124139
@Macro
@@ -150,6 +165,25 @@ public class FTPConfig extends PluginConfig implements FileSourceProperties {
150165
@Description("Maximum time in milliseconds to wait for connection initialization before time out.")
151166
private final Integer connectTimeout;
152167

168+
@Name(NAME_SHEET)
169+
@Macro
170+
@Nullable
171+
@Description("Select the sheet by name or number. Default is 'Sheet Number'.")
172+
private String sheet;
173+
174+
@Name(NAME_SHEET_VALUE)
175+
@Macro
176+
@Nullable
177+
@Description("The name/number of the sheet to read from. If not specified, the first sheet will be read. " +
178+
"Sheet Numbers are 0 based, ie first sheet is 0.")
179+
private String sheetValue;
180+
181+
@Name(NAME_TERMINATE_IF_EMPTY_ROW)
182+
@Macro
183+
@Nullable
184+
@Description("Specify whether to stop reading after encountering the first empty row. Defaults to false.")
185+
private String terminateIfEmptyRow;
186+
153187
@VisibleForTesting
154188
private FTPConfig(@Nullable String referenceName, String type, String host, @Nullable Integer port, String path,
155189
String user, String password, @Nullable String fileSystemProperties,

widgets/FTPSource-batchsource.json

+104-1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@
8888
{
8989
"label": "tsv",
9090
"value": "tsv"
91+
},
92+
{
93+
"label": "xls",
94+
"value": "xls"
9195
}
9296
]
9397
}
@@ -96,6 +100,36 @@
96100
"widget-type": "get-schema",
97101
"widget-category": "plugin"
98102
},
103+
{
104+
"widget-type": "number",
105+
"label": "Sample Size",
106+
"name": "sampleSize",
107+
"widget-attributes": {
108+
"default": "1000",
109+
"minimum": "1"
110+
}
111+
},
112+
{
113+
"widget-type": "keyvalue-dropdown",
114+
"label": "Override",
115+
"name": "override",
116+
"widget-attributes": {
117+
"key-placeholder": "Field Name",
118+
"value-placeholder": "Data Type",
119+
"dropdownOptions": [
120+
"boolean",
121+
"bytes",
122+
"double",
123+
"float",
124+
"int",
125+
"long",
126+
"string",
127+
"date",
128+
"time",
129+
"timestamp"
130+
]
131+
}
132+
},
99133
{
100134
"widget-type": "textbox",
101135
"label": "Delimiter",
@@ -151,6 +185,42 @@
151185
"label": "False"
152186
}
153187
}
188+
},
189+
{
190+
"widget-type": "toggle",
191+
"label": "Terminate If Empty Row",
192+
"name": "terminateIfEmptyRow",
193+
"widget-attributes": {
194+
"default": "false",
195+
"on": {
196+
"value": "true",
197+
"label": "True"
198+
},
199+
"off": {
200+
"value": "false",
201+
"label": "False"
202+
}
203+
}
204+
},
205+
{
206+
"widget-type": "select",
207+
"label": "Select Sheet Using",
208+
"name": "sheet",
209+
"widget-attributes": {
210+
"values": [
211+
"Sheet Name",
212+
"Sheet Number"
213+
],
214+
"default": "Sheet Number"
215+
}
216+
},
217+
{
218+
"widget-type": "textbox",
219+
"label": "Sheet Value",
220+
"name": "sheetValue",
221+
"widget-attributes": {
222+
"default": "0"
223+
}
154224
}
155225
]
156226
},
@@ -257,13 +327,46 @@
257327
{
258328
"name": "skipHeader",
259329
"condition": {
260-
"expression": "format == 'delimited' || format == 'csv' || format == 'tsv'"
330+
"expression": "format == 'delimited' || format == 'csv' || format == 'tsv' || format == 'xls'"
261331
},
262332
"show": [
263333
{
264334
"name": "skipHeader"
265335
}
266336
]
337+
},
338+
{
339+
"name": "sheet",
340+
"condition": {
341+
"expression": "format == 'xls'"
342+
},
343+
"show": [
344+
{
345+
"name": "sheet"
346+
}
347+
]
348+
},
349+
{
350+
"name": "sheetValue",
351+
"condition": {
352+
"expression": "format == 'xls'"
353+
},
354+
"show": [
355+
{
356+
"name": "sheetValue"
357+
}
358+
]
359+
},
360+
{
361+
"name": "terminateIfEmptyRow",
362+
"condition": {
363+
"expression": "format == 'xls'"
364+
},
365+
"show": [
366+
{
367+
"name": "terminateIfEmptyRow"
368+
}
369+
]
267370
}
268371
],
269372
"jump-config": {

0 commit comments

Comments
 (0)