Skip to content

Commit 73a59c8

Browse files
1 parent 98cef12 commit 73a59c8

File tree

6 files changed

+260
-6
lines changed

6 files changed

+260
-6
lines changed

README.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,18 @@ If a column has multiple jsonschema types, the following order is used to order
302302
- BOOLEAN
303303
- NOTYPE
304304

305+
### `x-sql-datatype` extension
306+
307+
This target supports the [`x-sql-datatype` extension](https://sdk.meltano.com/en/latest/guides/sql-target.html#use-the-x-sql-datatype-json-schema-extension) to the JSON schema. This extension allows you to specify the Postgres data type that should be used for a given field. This can be useful when the default mapping is not what you want.
308+
309+
<!-- insert a table with the mapping -->
310+
311+
| `x-sql-datatype` | Postgres | Description |
312+
| :--------------- | :------- | :----------------------------------------------------------------- |
313+
| smallint | smallint | small-range integer (-32768 to +32767) |
314+
| integer | integer | typical choice for integer (-2147483648 to +2147483647) |
315+
| bigint | bigint | large-range integer (-9223372036854775808 to +9223372036854775807) |
316+
305317
### Using the Singer catalog to narrow down the Postgres data types
306318

307319
You can use [Singer catalog's schema](https://github.com/singer-io/getting-started/blob/master/docs/DISCOVERY_MODE.md#schemas) to override the data types coming from the tap. The easiest way to do this is to use Meltano and its [`schema` setting](https://docs.meltano.com/concepts/plugins/#schema-extra) for the tap:
@@ -320,6 +332,20 @@ plugins:
320332
maximum: 1000
321333
```
322334

335+
Or to use the `x-sql-datatype` extension:
336+
337+
```yaml
338+
# meltano.yml
339+
plugins:
340+
extractors:
341+
- name: tap-my-tap
342+
schema:
343+
some_stream_id:
344+
my_column:
345+
type: integer
346+
x-sql-datatype: smallint
347+
```
348+
323349
## Content Encoding Support
324350

325351
Json Schema supports the [`contentEncoding` keyword](https://datatracker.ietf.org/doc/html/rfc4648#section-8), which can be used to specify the encoding of input string types.

meltano.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ project_id: target-postgres
55
plugins:
66
extractors:
77
- name: tap-smoke-test
8-
namespace: tap_smoke_test
8+
variant: meltano
99
pip_url: git+https://github.com/meltano/tap-smoke-test.git
1010
executable: tap-smoke-test
1111
config:
@@ -19,6 +19,11 @@ plugins:
1919
__key_properties__: [id]
2020
page_views:
2121
__key_properties__: [vistor_id]
22+
schema:
23+
animals:
24+
views:
25+
type: integer
26+
x-sql-datatype: smallint
2227
- name: tap-github
2328
variant: meltanolabs
2429
pip_url: git+https://github.com/MeltanoLabs/tap-github.git

plugins/extractors/tap-github--meltanolabs.lock

Lines changed: 95 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@
66
"label": "GitHub",
77
"docs": "https://hub.meltano.com/extractors/tap-github--meltanolabs",
88
"repo": "https://github.com/MeltanoLabs/tap-github",
9-
"pip_url": "git+https://github.com/MeltanoLabs/tap-github.git",
9+
"pip_url": "meltanolabs-tap-github",
1010
"description": "Code hosting platform",
1111
"logo_url": "https://hub.meltano.com/assets/logos/extractors/github.png",
1212
"capabilities": [
1313
"about",
14+
"batch",
1415
"catalog",
1516
"discover",
1617
"schema-flattening",
@@ -41,22 +42,90 @@
4142
"label": "Additional Auth Tokens",
4243
"description": "List of GitHub tokens to authenticate with. Streams will loop through them when hitting rate limits."
4344
},
45+
{
46+
"name": "auth_app_keys",
47+
"kind": "array",
48+
"label": "Auth App Keys",
49+
"description": "List of GitHub App credentials to authenticate with. Each credential can be constructed by combining an App ID and App private key into the format `:app_id:;;-----BEGIN RSA PRIVATE KEY----- _YOUR_P_KEY_ -----END RSA PRIVATE KEY-----`."
50+
},
4451
{
4552
"name": "auth_token",
46-
"kind": "password",
53+
"kind": "string",
4754
"label": "Auth Token",
48-
"description": "GitHub token to authenticate with."
55+
"description": "GitHub token to authenticate with.",
56+
"sensitive": true
57+
},
58+
{
59+
"name": "batch_config.encoding.compression",
60+
"kind": "options",
61+
"label": "Batch Compression Format",
62+
"description": "Compression format to use for batch files.",
63+
"options": [
64+
{
65+
"label": "GZIP",
66+
"value": "gzip"
67+
},
68+
{
69+
"label": "None",
70+
"value": "none"
71+
}
72+
]
73+
},
74+
{
75+
"name": "batch_config.encoding.format",
76+
"kind": "options",
77+
"label": "Batch Encoding Format",
78+
"description": "Format to use for batch files.",
79+
"options": [
80+
{
81+
"label": "JSONL",
82+
"value": "jsonl"
83+
},
84+
{
85+
"label": "Parquet",
86+
"value": "parquet"
87+
}
88+
]
89+
},
90+
{
91+
"name": "batch_config.storage.prefix",
92+
"kind": "string",
93+
"label": "Batch Storage Prefix",
94+
"description": "Prefix to use when writing batch files."
95+
},
96+
{
97+
"name": "batch_config.storage.root",
98+
"kind": "string",
99+
"label": "Batch Storage Root",
100+
"description": "Root path to use when writing batch files."
101+
},
102+
{
103+
"name": "expiry_time_buffer",
104+
"kind": "integer",
105+
"label": "Expiry Time Buffer"
106+
},
107+
{
108+
"name": "faker_config.locale",
109+
"kind": "array",
110+
"label": "Faker Locale",
111+
"description": "One or more LCID locale strings to produce localized output for: https://faker.readthedocs.io/en/master/#localization"
112+
},
113+
{
114+
"name": "faker_config.seed",
115+
"kind": "string",
116+
"label": "Faker Seed",
117+
"description": "Value to seed the Faker generator for deterministic output: https://faker.readthedocs.io/en/master/#seeding-the-generator"
49118
},
50119
{
51120
"name": "flattening_enabled",
52121
"kind": "boolean",
53-
"label": "Flattening Enabled",
122+
"label": "Enable Schema Flattening",
54123
"description": "'True' to enable schema flattening and automatically expand nested properties."
55124
},
56125
{
57126
"name": "flattening_max_depth",
58127
"kind": "integer",
59-
"label": "Flattening Max Depth",
128+
"label": "Max Flattening Depth",
60129
"description": "The max depth to flatten schemas."
61130
},
62131
{
@@ -110,6 +179,27 @@
110179
"kind": "object",
111180
"label": "Stream Maps"
112181
},
182+
{
183+
"name": "stream_options.milestones.state",
184+
"kind": "options",
185+
"value": "open",
186+
"label": "Stream Options Milestones State",
187+
"description": "Configures which states are of interest. Must be one of [open, closed, all], defaults to open.",
188+
"options": [
189+
{
190+
"label": "Open",
191+
"value": "open"
192+
},
193+
{
194+
"label": "Closed",
195+
"value": "closed"
196+
},
197+
{
198+
"label": "All",
199+
"value": "all"
200+
}
201+
]
202+
},
113203
{
114204
"name": "user_agent",
115205
"kind": "string",
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
{
2+
"plugin_type": "extractors",
3+
"name": "tap-smoke-test",
4+
"namespace": "tap_smoke_test",
5+
"variant": "meltano",
6+
"label": "Smoke Test",
7+
"docs": "https://hub.meltano.com/extractors/tap-smoke-test--meltano",
8+
"repo": "https://github.com/meltano/tap-smoke-test",
9+
"pip_url": "git+https://github.com/meltano/tap-smoke-test.git",
10+
"executable": "tap-smoke-test",
11+
"description": "Generates sample data to be used for testing.",
12+
"logo_url": "https://hub.meltano.com/assets/logos/extractors/smoke-test.png",
13+
"capabilities": [
14+
"about",
15+
"batch",
16+
"catalog",
17+
"discover",
18+
"schema-flattening",
19+
"state",
20+
"stream-maps"
21+
],
22+
"settings_group_validation": [
23+
[
24+
"streams"
25+
]
26+
],
27+
"settings": [
28+
{
29+
"name": "batch_config.encoding.compression",
30+
"kind": "options",
31+
"label": "Batch Compression Format",
32+
"description": "Compression format to use for batch files.",
33+
"options": [
34+
{
35+
"label": "GZIP",
36+
"value": "gzip"
37+
},
38+
{
39+
"label": "None",
40+
"value": "none"
41+
}
42+
]
43+
},
44+
{
45+
"name": "batch_config.encoding.format",
46+
"kind": "options",
47+
"label": "Batch Encoding Format",
48+
"description": "Format to use for batch files.",
49+
"options": [
50+
{
51+
"label": "JSONL",
52+
"value": "jsonl"
53+
},
54+
{
55+
"label": "Parquet",
56+
"value": "parquet"
57+
}
58+
]
59+
},
60+
{
61+
"name": "batch_config.storage.prefix",
62+
"kind": "string",
63+
"label": "Batch Storage Prefix",
64+
"description": "Prefix to use when writing batch files."
65+
},
66+
{
67+
"name": "batch_config.storage.root",
68+
"kind": "string",
69+
"label": "Batch Storage Root",
70+
"description": "Root path to use when writing batch files."
71+
},
72+
{
73+
"name": "faker_config.locale",
74+
"kind": "array",
75+
"label": "Faker Locale",
76+
"description": "One or more LCID locale strings to produce localized output for: https://faker.readthedocs.io/en/master/#localization"
77+
},
78+
{
79+
"name": "faker_config.seed",
80+
"kind": "string",
81+
"label": "Faker Seed",
82+
"description": "Value to seed the Faker generator for deterministic output: https://faker.readthedocs.io/en/master/#seeding-the-generator"
83+
},
84+
{
85+
"name": "flattening_enabled",
86+
"kind": "boolean",
87+
"label": "Enable Schema Flattening",
88+
"description": "'True' to enable schema flattening and automatically expand nested properties."
89+
},
90+
{
91+
"name": "flattening_max_depth",
92+
"kind": "integer",
93+
"label": "Max Flattening Depth",
94+
"description": "The max depth to flatten schemas."
95+
},
96+
{
97+
"name": "schema_inference_record_count",
98+
"kind": "integer",
99+
"value": 5,
100+
"label": "Schema Inference Record Count",
101+
"description": "How many records of the source data should be used for schema inference/construction."
102+
},
103+
{
104+
"name": "stream_map_config",
105+
"kind": "object",
106+
"label": "User Stream Map Configuration",
107+
"description": "User-defined config values to be used within map expressions."
108+
},
109+
{
110+
"name": "stream_maps",
111+
"kind": "object",
112+
"label": "Stream Maps",
113+
"description": "Config object for stream maps capability. For more information check out [Stream Maps](https://sdk.meltano.com/en/latest/stream_maps.html)."
114+
},
115+
{
116+
"name": "streams",
117+
"kind": "array",
118+
"label": "Streams",
119+
"description": "An array of objects containing:\n* `stream_name`: The name of the stream.\n* `input_filename`: Path to a jsonl file containing records to use for mock data.\n* `client_exception`: (Default False) Whether we should simulate failing by having the client raise an exception.\n* `schema_gen_exception`: (Default False) Whether we should simulate failing by raising an exception during schema inference.\n* `loop_count`: (Default 1) The number of times we should playback the input file.\n\nFor example:\n\n```yaml\nstreams:\n- stream_name: animals\n input_filename: https://raw.githubusercontent.com/meltano/tap-smoke-test/main/demo-data/animals-data.jsonl\n```\n"
120+
}
121+
]
122+
}

target_postgres/connector.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,9 @@ def jsonschema_to_sql(self) -> JSONSchemaToSQL:
310310
to_sql.register_format_handler("hostname", TEXT)
311311
to_sql.register_format_handler("ipv4", TEXT)
312312
to_sql.register_format_handler("ipv6", TEXT)
313+
to_sql.register_sql_datatype_handler("smallint", SMALLINT)
314+
to_sql.register_sql_datatype_handler("integer", INTEGER)
315+
to_sql.register_sql_datatype_handler("bigint", BIGINT)
313316
return to_sql
314317

315318
def to_sql_type(self, jsonschema_type: dict) -> sa.types.TypeEngine:

target_postgres/tests/test_types.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,14 @@ def test_datetime_string(self, to_postgres: JSONSchemaToPostgres):
9393
BIGINT,
9494
id="bigint",
9595
),
96+
pytest.param(
97+
{
98+
"type": "integer",
99+
"x-sql-datatype": "smallint",
100+
},
101+
SMALLINT,
102+
id="x-sql-datatype-smallint",
103+
),
96104
],
97105
)
98106
def test_integers(

0 commit comments

Comments
 (0)