11#!/usr/bin/env python3
22# pylint: disable=missing-docstring,not-an-iterable,too-many-locals,too-many-arguments,invalid-name,too-many-return-statements,too-many-branches,len-as-condition,too-many-nested-blocks,wrong-import-order,duplicate-code, anomalous-backslash-in-string, too-many-statements, singleton-comparison, consider-using-in
33
4- import singer
4+ from functools import reduce
5+ from select import select
6+ import copy
7+ import csv
58import datetime
69import decimal
10+ import json
11+ import re
12+
13+ from dateutil .parser import parse
14+ import psycopg2
15+ import singer
716from singer import utils , get_bookmark
817import singer .metadata as metadata
918import tap_postgres .db as post_db
1019import tap_postgres .sync_strategies .common as sync_common
11- from dateutil .parser import parse
12- import psycopg2
13- from psycopg2 import sql
14- import copy
15- from select import select
16- from functools import reduce
17- import json
18- import re
20+
1921
2022LOGGER = singer .get_logger ()
2123
@@ -65,81 +67,29 @@ def get_stream_version(tap_stream_id, state):
6567
6668 return stream_version
6769
68- def tuples_to_map (accum , t ):
69- accum [t [0 ]] = t [1 ]
70- return accum
71-
72- def create_hstore_elem_query (elem ):
73- return sql .SQL ("SELECT hstore_to_array({})" ).format (sql .Literal (elem ))
74-
75- def create_hstore_elem (conn_info , elem ):
76- with post_db .open_connection (conn_info ) as conn :
77- with conn .cursor () as cur :
78- query = create_hstore_elem_query (elem )
79- cur .execute (query )
80- res = cur .fetchone ()[0 ]
81- hstore_elem = reduce (tuples_to_map , [res [i :i + 2 ] for i in range (0 , len (res ), 2 )], {})
82- return hstore_elem
83-
84- def create_array_elem (elem , sql_datatype , conn_info ):
def create_hstore_elem(elem):
    """Build a dict from an iterable of ``key=>value`` hstore pair strings.

    Every double quote is stripped from each item before it is split on
    ``=>``. Items that do not split into exactly two parts are ignored.
    A value equal to ``null`` (case-insensitive) is mapped to ``None``.

    NOTE(review): assumes ``elem`` is an iterable of already-separated pair
    strings such as ``'"a"=>"1"'`` -- confirm against the caller. Passing a
    plain string would iterate it character by character and yield ``{}``.

    :param elem: iterable of strings, each holding one ``key=>value`` pair
    :return: dict mapping each key to its string value or ``None``
    :raises KeyError: if the same key appears more than once
    """
    pairs = [item.replace('"', '').split('=>') for item in elem]
    hstore = {}
    for pair in pairs:
        # Anything that is not exactly "key=>value" (including values that
        # themselves contain "=>") is skipped, as before.
        if len(pair) != 2:
            continue

        key, value = pair
        if key in hstore:
            raise KeyError('Duplicate key {} found when creating hstore'.format(key))
        # Quotes were stripped above, so a quoted "null" string and an SQL
        # NULL are indistinguishable here; both become None.
        if value.lower() == 'null':
            value = None
        # Fix: the original wrote to an undefined name `d`, raising NameError.
        hstore[key] = value

    return hstore
83+
def create_array_elem(elem):
    """Parse the text form of an array value into a flat list.

    The first and last characters of ``elem`` are dropped and the remainder
    is parsed as one CSV record: comma-delimited, double-quote quoted, with
    backslash as the escape character. Items equal to ``null``
    (case-insensitive) become ``None``; all other items stay strings.

    NOTE(review): presumably ``elem`` is a PostgreSQL array literal such as
    ``{1,2,3}`` and the dropped characters are its braces -- confirm against
    the caller. Nested arrays are not given any special handling, and a
    quoted ``"NULL"`` string cannot be told apart from an SQL NULL.

    :param elem: array text, or ``None``
    :return: list of ``str``/``None`` items, or ``None`` when ``elem`` is None
    """
    if elem is None:
        return None

    inner = elem[1:-1]
    # csv.reader requires a single-character escapechar; the two-character
    # value used previously made every call raise TypeError.
    reader = csv.reader([inner], delimiter=',', escapechar='\\', quotechar='"')
    # Exactly one input line was supplied, so there is exactly one record.
    row = next(reader)
    return [None if item.lower() == 'null' else item for item in row]
14393
14494#pylint: disable=too-many-branches,too-many-nested-blocks
14595def selected_value_to_singer_value_impl (elem , og_sql_datatype , conn_info ):
@@ -166,17 +116,21 @@ def selected_value_to_singer_value_impl(elem, og_sql_datatype, conn_info):
166116 #for ordinary bits, elem will == '1'
167117 return elem == '1' or elem == True
168118 if sql_datatype == 'boolean' :
169- return elem
119+ return bool ( elem )
170120 if sql_datatype == 'hstore' :
171- return create_hstore_elem (conn_info , elem )
121+ return create_hstore_elem (elem )
172122 if 'numeric' in sql_datatype :
173- return decimal .Decimal (str (elem ))
174- if isinstance (elem , int ):
175- return elem
176- if isinstance (elem , float ):
177- return elem
178- if isinstance (elem , str ):
179- return elem
123+ return decimal .Decimal (elem )
124+ if sql_datatype == 'money' :
125+ return decimal .Decimal (elem [1 :])
126+ if sql_datatype in ('integer' , 'smallint' , 'bigint' ):
127+ return int (elem )
128+ if sql_datatype in ('double precision' , 'real' , 'float' ):
129+ return float (elem )
130+ if sql_datatype in ('text' , 'character varying' ):
131+ return elem # return as string
132+ if sql_datatype in ('cidr' , 'citext' , 'json' , 'jsonb' , 'inet' , 'macaddr' , 'uuid' ):
133+ return elem # return as string
180134
181135 raise Exception ("do not know how to marshall value of type {}" .format (elem .__class__ ))
182136
@@ -189,7 +143,7 @@ def selected_array_to_singer_value(elem, sql_datatype, conn_info):
def selected_value_to_singer_value(elem, sql_datatype, conn_info):
    """Convert a selected value to its Singer representation.

    When ``sql_datatype`` contains ``[]`` after its first character, the
    value is treated as an array: it is parsed with ``create_array_elem``
    and each item is converted with ``selected_array_to_singer_value``.
    A ``None`` or empty parse result yields an empty list. Any other
    datatype is delegated to ``selected_value_to_singer_value_impl``.

    :param elem: raw value to convert
    :param sql_datatype: SQL datatype name, e.g. ``integer`` or ``text[]``
    :param conn_info: passed through unchanged to the per-value converters
    :return: a list for array datatypes, otherwise the converted scalar
    """
    # are we dealing with an array?
    if sql_datatype.find('[]') > 0:
        cleaned_elem = create_array_elem(elem)
        # `or []` makes a None result (NULL array) iterate as empty.
        return [selected_array_to_singer_value(item, sql_datatype, conn_info)
                for item in (cleaned_elem or [])]

    return selected_value_to_singer_value_impl(elem, sql_datatype, conn_info)
0 commit comments