Skip to content

Commit 1ccb825

Browse files
committed
Delinting; disabled bad-continuation in .pylintrc (relying on PyCharm's formatter)
1 parent 3825c08 commit 1ccb825

File tree

8 files changed

+49
-38
lines changed

8 files changed

+49
-38
lines changed

.pylintrc

+2-1
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,8 @@ disable=print-statement,
138138
xreadlines-attribute,
139139
deprecated-sys-function,
140140
exception-escape,
141-
comprehension-escape
141+
comprehension-escape,
142+
bad-continuation
142143

143144
# Enable the message, report, category or checker with the given id(s). You can
144145
# either give multiple identifier separated by comma (,) or put this option

splitgraph/core/fragment_manager.py

+38-29
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,12 @@ def _split_changeset(changeset, min_max, table_pks):
6565

6666

6767
# Custom min/max functions that ignore Nones
68-
def _min(a, b):
69-
return b if a is None else (a if b is None else min(a, b))
68+
def _min(left, right):
69+
return right if left is None else (left if right is None else min(left, right))
7070

7171

72-
def _max(a, b):
73-
return b if a is None else (a if b is None else max(a, b))
72+
def _max(left, right):
73+
return right if left is None else (left if right is None else max(left, right))
7474

7575

7676
class Digest:
@@ -99,6 +99,7 @@ def __init__(self, shorts):
9999

100100
@classmethod
101101
def empty(cls):
102+
"""Return an empty Digest instance such that for any Digest D, D + empty == D - empty == D"""
102103
return cls((0,) * 16)
103104

104105
@classmethod
@@ -297,20 +298,10 @@ def _store_changesets(self, table, changesets, parents):
297298
# Store the fragment in a temporary location and then find out its hash and rename to the actual target.
298299
# Optimisation: in the future, we can hash the upserted rows that we need preemptively and possibly
299300
# avoid storing the object altogether if it's a duplicate.
300-
tmp_object_id = get_random_object_id()
301-
upserted = [pk for pk, data in sub_changeset.items() if data[0]]
302-
deleted = [pk for pk, data in sub_changeset.items() if not data[0]]
303-
self.object_engine.store_fragment(upserted, deleted, SPLITGRAPH_META_SCHEMA, tmp_object_id,
304-
table.repository.to_schema(),
305-
table.table_name)
306-
# Digest the rows.
307-
deletion_hash = self._hash_old_changeset_values(sub_changeset, table.table_schema)
308-
insertion_hash = self.calculate_fragment_insertion_hash(SPLITGRAPH_META_SCHEMA, tmp_object_id)
309-
310-
# We currently don't store the insertion/deletion hashes at all.
311-
content_hash = (insertion_hash - deletion_hash).hex()
312-
schema_hash = sha256(str(table.table_schema).encode('ascii')).hexdigest()
313-
object_id = "o" + sha256((content_hash + schema_hash).encode('ascii')).hexdigest()[:-2]
301+
tmp_object_id = self._store_changeset(sub_changeset, table)
302+
303+
deletion_hash, insertion_hash, object_id = self._get_patch_fragment_hashes(sub_changeset, table,
304+
tmp_object_id)
314305

315306
object_ids.append(object_id)
316307
if not self.get_new_objects([object_id]):
@@ -326,6 +317,24 @@ def _store_changesets(self, table, changesets, parents):
326317
insertion_hash=insertion_hash.hex(), deletion_hash=deletion_hash.hex())
327318
return object_ids
328319

320+
def _get_patch_fragment_hashes(self, sub_changeset, table, tmp_object_id):
321+
# Digest the rows.
322+
deletion_hash = self._hash_old_changeset_values(sub_changeset, table.table_schema)
323+
insertion_hash = self.calculate_fragment_insertion_hash(SPLITGRAPH_META_SCHEMA, tmp_object_id)
324+
content_hash = (insertion_hash - deletion_hash).hex()
325+
schema_hash = sha256(str(table.table_schema).encode('ascii')).hexdigest()
326+
object_id = "o" + sha256((content_hash + schema_hash).encode('ascii')).hexdigest()[:-2]
327+
return deletion_hash, insertion_hash, object_id
328+
329+
def _store_changeset(self, sub_changeset, table):
330+
tmp_object_id = get_random_object_id()
331+
upserted = [pk for pk, data in sub_changeset.items() if data[0]]
332+
deleted = [pk for pk, data in sub_changeset.items() if not data[0]]
333+
self.object_engine.store_fragment(upserted, deleted, SPLITGRAPH_META_SCHEMA, tmp_object_id,
334+
table.repository.to_schema(),
335+
table.table_name)
336+
return tmp_object_id
337+
329338
def calculate_fragment_insertion_hash(self, schema, table):
330339
"""
331340
Calculate the homomorphic hash of just the rows that a given fragment inserts
@@ -588,9 +597,9 @@ def _conflate_changes(changeset, new_changes):
588597

589598

590599
def get_random_object_id():
591-
"""Assign each table a random ID that it will be stored as. Note that postgres limits table names to 63 characters,
592-
so the IDs shall be 248-bit strings, hex-encoded, + a letter prefix since Postgres doesn't seem to support table
593-
names starting with a digit."""
600+
"""Generate a random ID for temporary/staging objects that haven't had their ID calculated yet.
601+
Note that Postgres limits table names to 63 characters, so the IDs shall be 248-bit strings, hex-encoded,
602+
+ a letter prefix since Postgres doesn't seem to support table names starting with a digit."""
594603
# Make sure we're padded to 62 characters (otherwise if the random number generated is less than 2^247 we'll be
595604
# dropping characters from the hex format)
596605
return str.format('o{:062x}', getrandbits(248))
@@ -599,7 +608,7 @@ def get_random_object_id():
599608
def _qual_to_index_clause(qual, ctype):
600609
"""Convert our internal qual format into a WHERE clause that runs against an object's index entry.
601610
Returns a Postgres clause (as a Composable) and a tuple of arguments to be mogrified into it."""
602-
column_name, operator, value = qual
611+
column_name, qual_op, value = qual
603612

604613
# Our index is essentially a bloom filter: it returns True if an object _might_ have rows
605614
# that affect the result of a query with a given qual and False if it definitely doesn't.
@@ -611,14 +620,14 @@ def _qual_to_index_clause(qual, ctype):
611620

612621
# If the column has to be greater than (or equal to) X, it only might exist in objects
613622
# whose maximum value is greater than (or equal to) X.
614-
if operator in ('>', '>='):
615-
query += SQL("(index #>> '{{{},1}}')::" + ctype + " " + operator + " %s").format((Identifier(column_name)))
623+
if qual_op in ('>', '>='):
624+
query += SQL("(index #>> '{{{},1}}')::" + ctype + " " + qual_op + " %s").format((Identifier(column_name)))
616625
args.append(value)
617626
# Similar for smaller than, but here we check that the minimum value is smaller than X.
618-
elif operator in ('<', '<='):
619-
query += SQL("(index #>> '{{{},0}}')::" + ctype + " " + operator + " %s").format((Identifier(column_name)))
627+
elif qual_op in ('<', '<='):
628+
query += SQL("(index #>> '{{{},0}}')::" + ctype + " " + qual_op + " %s").format((Identifier(column_name)))
620629
args.append(value)
621-
elif operator == '=':
630+
elif qual_op == '=':
622631
query += SQL("%s BETWEEN (index #>> '{{{0},0}}')::" + ctype
623632
+ " AND (index #>> '{{{0},1}}')::" + ctype).format((Identifier(column_name)))
624633
args.append(value)
@@ -636,8 +645,8 @@ def _qual_to_index_clause(qual, ctype):
636645

637646
def _qual_to_sql_clause(qual, ctype):
638647
"""Convert a qual to a normal SQL clause that can be run against the actual object rather than the index."""
639-
column_name, operator, value = qual
640-
return SQL("{}::" + ctype + " " + operator + " %s").format(Identifier(column_name)), (value,)
648+
column_name, qual_op, value = qual
649+
return SQL("{}::" + ctype + " " + qual_op + " %s").format(Identifier(column_name)), (value,)
641650

642651

643652
def _quals_to_clause(quals, column_types, qual_to_clause=_qual_to_index_clause):

splitgraph/core/metadata_manager.py

-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515

1616
class Object(namedtuple('Object', OBJECT_COLS)):
1717
"""Represents a Splitgraph object that tables are composed of."""
18-
pass
1918

2019

2120
class MetadataManager:

splitgraph/core/repository.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def by_tag(self, tag, raise_on_none=True):
6161
(self.repository.namespace, self.repository.repository),
6262
return_shape=ResultShape.ONE_MANY)
6363
if result is None:
64-
raise SplitGraphException("No images found in %s!", self.repository.to_schema())
64+
raise SplitGraphException("No images found in %s!" % self.repository.to_schema())
6565
return self._make_image(result)
6666

6767
result = engine.run_sql(select("get_tagged_images", "image_hash", "tag = %s",

splitgraph/core/table.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def materialize(self, destination, destination_schema=None, lq_server=None):
3939
# Copy the given snap id over to "staging" and apply the DIFFS
4040
with object_manager.ensure_objects(self) as required_objects:
4141
engine.create_table(schema=destination_schema, table=destination, schema_spec=self.table_schema)
42-
if len(required_objects) > 0:
42+
if required_objects:
4343
logging.info("Applying %d fragment(s)...", (len(required_objects)))
4444
engine.apply_fragments([(SPLITGRAPH_META_SCHEMA, d) for d in required_objects],
4545
destination_schema, destination)

splitgraph/engine/__init__.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -113,9 +113,9 @@ def copy_table(self, source_schema, source_table, target_schema, target_table, w
113113
query += SQL(" OFFSET %s")
114114
query_args.append(offset)
115115
if with_pk_constraints and pks:
116-
query += SQL(";ALTER TABLE {}.{} ADD PRIMARY KEY (").format(
117-
Identifier(target_schema), Identifier(target_table)) + SQL(',').join(
118-
SQL("{}").format(Identifier(c)) for c, _ in pks) + SQL(")")
116+
query += SQL(";ALTER TABLE {}.{} ADD PRIMARY KEY (").format(
117+
Identifier(target_schema), Identifier(target_table)) + SQL(',').join(
118+
SQL("{}").format(Identifier(c)) for c, _ in pks) + SQL(")")
119119
self.run_sql(query, query_args, return_shape=ResultShape.NONE)
120120

121121
def delete_table(self, schema, table):

splitgraph/engine/postgres/engine.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,8 @@ def _prepare_ri_data(self, schema, table):
367367
non_ri_cols, non_ri_types = zip(*non_ri_cols_types) if non_ri_cols_types else ((), ())
368368
return ri_cols, non_ri_cols, non_ri_types
369369

370-
def _generate_fragment_application(self, source_schema, source_table,
370+
@staticmethod
371+
def _generate_fragment_application(source_schema, source_table,
371372
target_schema, target_table, ri_data, extra_quals=None):
372373
ri_cols, non_ri_cols, _ = ri_data
373374
all_cols = ri_cols + non_ri_cols

splitgraph/ingestion/pandas.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@
55
import pandas as pd
66
from pandas.io.sql import get_schema
77
from psycopg2.sql import Identifier, SQL
8+
from sqlalchemy import create_engine
9+
810
from splitgraph import SplitGraphException
911
from splitgraph.core.image import Image
10-
from sqlalchemy import create_engine
1112

1213

1314
def _get_sqlalchemy_engine(engine):

0 commit comments

Comments (0)