Skip to content

Commit e5b1e35

Browse files
kklein and ivergara authored
Add experimental support for db2. (#107)
* Draft integration of db2. * Fix date gap. * Add dependencies. * Fix capitalization tests. * Add case distinction for varchar column. * Add bash script for local development. * Add changelog entry. * Update CHANGELOG.rst Co-authored-by: Ignacio Vergara Kausel <[email protected]> * Add skip message. --------- Co-authored-by: Ignacio Vergara Kausel <[email protected]>
1 parent 9f2d5e2 commit e5b1e35

File tree

9 files changed

+156
-19
lines changed

9 files changed

+156
-19
lines changed

.github/workflows/ci.yaml

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,55 @@ jobs:
155155
with:
156156
file: ./coverage.xml
157157

158+
159+
linux-integration_tests-db2:
160+
name: "Linux - integration tests - Python ${{ matrix.PYTHON_VERSION }} - DB2"
161+
runs-on: ubuntu-20.04
162+
env:
163+
CI: True
164+
strategy:
165+
fail-fast: false
166+
matrix:
167+
PYTHON_VERSION: [ '3.8', '3.9', '3.10' ]
168+
services:
169+
DB:
170+
image: ibmcom/db2:11.5.5.1
171+
env:
172+
LICENSE: accept
173+
DB2INSTANCE: db2inst1
174+
DB2INST1_PASSWORD: password
175+
DBNAME: testdb
176+
UPDATEAVAIL: "NO"
177+
options: --privileged
178+
ports:
179+
- 50000:50000
180+
181+
steps:
182+
- name: Checkout branch
183+
uses: actions/checkout@v3
184+
with:
185+
ref: ${{ github.head_ref }}
186+
- name: Fetch full git history
187+
run: git fetch --prune --unshallow
188+
- uses: conda-incubator/setup-miniconda@v2
189+
with:
190+
python-version: ${{ matrix.PYTHON_VERSION }}
191+
miniforge-variant: Mambaforge
192+
miniforge-version: 4.11.0-2
193+
use-mamba: true
194+
environment-file: environment.yml
195+
activate-environment: datajudge
196+
- name: Run Integration Tests
197+
shell: bash -l {0}
198+
run: |
199+
flit install -s
200+
pytest --cov=datajudge --cov-report=xml --cov-append --backend=db2 tests/integration
201+
- name: Generate code coverage report
202+
uses: codecov/[email protected]
203+
with:
204+
file: ./coverage.xml
205+
206+
158207
linux-integration_tests-snowflake:
159208
name: "Linux - integration tests - Python ${{ matrix.PYTHON_VERSION }} - Snowflake"
160209
runs-on: ubuntu-latest
@@ -228,6 +277,7 @@ jobs:
228277
with:
229278
file: ./coverage.xml
230279

280+
231281
linux-integration_tests-impala-column-pt1:
232282
name: "Linux - integration tests - Python ${{ matrix.PYTHON_VERSION }} - Impala - pt1"
233283
runs-on: ubuntu-20.04

CHANGELOG.rst

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,15 @@
77
Changelog
88
=========
99

10-
1.3.0 - 2022.xx.xx
10+
1.4.0 - 2022.02.24
11+
------------------
12+
13+
**New features**
14+
15+
- Add partial and experimental support for db2 as a backend.
16+
17+
18+
1.3.0 - 2022.01.17
1119
------------------
1220

1321
**New features**

environment.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,5 @@ dependencies:
2525
- flit
2626
- sphinx-autodoc-typehints
2727
- impyla
28+
- ibm_db
29+
- ibm_db_sa

src/datajudge/db_access.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ def is_impala(engine: sa.engine.Engine) -> bool:
3333
return engine.name == "impala"
3434

3535

36+
def is_db2(engine: sa.engine.Engine) -> bool:
37+
return engine.name == "ibm_db_sa"
38+
39+
3640
def get_table_columns(table, column_names):
3741
return [table.c[column_name] for column_name in column_names]
3842

@@ -421,6 +425,15 @@ def get_date_span(engine, ref, date_column_name):
421425
)
422426
]
423427
)
428+
elif is_db2(engine):
429+
selection = sa.select(
430+
[
431+
sa.func.days_between(
432+
sa.func.max(column),
433+
sa.func.min(column),
434+
)
435+
]
436+
)
424437
else:
425438
raise NotImplementedError(
426439
"Date spans not yet implemented for this sql dialect."
@@ -663,6 +676,14 @@ def get_date_gaps(
663676
)
664677
> legitimate_gap_size
665678
)
679+
elif is_db2(engine):
680+
gap_condition = (
681+
sa.func.days_between(
682+
start_table.c[start_column],
683+
end_table.c[end_column],
684+
)
685+
> legitimate_gap_size
686+
)
666687
else:
667688
raise NotImplementedError(f"Date gaps not yet implemented for {engine.name}.")
668689

start_db2.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#!/bin/bash
2+
3+
set -e
4+
5+
docker run -itd --name mydb2 --privileged=true -p 50000:50000 -e LICENSE=accept -e DB2INST1_PASSWORD=password -e DBNAME=testdb -v ~/database ibmcom/db2

tests/integration/conftest.py

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import sqlalchemy as sa
99
from impala.dbapi import connect
1010

11-
from datajudge.db_access import apply_patches, is_bigquery, is_impala, is_mssql
11+
from datajudge.db_access import apply_patches, is_bigquery, is_db2, is_impala, is_mssql
1212

1313
TEST_DB_NAME = "tempdb"
1414
SCHEMA = "dbo" # 'dbo' is the standard schema in mssql
@@ -30,6 +30,8 @@ def conn_creator():
3030

3131
if backend == "postgres":
3232
connection_string = f"postgresql://datajudge:datajudge@{address}:5432/datajudge"
33+
if backend == "db2":
34+
connection_string = f"db2+ibm_db://db2inst1:password@{address}:50000/testdb"
3335
elif "mssql" in backend:
3436
connection_string = (
3537
f"mssql+pyodbc://sa:datajudge-123@{address}:1433/{TEST_DB_NAME}"
@@ -56,6 +58,12 @@ def conn_creator():
5658
return engine
5759

5860

61+
def _string_column(engine):
62+
if is_db2(engine):
63+
return sa.String(40)
64+
return sa.String()
65+
66+
5967
@pytest.fixture(scope="module")
6068
def engine(backend):
6169
engine = get_engine(backend)
@@ -111,7 +119,7 @@ def mix_table1(engine, metadata):
111119
table_name = "mix_table1"
112120
columns = [
113121
sa.Column("col_int", sa.Integer()),
114-
sa.Column("col_varchar", sa.String()),
122+
sa.Column("col_varchar", _string_column(engine)),
115123
sa.Column("col_date", sa.DateTime()),
116124
]
117125
data = [
@@ -131,7 +139,7 @@ def mix_table2(engine, metadata):
131139
table_name = "mix_table2"
132140
columns = [
133141
sa.Column("col_int", sa.Integer()),
134-
sa.Column("col_varchar", sa.String()),
142+
sa.Column("col_varchar", _string_column(engine)),
135143
sa.Column("col_date", sa.DateTime()),
136144
]
137145
data = [
@@ -152,7 +160,7 @@ def mix_table2_pk(engine, metadata):
152160
table_name = "mix_table2_pk"
153161
columns = [
154162
sa.Column("col_int", sa.Integer(), primary_key=True),
155-
sa.Column("col_varchar", sa.String()),
163+
sa.Column("col_varchar", _string_column(engine)),
156164
sa.Column("col_date", sa.DateTime()),
157165
]
158166
data = [
@@ -477,7 +485,7 @@ def unique_table1(engine, metadata):
477485
table_name = "unique_table1"
478486
columns = [
479487
sa.Column("col_int", sa.Integer()),
480-
sa.Column("col_varchar", sa.String()),
488+
sa.Column("col_varchar", _string_column(engine)),
481489
]
482490
data = [{"col_int": i // 2, "col_varchar": f"hi{i // 3}"} for i in range(60)]
483491
data += [
@@ -493,7 +501,7 @@ def unique_table2(engine, metadata):
493501
table_name = "unique_table2"
494502
columns = [
495503
sa.Column("col_int", sa.Integer()),
496-
sa.Column("col_varchar", sa.String()),
504+
sa.Column("col_varchar", _string_column(engine)),
497505
]
498506
data = [{"col_int": i // 2, "col_varchar": f"hi{i // 3}"} for i in range(40)]
499507
_handle_table(engine, metadata, table_name, columns, data)
@@ -503,7 +511,7 @@ def unique_table2(engine, metadata):
503511
@pytest.fixture(scope="module")
504512
def nested_table(engine, metadata):
505513
table_name = "nested_table"
506-
columns = [sa.Column("nested_varchar", sa.String())]
514+
columns = [sa.Column("nested_varchar", _string_column(engine))]
507515
data = [
508516
{"nested_varchar": "ABC#1,"},
509517
{"nested_varchar": "ABC#1,DEF#2,"},
@@ -517,7 +525,7 @@ def nested_table(engine, metadata):
517525
def varchar_table1(engine, metadata):
518526
table_name = "varchar_table1"
519527
columns = [
520-
sa.Column("col_varchar", sa.String()),
528+
sa.Column("col_varchar", _string_column(engine)),
521529
]
522530
data = [{"col_varchar": "qq" * i} for i in range(1, 10)]
523531
data.append({"col_varchar": None})
@@ -529,7 +537,7 @@ def varchar_table1(engine, metadata):
529537
def varchar_table2(engine, metadata):
530538
table_name = "varchar_table2"
531539
columns = [
532-
sa.Column("col_varchar", sa.String()),
540+
sa.Column("col_varchar", _string_column(engine)),
533541
]
534542
data = [{"col_varchar": "qq" * i} for i in range(2, 11)]
535543
_handle_table(engine, metadata, table_name, columns, data)
@@ -540,7 +548,7 @@ def varchar_table2(engine, metadata):
540548
def varchar_table_real(engine, metadata):
541549
table_name = "varchar_table_real"
542550
columns = [
543-
sa.Column("col_varchar", sa.String()),
551+
sa.Column("col_varchar", _string_column(engine)),
544552
]
545553
data = [
546554
{"col_varchar": val}
@@ -754,6 +762,10 @@ def capitalization_table(engine, metadata):
754762
str_datatype = "STRING"
755763
# Impala supports primary keys but uses a different grammar.
756764
primary_key = ""
765+
elif is_db2(engine):
766+
str_datatype = "VARCHAR(20)"
767+
# Primary key needs to be non-nullable.
768+
primary_key = ""
757769
else:
758770
str_datatype = "TEXT"
759771
with engine.connect() as connection:
@@ -796,7 +808,15 @@ def pytest_addoption(parser):
796808
parser.addoption(
797809
"--backend",
798810
choices=(
799-
("mssql", "mssql-freetds", "postgres", "snowflake", "bigquery", "impala")
811+
(
812+
"mssql",
813+
"mssql-freetds",
814+
"postgres",
815+
"snowflake",
816+
"bigquery",
817+
"impala",
818+
"db2",
819+
)
800820
),
801821
help="which database backend to use to run the integration tests",
802822
)

tests/integration/test_column_capitalization.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import pytest
22

33
from datajudge import Condition, WithinRequirement
4-
from datajudge.db_access import is_bigquery, is_impala, is_mssql, is_postgresql
4+
from datajudge.db_access import is_bigquery, is_db2, is_impala, is_mssql, is_postgresql
55

66
# These tests
77

@@ -21,6 +21,10 @@ def test_column_existence(
2121
)
2222
if is_postgresql(engine):
2323
pytest.skip("Postgres interface always expects lower-cased columns.")
24+
if is_db2(engine) and use_uppercase_query:
25+
pytest.skip(
26+
"Db2 interface transforms writes to lower-case, expects lower-case reads."
27+
)
2428
(
2529
db_name,
2630
schema_name,

0 commit comments

Comments (0)