diff --git a/data-pipeline/data_pipeline/aisr.py b/data-pipeline/data_pipeline/aisr.py new file mode 100644 index 0000000..a89a577 --- /dev/null +++ b/data-pipeline/data_pipeline/aisr.py @@ -0,0 +1,91 @@ +""" +Module for interactions with AISR +""" + +import logging +import uuid +from dataclasses import dataclass +from typing import Tuple +from urllib.parse import parse_qs, quote, urlparse + +import requests +from bs4 import BeautifulSoup, Tag + +logger = logging.getLogger(__name__) + + +def _get_session_code_and_tab_id( + session: requests.Session, auth_realm_url: str +) -> Tuple[str, str]: + """ + The session and tab are needed to authenticate with AISR. + """ + state = uuid.uuid4() + nonce = uuid.uuid4() + + # pylint: disable-next=line-too-long + url = f"{auth_realm_url}/protocol/openid-connect/auth?client_id=aisr-app&redirect_uri=https%3A%2F%2Faisr.web.health.state.mn.us%2Fhome&state={state}&response_mode=fragment&response_type=code&scope=openid&nonce={nonce}" + + response = session.request("GET", url, headers={}, data={}) + soup = BeautifulSoup(response.content, "html.parser") + form_element = soup.find("form", id="kc-form-login") + + # the session code and tab id are found in the action URL of the form + if isinstance(form_element, Tag): + action_url = form_element.get("action") + if isinstance(action_url, str): + parsed_url = urlparse(action_url) + query_dict = parse_qs(parsed_url.query) + return query_dict["session_code"][0], query_dict["tab_id"][0] + raise ValueError("The action URL is not a valid string.") + raise ValueError("Login form not found or is not a valid HTML form element.") + + +@dataclass +class AISRResponse: + """ + Dataclass to hold the response from interactions with AISR. + """ + + is_successful: bool + message: str + + +def login( + session: requests.Session, auth_realm_url: str, username: str, password: str +) -> AISRResponse: + """ + Login with AISR. + """ + logger.info("Logging into MIIC") + session_code, tab_id = _get_session_code_and_tab_id(session, auth_realm_url) + + # pylint: disable-next=line-too-long + url = f"{auth_realm_url}/login-actions/authenticate?session_code={session_code}&execution=084dee30-925f-4a8f-829d-7a372e38d0de&client_id=aisr-app&tab_id={tab_id}" + + payload = f"password={quote(password)}&username={username}" + headers = {"Content-Type": "application/x-www-form-urlencoded"} + + response = session.request("POST", url, headers=headers, data=payload) + + if response.status_code == 200 and "KEYCLOAK_IDENTITY" in response.cookies: + logger.info("Logged in successfully") + return AISRResponse(is_successful=True, message="Logged in successfully") + + logger.error("Login failed or KEYCLOAK_IDENTITY cookie is missing") + return AISRResponse( + is_successful=False, + message="Login failed or KEYCLOAK_IDENTITY cookie is missing", + ) + + +def logout(session: requests.Session, auth_realm_url: str) -> AISRResponse: + """ + Log out of AISR. + """ + url = f"{auth_realm_url}/protocol/openid-connect/logout?client_id=aisr-app" + session.request("GET", url, headers={}, data={}) + return AISRResponse( + is_successful=True, + message="Logged out successfully", + ) diff --git a/data-pipeline/data_pipeline/etl_workflow.py b/data-pipeline/data_pipeline/etl_workflow.py index 17cf326..8779728 100644 --- a/data-pipeline/data_pipeline/etl_workflow.py +++ b/data-pipeline/data_pipeline/etl_workflow.py @@ -8,7 +8,6 @@ import pandas as pd -# Configure the logger for this module logger = logging.getLogger(__name__) diff --git a/data-pipeline/data_pipeline/pipeline_factory.py b/data-pipeline/data_pipeline/pipeline_factory.py index a319933..1b381f2 100644 --- a/data-pipeline/data_pipeline/pipeline_factory.py +++ b/data-pipeline/data_pipeline/pipeline_factory.py @@ -1,5 +1,5 @@ """ -Factory to create and configure the ETL pipeline with injected dependencies. +Factory for creating the pipeline and related tools """ from collections.abc import Callable diff --git a/data-pipeline/poetry.lock b/data-pipeline/poetry.lock index c16e455..63b104e 100644 --- a/data-pipeline/poetry.lock +++ b/data-pipeline/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand. [[package]] name = "annotated-types" @@ -6,6 +6,8 @@ version = "0.7.0" description = "Reusable constraint types to use with typing.Annotated" optional = false python-versions = ">=3.8" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, @@ -17,6 +19,8 @@ version = "4.7.0" description = "High level compatibility layer for multiple asynchronous event loop implementations" optional = false python-versions = ">=3.9" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "anyio-4.7.0-py3-none-any.whl", hash = "sha256:ea60c3723ab42ba6fff7e8ccb0488c898ec538ff4df1f1d5e642c3601d07e352"}, {file = "anyio-4.7.0.tar.gz", hash = "sha256:2f834749c602966b7d456a7567cafcb309f96482b5081d14ac93ccd457f9dd48"}, @@ -38,17 +42,44 @@ version = "3.3.8" description = "An abstract syntax tree for Python with inference support." optional = false python-versions = ">=3.9.0" +groups = ["dev"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "astroid-3.3.8-py3-none-any.whl", hash = "sha256:187ccc0c248bfbba564826c26f070494f7bc964fd286b6d9fff4420e55de828c"}, {file = "astroid-3.3.8.tar.gz", hash = "sha256:a88c7994f914a4ea8572fac479459f4955eeccc877be3f2d959a33273b0cf40b"}, ] +[[package]] +name = "beautifulsoup4" +version = "4.12.3" +description = "Screen-scraping library" +optional = false +python-versions = ">=3.6.0" +groups = ["main"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" +files = [ + {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, + {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"}, +] + +[package.dependencies] +soupsieve = ">1.2" + +[package.extras] +cchardet = ["cchardet"] +chardet = ["chardet"] +charset-normalizer = ["charset-normalizer"] +html5lib = ["html5lib"] +lxml = ["lxml"] + [[package]] name = "black" version = "24.10.0" description = "The uncompromising code formatter." optional = false python-versions = ">=3.9" +groups = ["dev"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "black-24.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e6668650ea4b685440857138e5fe40cde4d652633b1bdffc62933d0db4ed9812"}, {file = "black-24.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1c536fcf674217e87b8cc3657b81809d3c085d7bf3ef262ead700da345bfa6ea"}, @@ -93,6 +124,8 @@ version = "2024.12.14" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56"}, {file = "certifi-2024.12.14.tar.gz", hash = "sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db"}, @@ -104,6 +137,8 @@ version = "3.4.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.7.0" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4f9fc98dad6c2eaa32fc3af1417d95b5e3d08aff968df0cd320066def971f9a6"}, {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0de7b687289d3c1b3e8660d0741874abe7888100efe14bd0f9fd7141bcbda92b"}, @@ -218,6 +253,8 @@ version = "8.1.7" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" +groups = ["dev", "test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, @@ -232,10 +269,12 @@ version = "0.4.6" description = "Cross-platform colored terminal text." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["dev", "test"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +markers = {dev = "(sys_platform == \"win32\" or platform_system == \"Windows\") and (python_version >= \"3.12\" or python_version == \"3.11\")", test = "(platform_system == \"Windows\" or sys_platform == \"win32\") and (python_version >= \"3.12\" or python_version == \"3.11\")"} [[package]] name = "coverage" @@ -243,6 +282,8 @@ version = "7.6.9" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.9" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "coverage-7.6.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:85d9636f72e8991a1706b2b55b06c27545448baf9f6dbf51c4004609aacd7dcb"}, {file = "coverage-7.6.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:608a7fd78c67bee8936378299a6cb9f5149bb80238c7a566fc3e6717a4e68710"}, @@ -317,6 +358,8 @@ version = "0.3.9" description = "serialize all of Python" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "dill-0.3.9-py3-none-any.whl", hash = "sha256:468dff3b89520b474c0397703366b7b95eebe6303f108adf9b19da1f702be87a"}, {file = "dill-0.3.9.tar.gz", hash = "sha256:81aa267dddf68cbfe8029c42ca9ec6a4ab3b22371d1c450abc54422577b4512c"}, @@ -332,6 +375,8 @@ version = "33.3.1" description = "Faker is a Python package that generates fake data for you." optional = false python-versions = ">=3.8" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "Faker-33.3.1-py3-none-any.whl", hash = "sha256:ac4cf2f967ce02c898efa50651c43180bd658a7707cfd676fcc5410ad1482c03"}, {file = "faker-33.3.1.tar.gz", hash = "sha256:49dde3b06a5602177bc2ad013149b6f60a290b7154539180d37b6f876ae79b20"}, @@ -347,6 +392,8 @@ version = "0.115.6" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" optional = false python-versions = ">=3.8" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "fastapi-0.115.6-py3-none-any.whl", hash = "sha256:e9240b29e36fa8f4bb7290316988e90c381e5092e0cbe84e7818cc3713bcf305"}, {file = "fastapi-0.115.6.tar.gz", hash = "sha256:9ec46f7addc14ea472958a96aae5b5de65f39721a46aaf5705c480d9a8b76654"}, @@ -367,6 +414,8 @@ version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" optional = false python-versions = ">=3.7" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, @@ -378,6 +427,8 @@ version = "3.10" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, @@ -392,6 +443,8 @@ version = "2.0.0" description = "brain-dead simple config-ini parsing" optional = false python-versions = ">=3.7" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, @@ -403,6 +456,8 @@ version = "5.13.2" description = "A Python utility / library to sort Python imports." optional = false python-versions = ">=3.8.0" +groups = ["dev"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "isort-5.13.2-py3-none-any.whl", hash = "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6"}, {file = "isort-5.13.2.tar.gz", hash = "sha256:48fdfcb9face5d58a4f6dde2e72a1fb8dcaf8ab26f95ab49fab84c2ddefb0109"}, @@ -417,6 +472,8 @@ version = "0.7.0" description = "McCabe checker, plugin for flake8" optional = false python-versions = ">=3.6" +groups = ["dev"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, @@ -428,6 +485,8 @@ version = "1.14.1" description = "Optional static typing for Python" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "mypy-1.14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:52686e37cf13d559f668aa398dd7ddf1f92c5d613e4f8cb262be2fb4fedb0fcb"}, {file = "mypy-1.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1fb545ca340537d4b45d3eecdb3def05e913299ca72c290326be19b3804b39c0"}, @@ -486,6 +545,8 @@ version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." optional = false python-versions = ">=3.5" +groups = ["dev"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, @@ -497,6 +558,8 @@ version = "2.2.0" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.10" +groups = ["main", "dev"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "numpy-2.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1e25507d85da11ff5066269d0bd25d06e0a0f2e908415534f3e603d2a78e4ffa"}, {file = "numpy-2.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a62eb442011776e4036af5c8b1a00b706c5bc02dc15eb5344b0c750428c94219"}, @@ -561,6 +624,8 @@ version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" +groups = ["dev", "test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, @@ -572,6 +637,8 @@ version = "2.2.3" description = "Powerful data structures for data analysis, time series, and statistics" optional = false python-versions = ">=3.9" +groups = ["main"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"}, {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"}, @@ -657,6 +724,8 @@ version = "2.2.3.241126" description = "Type annotations for pandas" optional = false python-versions = ">=3.10" +groups = ["dev"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "pandas_stubs-2.2.3.241126-py3-none-any.whl", hash = "sha256:74aa79c167af374fe97068acc90776c0ebec5266a6e5c69fe11e9c2cf51f2267"}, {file = "pandas_stubs-2.2.3.241126.tar.gz", hash = "sha256:cf819383c6d9ae7d4dabf34cd47e1e45525bb2f312e6ad2939c2c204cb708acd"}, @@ -672,6 +741,8 @@ version = "0.12.1" description = "Utility library for gitignore style pattern matching of file paths." optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, @@ -683,6 +754,8 @@ version = "4.3.6" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"}, {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"}, @@ -699,6 +772,8 @@ version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, @@ -714,6 +789,8 @@ version = "2.10.3" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "pydantic-2.10.3-py3-none-any.whl", hash = "sha256:be04d85bbc7b65651c5f8e6b9976ed9c6f41782a55524cef079a34a0bb82144d"}, {file = "pydantic-2.10.3.tar.gz", hash = "sha256:cb5ac360ce894ceacd69c403187900a02c4b20b693a9dd1d643e1effab9eadf9"}, @@ -734,6 +811,8 @@ version = "2.27.1" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.8" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "pydantic_core-2.27.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:71a5e35c75c021aaf400ac048dacc855f000bdfed91614b4a726f7432f1f3d6a"}, {file = "pydantic_core-2.27.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f82d068a2d6ecfc6e054726080af69a6764a10015467d7d7b9f66d6ed5afa23b"}, @@ -846,6 +925,8 @@ version = "3.3.3" description = "python code static checker" optional = false python-versions = ">=3.9.0" +groups = ["dev"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "pylint-3.3.3-py3-none-any.whl", hash = "sha256:26e271a2bc8bce0fc23833805a9076dd9b4d5194e2a02164942cb3cdc37b4183"}, {file = "pylint-3.3.3.tar.gz", hash = "sha256:07c607523b17e6d16e2ae0d7ef59602e332caa762af64203c24b41c27139f36a"}, @@ -873,6 +954,8 @@ version = "8.3.4" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6"}, {file = "pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761"}, @@ -893,6 +976,8 @@ version = "6.0.0" description = "Pytest plugin for measuring coverage." optional = false python-versions = ">=3.9" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "pytest-cov-6.0.0.tar.gz", hash = "sha256:fde0b595ca248bb8e2d76f020b465f3b107c9632e6a1d1705f17834c89dcadc0"}, {file = "pytest_cov-6.0.0-py3-none-any.whl", hash = "sha256:eee6f1b9e61008bd34975a4d5bab25801eb31898b032dd55addc93e96fcaaa35"}, @@ -911,6 +996,8 @@ version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main", "test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, @@ -919,12 +1006,27 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-multipart" +version = "0.0.20" +description = "A streaming multipart parser for Python" +optional = false +python-versions = ">=3.8" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" +files = [ + {file = "python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104"}, + {file = "python_multipart-0.0.20.tar.gz", hash = "sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13"}, +] + [[package]] name = "pytz" version = "2024.2" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" +groups = ["main"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725"}, {file = "pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a"}, @@ -936,6 +1038,8 @@ version = "2.32.3" description = "Python HTTP for Humans." optional = false python-versions = ">=3.8" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, @@ -957,6 +1061,8 @@ version = "1.17.0" description = "Python 2 and 3 compatibility utilities" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main", "test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, @@ -968,17 +1074,34 @@ version = "1.3.1" description = "Sniff out which async library your code is running under" optional = false python-versions = ">=3.7" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, ] +[[package]] +name = "soupsieve" +version = "2.6" +description = "A modern CSS selector implementation for Beautiful Soup." +optional = false +python-versions = ">=3.8" +groups = ["main"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" +files = [ + {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, + {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, +] + [[package]] name = "starlette" version = "0.41.3" description = "The little ASGI library that shines." optional = false python-versions = ">=3.8" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "starlette-0.41.3-py3-none-any.whl", hash = "sha256:44cedb2b7c77a9de33a8b74b2b90e9f50d11fcf25d8270ea525ad71a25374ff7"}, {file = "starlette-0.41.3.tar.gz", hash = "sha256:0e4ab3d16522a255be6b28260b938eae2482f98ce5cc934cb08dce8dc3ba5835"}, @@ -996,17 +1119,50 @@ version = "0.13.2" description = "Style preserving TOML library" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "tomlkit-0.13.2-py3-none-any.whl", hash = "sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde"}, {file = "tomlkit-0.13.2.tar.gz", hash = "sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79"}, ] +[[package]] +name = "types-beautifulsoup4" +version = "4.12.0.20241020" +description = "Typing stubs for beautifulsoup4" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" +files = [ + {file = "types-beautifulsoup4-4.12.0.20241020.tar.gz", hash = "sha256:158370d08d0cd448bd11b132a50ff5279237a5d4b5837beba074de152a513059"}, + {file = "types_beautifulsoup4-4.12.0.20241020-py3-none-any.whl", hash = "sha256:c95e66ce15a4f5f0835f7fbc5cd886321ae8294f977c495424eaf4225307fd30"}, +] + +[package.dependencies] +types-html5lib = "*" + +[[package]] +name = "types-html5lib" +version = "1.1.11.20241018" +description = "Typing stubs for html5lib" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" +files = [ + {file = "types-html5lib-1.1.11.20241018.tar.gz", hash = "sha256:98042555ff78d9e3a51c77c918b1041acbb7eb6c405408d8a9e150ff5beccafa"}, + {file = "types_html5lib-1.1.11.20241018-py3-none-any.whl", hash = "sha256:3f1e064d9ed2c289001ae6392c84c93833abb0816165c6ff0abfc304a779f403"}, +] + [[package]] name = "types-pytz" version = "2024.2.0.20241003" description = "Typing stubs for pytz" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "types-pytz-2024.2.0.20241003.tar.gz", hash = "sha256:575dc38f385a922a212bac00a7d6d2e16e141132a3c955078f4a4fd13ed6cb44"}, {file = "types_pytz-2024.2.0.20241003-py3-none-any.whl", hash = "sha256:3e22df1336c0c6ad1d29163c8fda82736909eb977281cb823c57f8bae07118b7"}, @@ -1018,6 +1174,8 @@ version = "2.32.0.20241016" description = "Typing stubs for requests" optional = false python-versions = ">=3.8" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "types-requests-2.32.0.20241016.tar.gz", hash = "sha256:0d9cad2f27515d0e3e3da7134a1b6f28fb97129d86b867f24d9c726452634d95"}, {file = "types_requests-2.32.0.20241016-py3-none-any.whl", hash = "sha256:4195d62d6d3e043a4eaaf08ff8a62184584d2e8684e9d2aa178c7915a7da3747"}, @@ -1032,6 +1190,8 @@ version = "4.12.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" +groups = ["dev", "test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, @@ -1043,6 +1203,8 @@ version = "2024.2" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" +groups = ["main"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd"}, {file = "tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc"}, @@ -1054,6 +1216,8 @@ version = "2.2.3" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.8" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"}, {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"}, @@ -1071,6 +1235,8 @@ version = "0.34.0" description = "The lightning-fast ASGI server." optional = false python-versions = ">=3.9" +groups = ["test"] +markers = "python_version >= \"3.12\" or python_version == \"3.11\"" files = [ {file = "uvicorn-0.34.0-py3-none-any.whl", hash = "sha256:023dc038422502fa28a09c7a30bf2b6991512da7dcdb8fd35fe57cfc154126f4"}, {file = "uvicorn-0.34.0.tar.gz", hash = "sha256:404051050cd7e905de2c9a7e61790943440b3416f49cb409f965d9dcd0fa73e9"}, @@ -1084,6 +1250,6 @@ h11 = ">=0.8" standard = ["colorama (>=0.4)", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] [metadata] -lock-version = "2.0" +lock-version = "2.1" python-versions = "^3.11" -content-hash = "09ee255998dd853529dd172841b69f157a3c7f107892cc037220ed3ef36ac39f" +content-hash = "ddf4abd75cff2f7592df34a393f6697ccc153d350ebefbab16abb6280ef4d581" diff --git a/data-pipeline/pyproject.toml b/data-pipeline/pyproject.toml index 059fd18..f8d903e 100644 --- a/data-pipeline/pyproject.toml +++ b/data-pipeline/pyproject.toml @@ -17,6 +17,7 @@ minnesota-immunization-data-pipeline = "data_pipeline.__main__:run" [tool.poetry.dependencies] python = "^3.11" pandas = "^2.2.3" +beautifulsoup4 = "^4.12.3" [tool.poetry.group.dev.dependencies] @@ -25,6 +26,7 @@ mypy = "^1.13.0" pylint = "^3.3.1" black = "^24.10.0" pandas-stubs = "^2.2.3.241009" +types-beautifulsoup4 = "^4.12.0.20241020" [tool.poetry.group.test.dependencies] pytest = "^8.3.3" @@ -34,6 +36,7 @@ types-requests = "^2.32.0.20241016" fastapi = "^0.115.6" uvicorn = "^0.34.0" pytest-cov = "^6.0.0" +python-multipart = "^0.0.20" [build-system] requires = ["poetry-core"] diff --git a/data-pipeline/tests/conftest.py b/data-pipeline/tests/conftest.py index c13e1ef..8489ddd 100644 --- a/data-pipeline/tests/conftest.py +++ b/data-pipeline/tests/conftest.py @@ -5,11 +5,12 @@ import time from multiprocessing import Process from pathlib import Path +from urllib.parse import urlencode import pytest import uvicorn -from fastapi import FastAPI -from fastapi.responses import HTMLResponse +from fastapi import FastAPI, Form +from fastapi.responses import HTMLResponse, JSONResponse @pytest.fixture(name="folders") @@ -41,25 +42,85 @@ def input_output_logs_folders(): def fastapi_server(): """ Spins up a FastAPI server for integration tests. - Serves a boilerplate HTML page with a