chore: apply black formatting and enable CI on pre-commit hooks (#66)

DS4SD · May 27, 2024 · ce9a2fa · ce9a2fa
1 parent fecc449
commit ce9a2fa
Show file tree

Hide file tree

Showing 21 changed files with 1,678 additions and 1,426 deletions.
diff --git a/.github/actions/setup-poetry/action.yml b/.github/actions/setup-poetry/action.yml
@@ -0,0 +1,27 @@
+name: 'Set up Poetry and install'
+description: 'Set up a specific version of Poetry and install dependencies using caching.'
+inputs:
+  python-version:
+    description: "Version range or exact version of Python or PyPy to use, using SemVer's version range syntax."
+    # Quoted so YAML keeps the string '3.10' rather than reading the float 3.1.
+    default: '3.10'
+runs:
+  using: 'composite'
+  steps:
+    # Poetry is installed before setup-python because `cache: 'poetry'` asks
+    # the poetry executable which directory has to be cached.
+    - name: Install poetry
+      run: pipx install poetry==1.8.3
+      shell: bash
+    - uses: actions/setup-python@v5
+      with:
+        python-version: ${{ inputs.python-version }}
+        cache: 'poetry'
+    # A pipx-installed Poetry creates its virtualenv with the interpreter it
+    # was installed under unless told otherwise, so bind the environment to
+    # the interpreter that setup-python has just put first on PATH.
+    - name: Use the requested Python for the Poetry environment
+      run: poetry env use "$(command -v python)"
+      shell: bash
+    - name: Install dependencies
+      run: poetry install --all-extras
+      shell: bash
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
@@ -0,0 +1,24 @@
+# Runs the repository's pre-commit hooks on every branch push, once per
+# Python version listed in the matrix.
+on:
+  push:
+    branches:
+      - "**"
+
+jobs:
+  run-checks:
+    runs-on: ubuntu-latest
+    strategy:
+      # Let every Python version report its own result instead of cancelling
+      # the remaining matrix jobs as soon as one of them fails.
+      fail-fast: false
+      matrix:
+        # Quoted so YAML keeps '3.10' as a string instead of the float 3.1.
+        python-version: ['3.8', '3.9', '3.10']
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/setup-poetry
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Run styling check
+        # --show-diff-on-failure prints what the hooks changed when a hook fails.
+        run: poetry run pre-commit run --all-files --show-diff-on-failure
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -4,7 +4,7 @@ repos:
     hooks:
       - id: system
         name: Black
-        entry: poetry run black nbrunner dsnotebooks examples
+        entry: poetry run black --include '(\.py|\.ipynb)$' nbrunner dsnotebooks examples
         pass_filenames: false
         language: system
         files: '(\.py|\.ipynb)$'

diff --git a/examples/attachment_management/manage_attachments.ipynb b/examples/attachment_management/manage_attachments.ipynb
@@ -42,10 +42,10 @@
     "# notebook settings auto-loaded from .env / env vars\n",
     "notebook_settings = ProjectNotebookSettings()\n",
     "\n",
-    "PROFILE_NAME = notebook_settings.profile     # profile to use\n",
-    "PROJ_KEY = notebook_settings.proj_key        # project to use\n",
+    "PROFILE_NAME = notebook_settings.profile  # profile to use\n",
+    "PROJ_KEY = notebook_settings.proj_key  # project to use\n",
     "INDEX_NAME = notebook_settings.new_idx_name  # index to create\n",
-    "CLEANUP = notebook_settings.cleanup          # whether to clean up\n",
+    "CLEANUP = notebook_settings.cleanup  # whether to clean up\n",
     "ATTACHMENT_KEY = \"usr_attachments\"  # format must be: \"usr_<snake_case>\"\n",
     "FILES_TO_ATTACH = [\n",
     "    \"../../data/samples/2206.00785.pdf\",\n",
@@ -100,8 +100,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def find_index_item(api, coordinates, search_query=\"*\", source=None, page_size=50, pred=None):\n",
-    "    \"\"\" Find first index item that satisfies the criteria \"\"\"\n",
+    "def find_index_item(\n",
+    "    api, coordinates, search_query=\"*\", source=None, page_size=50, pred=None\n",
+    "):\n",
+    "    \"\"\"Find first index item that satisfies the criteria\"\"\"\n",
     "    source_to_use = [\"_id\", \"_name\", \"_s3_data\"] if source is None else source\n",
     "    query = DataQuery(\n",
     "        search_query=search_query,\n",
@@ -116,6 +118,7 @@
     "                return item\n",
     "    return None\n",
     "\n",
+    "\n",
     "def list_item_attachments(api, coordinates, index_item_id, attch_key):\n",
     "    pred = lambda x: x[\"_id\"] == index_item_id\n",
     "    item = find_index_item(api, coordinates, pred=pred)\n",
@@ -420,9 +423,7 @@
     "    m.update(json.dumps(row, sort_keys=True).encode())\n",
     "    h = m.hexdigest()\n",
     "    row[\"_name\"] = f\"row-{i:06d}-{h[:5]}\"\n",
-    "    row[\"file-info\"] = {\n",
-    "        \"document-hash\": h\n",
-    "    }"
+    "    row[\"file-info\"] = {\"document-hash\": h}"
    ]
   },
   {
@@ -609,8 +610,7 @@
     "    filename = Path(attachment[\"path\"]).name\n",
     "    download_url = attachment[\"url\"]\n",
     "    display(HTML(f'&#128073; Download <a href=\"{download_url}\">{filename}</a>'))\n",
-    "    print()\n",
-    "    "
+    "    print()"
    ]
   },
   {

diff --git a/examples/bring_your_own_converted_documents/upload_converted_documents.ipynb b/examples/bring_your_own_converted_documents/upload_converted_documents.ipynb
@@ -46,10 +46,10 @@
     "# notebook settings auto-loaded from .env / env vars\n",
     "notebook_settings = ProjectNotebookSettings()\n",
     "\n",
-    "PROFILE_NAME = notebook_settings.profile     # profile to use\n",
-    "PROJ_KEY = notebook_settings.proj_key        # project to use\n",
+    "PROFILE_NAME = notebook_settings.profile  # profile to use\n",
+    "PROJ_KEY = notebook_settings.proj_key  # project to use\n",
     "INDEX_NAME = notebook_settings.new_idx_name  # index to create\n",
-    "CLEANUP = notebook_settings.cleanup          # whether to clean up\n",
+    "CLEANUP = notebook_settings.cleanup  # whether to clean up\n",
     "INPUT_FILES_FOLDER = Path(\"../../data/converted/\")\n",
     "TMP_DIR = tempfile.TemporaryDirectory()"
    ]
@@ -212,7 +212,11 @@
     }
    ],
    "source": [
-    "display(Markdown(f\"The data is now available. You can query it programmatically (see next section) or access it via the Deep Search UI at <br />{api.client.config.host}/projects/{PROJ_KEY}/library/private/{data_index.source.index_key}\"))"
+    "display(\n",
+    "    Markdown(\n",
+    "        f\"The data is now available. You can query it programmatically (see next section) or access it via the Deep Search UI at <br />{api.client.config.host}/projects/{PROJ_KEY}/library/private/{data_index.source.index_key}\"\n",
+    "    )\n",
+    ")"
    ]
   },
   {
@@ -249,7 +253,7 @@
     "# Count the documents in the data index\n",
     "query = DataQuery(\"*\", source=[\"\"], limit=0, coordinates=data_index.source)\n",
     "query_results = api.queries.run(query)\n",
-    "num_results = query_results.outputs['data_count']\n",
+    "num_results = query_results.outputs[\"data_count\"]\n",
     "print(f\"The data index contains {num_results} entries.\")"
    ]
   },
@@ -286,23 +290,34 @@
    "source": [
     "# Find documents matching query\n",
     "search_query = \"speedup\"\n",
-    "query = DataQuery(search_query, source=[\"description.title\", \"description.authors\"], coordinates=data_index.source)\n",
+    "query = DataQuery(\n",
+    "    search_query,\n",
+    "    source=[\"description.title\", \"description.authors\"],\n",
+    "    coordinates=data_index.source,\n",
+    ")\n",
     "query_results = api.queries.run(query)\n",
     "\n",
     "all_results = []\n",
     "cursor = api.queries.run_paginated_query(query)\n",
     "for result_page in tqdm(cursor):\n",
     "    # Iterate through the results of a single page, and add to the total list\n",
-    "    for row in result_page.outputs[\"data_outputs\"]:  \n",
+    "    for row in result_page.outputs[\"data_outputs\"]:\n",
     "        print()\n",
     "        # Add row to results table\n",
-    "        all_results.append({\n",
-    "            \"Title\": row[\"_source\"][\"description\"][\"title\"],\n",
-    "            \"Authors\": \", \".join([author[\"name\"] for author in row[\"_source\"][\"description\"].get(\"authors\", [])]),\n",
-    "        })    \n",
+    "        all_results.append(\n",
+    "            {\n",
+    "                \"Title\": row[\"_source\"][\"description\"][\"title\"],\n",
+    "                \"Authors\": \", \".join(\n",
+    "                    [\n",
+    "                        author[\"name\"]\n",
+    "                        for author in row[\"_source\"][\"description\"].get(\"authors\", [])\n",
+    "                    ]\n",
+    "                ),\n",
+    "            }\n",
+    "        )\n",
     "\n",
     "num_results = len(all_results)\n",
-    "print(f'Finished fetching all data. Total is {num_results} records.')"
+    "print(f\"Finished fetching all data. Total is {num_results} records.\")"
    ]
   },
   {
@@ -388,7 +403,7 @@
     "    api.data_indices.delete(data_index.source)\n",
     "    print(\"Data index deleted\")\n",
     "    TMP_DIR.cleanup()\n",
-    "    print(\"Temporary directory deleted\")\n"
+    "    print(\"Temporary directory deleted\")"
    ]
   }
  ],