Skip to content

Commit

Permalink
chore: apply black formatting and enable CI on pre-commit hooks (#66)
Browse files Browse the repository at this point in the history
  • Loading branch information
dolfim-ibm authored May 27, 2024
1 parent fecc449 commit ce9a2fa
Show file tree
Hide file tree
Showing 21 changed files with 1,678 additions and 1,426 deletions.
19 changes: 19 additions & 0 deletions .github/actions/setup-poetry/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Composite action: installs a pinned Poetry release, sets up the requested
# Python interpreter with Poetry dependency caching enabled, and then installs
# the project together with all of its extras.
name: "Set up Poetry and install"
description: "Set up a specific version of Poetry and install dependencies using caching."
inputs:
  python-version:
    description: "Version range or exact version of Python or PyPy to use, using SemVer's version range syntax."
    # Kept as a quoted string: a bare 3.10 would be read by YAML as the float 3.1.
    default: "3.10"
runs:
  using: "composite"
  steps:
    # Poetry is installed before actions/setup-python runs with `cache: poetry`.
    - name: Install poetry
      shell: bash
      run: pipx install poetry==1.8.3
    # Python version comes from the caller-supplied input (default above).
    - uses: actions/setup-python@v4
      with:
        python-version: ${{ inputs.python-version }}
        cache: "poetry"
    # Every optional extra is installed along with the base dependencies.
    - name: Install dependencies
      shell: bash
      run: poetry install --all-extras
18 changes: 18 additions & 0 deletions .github/workflows/checks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Runs the pre-commit hooks against every file in the repository on each push,
# once per Python version listed in the matrix.
on:
  push:
    branches:
      # Quoted because a bare * would be parsed as a YAML alias.
      - '**'

jobs:
  run-checks:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Versions stay quoted so that 3.10 is not parsed as the float 3.1.
        python-version:
          - "3.8"
          - "3.9"
          - "3.10"
    steps:
      - uses: actions/checkout@v3
      # Repository-local composite action, given the matrix Python version.
      - uses: ./.github/actions/setup-poetry
        with:
          python-version: ${{ matrix.python-version }}
      - name: Run styling check
        run: poetry run pre-commit run --all-files
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ repos:
hooks:
- id: system
name: Black
entry: poetry run black nbrunner dsnotebooks examples
entry: poetry run black --include '(\.py|\.ipynb)$' nbrunner dsnotebooks examples
pass_filenames: false
language: system
files: '(\.py|\.ipynb)$'
Expand Down
20 changes: 10 additions & 10 deletions examples/attachment_management/manage_attachments.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@
"# notebook settings auto-loaded from .env / env vars\n",
"notebook_settings = ProjectNotebookSettings()\n",
"\n",
"PROFILE_NAME = notebook_settings.profile # profile to use\n",
"PROJ_KEY = notebook_settings.proj_key # project to use\n",
"PROFILE_NAME = notebook_settings.profile # profile to use\n",
"PROJ_KEY = notebook_settings.proj_key # project to use\n",
"INDEX_NAME = notebook_settings.new_idx_name # index to create\n",
"CLEANUP = notebook_settings.cleanup # whether to clean up\n",
"CLEANUP = notebook_settings.cleanup # whether to clean up\n",
"ATTACHMENT_KEY = \"usr_attachments\" # format must be: \"usr_<snake_case>\"\n",
"FILES_TO_ATTACH = [\n",
" \"../../data/samples/2206.00785.pdf\",\n",
Expand Down Expand Up @@ -100,8 +100,10 @@
"metadata": {},
"outputs": [],
"source": [
"def find_index_item(api, coordinates, search_query=\"*\", source=None, page_size=50, pred=None):\n",
" \"\"\" Find first index item that satisfies the criteria \"\"\"\n",
"def find_index_item(\n",
" api, coordinates, search_query=\"*\", source=None, page_size=50, pred=None\n",
"):\n",
" \"\"\"Find first index item that satisfies the criteria\"\"\"\n",
" source_to_use = [\"_id\", \"_name\", \"_s3_data\"] if source is None else source\n",
" query = DataQuery(\n",
" search_query=search_query,\n",
Expand All @@ -116,6 +118,7 @@
" return item\n",
" return None\n",
"\n",
"\n",
"def list_item_attachments(api, coordinates, index_item_id, attch_key):\n",
" pred = lambda x: x[\"_id\"] == index_item_id\n",
" item = find_index_item(api, coordinates, pred=pred)\n",
Expand Down Expand Up @@ -420,9 +423,7 @@
" m.update(json.dumps(row, sort_keys=True).encode())\n",
" h = m.hexdigest()\n",
" row[\"_name\"] = f\"row-{i:06d}-{h[:5]}\"\n",
" row[\"file-info\"] = {\n",
" \"document-hash\": h\n",
" }"
" row[\"file-info\"] = {\"document-hash\": h}"
]
},
{
Expand Down Expand Up @@ -609,8 +610,7 @@
" filename = Path(attachment[\"path\"]).name\n",
" download_url = attachment[\"url\"]\n",
" display(HTML(f'&#128073; Download <a href=\"{download_url}\">{filename}</a>'))\n",
" print()\n",
" "
" print()"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,10 @@
"# notebook settings auto-loaded from .env / env vars\n",
"notebook_settings = ProjectNotebookSettings()\n",
"\n",
"PROFILE_NAME = notebook_settings.profile # profile to use\n",
"PROJ_KEY = notebook_settings.proj_key # project to use\n",
"PROFILE_NAME = notebook_settings.profile # profile to use\n",
"PROJ_KEY = notebook_settings.proj_key # project to use\n",
"INDEX_NAME = notebook_settings.new_idx_name # index to create\n",
"CLEANUP = notebook_settings.cleanup # whether to clean up\n",
"CLEANUP = notebook_settings.cleanup # whether to clean up\n",
"INPUT_FILES_FOLDER = Path(\"../../data/converted/\")\n",
"TMP_DIR = tempfile.TemporaryDirectory()"
]
Expand Down Expand Up @@ -212,7 +212,11 @@
}
],
"source": [
"display(Markdown(f\"The data is now available. You can query it programmatically (see next section) or access it via the Deep Search UI at <br />{api.client.config.host}/projects/{PROJ_KEY}/library/private/{data_index.source.index_key}\"))"
"display(\n",
" Markdown(\n",
" f\"The data is now available. You can query it programmatically (see next section) or access it via the Deep Search UI at <br />{api.client.config.host}/projects/{PROJ_KEY}/library/private/{data_index.source.index_key}\"\n",
" )\n",
")"
]
},
{
Expand Down Expand Up @@ -249,7 +253,7 @@
"# Count the documents in the data index\n",
"query = DataQuery(\"*\", source=[\"\"], limit=0, coordinates=data_index.source)\n",
"query_results = api.queries.run(query)\n",
"num_results = query_results.outputs['data_count']\n",
"num_results = query_results.outputs[\"data_count\"]\n",
"print(f\"The data index contains {num_results} entries.\")"
]
},
Expand Down Expand Up @@ -286,23 +290,34 @@
"source": [
"# Find documents matching query\n",
"search_query = \"speedup\"\n",
"query = DataQuery(search_query, source=[\"description.title\", \"description.authors\"], coordinates=data_index.source)\n",
"query = DataQuery(\n",
" search_query,\n",
" source=[\"description.title\", \"description.authors\"],\n",
" coordinates=data_index.source,\n",
")\n",
"query_results = api.queries.run(query)\n",
"\n",
"all_results = []\n",
"cursor = api.queries.run_paginated_query(query)\n",
"for result_page in tqdm(cursor):\n",
" # Iterate through the results of a single page, and add to the total list\n",
" for row in result_page.outputs[\"data_outputs\"]: \n",
" for row in result_page.outputs[\"data_outputs\"]:\n",
" print()\n",
" # Add row to results table\n",
" all_results.append({\n",
" \"Title\": row[\"_source\"][\"description\"][\"title\"],\n",
" \"Authors\": \", \".join([author[\"name\"] for author in row[\"_source\"][\"description\"].get(\"authors\", [])]),\n",
" }) \n",
" all_results.append(\n",
" {\n",
" \"Title\": row[\"_source\"][\"description\"][\"title\"],\n",
" \"Authors\": \", \".join(\n",
" [\n",
" author[\"name\"]\n",
" for author in row[\"_source\"][\"description\"].get(\"authors\", [])\n",
" ]\n",
" ),\n",
" }\n",
" )\n",
"\n",
"num_results = len(all_results)\n",
"print(f'Finished fetching all data. Total is {num_results} records.')"
"print(f\"Finished fetching all data. Total is {num_results} records.\")"
]
},
{
Expand Down Expand Up @@ -388,7 +403,7 @@
" api.data_indices.delete(data_index.source)\n",
" print(\"Data index deleted\")\n",
" TMP_DIR.cleanup()\n",
" print(\"Temporary directory deleted\")\n"
" print(\"Temporary directory deleted\")"
]
}
],
Expand Down
Loading

0 comments on commit ce9a2fa

Please sign in to comment.