Skip to content

Commit

Permalink
feat!: move all dependencies to extras (#93)
Browse files Browse the repository at this point in the history
Signed-off-by: Michele Dolfi <[email protected]>
Signed-off-by: Peter Staar <[email protected]>
Co-authored-by: Peter Staar <[email protected]>
  • Loading branch information
dolfim-ibm and PeterStaar-IBM authored Dec 6, 2024
1 parent 61e5499 commit 69318d8
Show file tree
Hide file tree
Showing 16 changed files with 286 additions and 205 deletions.
13 changes: 12 additions & 1 deletion .github/actions/setup-poetry/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ inputs:
python-version:
description: "Version range or exact version of Python or PyPy to use, using SemVer's version range syntax."
default: '3.11'
run_install:
description: "Install the dependencies."
default: 'true'
install_extras:
description: "When installing dependencies, the extra dependencies are included."
default: 'true'
runs:
using: 'composite'
steps:
Expand All @@ -21,6 +27,11 @@ runs:
poetry env use ${{ steps.py.outputs.python-path }}
poetry run python --version
shell: bash
- name: Install only dependencies and not the package itself
- name: Install the dependencies and the extras (but not the package itself)
if: ${{ inputs.run_install == 'true' && inputs.install_extras == 'true' }}
run: poetry install --all-extras --no-root
shell: bash
- name: Install only dependencies and not the package itself
if: ${{ inputs.run_install == 'true' && inputs.install_extras != 'true' }}
run: poetry install --no-root
shell: bash
2 changes: 1 addition & 1 deletion .github/scripts/build_rhel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,5 @@ docker build --progress=plain \
RUN cd /src \
&& pip3.11 install pytest \
&& pytest ./tests/test_glm.py -v
&& pytest ./tests/test_simple_interface.py -v
EOF
13 changes: 12 additions & 1 deletion .github/workflows/checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,18 @@ jobs:
- uses: ./.github/actions/setup-poetry
with:
python-version: ${{ matrix.python-version }}
- name: Install with poetry
run_install: 'false'

- name: Compile and install
run: |
poetry install
ls -l
ls -l deepsearch_glm
- name: Test interface without extras
run: |
poetry run pytest ./tests/test_simple_interface.py -vs
- name: Install extras
run: |
poetry install --all-extras
ls -l
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ jobs:
- name: Quick test wheel
if: ${{ matrix.os.platform_id == 'win_amd64' || (matrix.os.platform_id == 'macosx_arm64' && matrix.os.name == 'macos-14') }}
run: |
poetry install --no-interaction --no-root --only=test
poetry install --no-interaction --no-root --all-extras
poetry run python -c 'from deepsearch_glm import andromeda_glm'
poetry run pytest ./tests/test_glm.py -v
Expand Down
2 changes: 0 additions & 2 deletions deepsearch_glm/glm_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@
"""Module to query the GLM"""

import argparse
import json

import pandas as pd
from tabulate import tabulate

from deepsearch_glm.andromeda_glm import glm_query
Expand Down
2 changes: 0 additions & 2 deletions deepsearch_glm/nlp_model_training/name_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
import re

import pandas as pd
from tabulate import tabulate
from tqdm import tqdm

from deepsearch_glm.glm_utils import read_edges_in_dataframe, read_nodes_in_dataframe
from deepsearch_glm.nlp_utils import (
Expand Down
2 changes: 0 additions & 2 deletions deepsearch_glm/nlp_model_training/person_name_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,8 @@
import json
import os
import random
import re

import pandas as pd
from tabulate import tabulate
from tqdm import tqdm

from deepsearch_glm.glm_utils import read_edges_in_dataframe, read_nodes_in_dataframe
Expand Down
3 changes: 0 additions & 3 deletions deepsearch_glm/nlp_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,6 @@ def init_nlp_model(
model = nlp_model()
model.set_loglevel(loglevel)

configs = model.get_apply_configs()
# print(json.dumps(configs, indent=2))

config = model.get_apply_configs()[0]
config["models"] = model_names
config["subject-filters"] = filters
Expand Down
4 changes: 0 additions & 4 deletions deepsearch_glm/utils/common.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
import os

from dotenv import load_dotenv


def get_scratch_dir():
"""Get scratch directory from environment variable `DEEPSEARCH_GLM_SCRATCH_DIR` (defined in .env)"""

load_dotenv()

tmpdir = os.path.abspath(os.getenv("DEEPSEARCH_GLM_SCRATCH_DIR"))

if not os.path.exists(tmpdir):
Expand Down
345 changes: 180 additions & 165 deletions poetry.lock

Large diffs are not rendered by default.

32 changes: 15 additions & 17 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,16 @@ build = "build.py"

[tool.poetry.dependencies]
python = "^3.9"
docling-core = "^2.0"
pywin32 = { version = "^307", markers = "sys_platform == 'win32'" }
docling-core = { version = "^2.0", optional = true }
deepsearch-toolkit = { version = "^1.1.0", optional = true }
tabulate = ">=0.8.9"
numpy = ">=1.24.4,<3.0.0"
pandas = ">=1.5.1,<3.0.0"
tabulate = { version = ">=0.8.9", optional = true }
pandas = { version = ">=1.5.1,<3.0.0", optional = true }
matplotlib = { version = "^3.7.1", optional = true }
python-dotenv = "^1.0.0"
tqdm = "^4.64.0"
rich = "^13.7.0"
docutils = "!=0.21"
pywin32 = { version = "^307", markers = "sys_platform == 'win32'" }
requests = "^2.32.3"
python-dotenv = { version = "^1.0.0", optional = true }
tqdm = { version = "^4.64.0", optional = true }
rich = { version = "^13.7.0", optional = true }
requests = { version = "^2.32.3", optional = true }

[tool.poetry.group.test.dependencies]
pytest = "^7.4.2"
Expand All @@ -46,6 +44,7 @@ isort = "^5.13.2"
mypy = "^1.7.1"
pre-commit = "2.17.0"
pylint = "^3.0.3"
docutils = "!=0.21" # added by python-semantic-release
python-semantic-release = "^7.32.2"

[tool.poetry.group.build.dependencies]
Expand Down Expand Up @@ -73,12 +72,13 @@ pandas = [
# support recursive extras: https://github.com/python-poetry/poetry/issues/3369)

pyplot = ["matplotlib"]
toolkit = ["deepsearch-toolkit"]

toolkit = ["deepsearch-toolkit", "python-dotenv"]
docling = ["docling-core", "pandas"]
utils = ["tabulate", "python-dotenv", "pandas", "tqdm", "rich", "requests"]

[tool.black]
line-length = 88
target-version = ["py38"]
target-version = ["py39"]
include = '\.pyi?$'
#extend-exclude = """
## Exclude generated API code
Expand All @@ -88,16 +88,14 @@ include = '\.pyi?$'
[tool.isort]
profile = "black"
line_length = 88
#skip_glob = ["docs", "deepsearch/cps/apis"]
py_version=38
#known_first_party = ["cps"]
py_version=39

[tool.mypy]
# plugins = ["pydantic.mypy"]
pretty = true
# strict = true
#no_implicit_optional = true
python_version = 3.8
python_version = "3.9"
disable_error_code = ["import-untyped"]

#[[tool.mypy.overrides]]
Expand Down
19 changes: 15 additions & 4 deletions src/pybind/base_log.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ namespace andromeda_py

base_log();

base_log(std::string level);

bool set_loglevel(std::string level);
};

Expand All @@ -33,23 +35,32 @@ namespace andromeda_py
*/
}

// Construct the logger and immediately apply `level` through set_loglevel().
// The boolean returned by set_loglevel() is not checked here.
base_log::base_log(std::string level)
{
set_loglevel(level);
}

bool base_log::set_loglevel(std::string level)
{
if(level=="INFO")
if(level=="INFO" or level=="info")
{
loguru::g_stderr_verbosity = loguru::Verbosity_INFO;
}
else if(level=="WARNING")
else if(level=="WARNING" or level=="warning")
{
loguru::g_stderr_verbosity = loguru::Verbosity_WARNING;
}
else if(level=="ERROR")
else if(level=="ERROR" or level=="error")
{
loguru::g_stderr_verbosity = loguru::Verbosity_ERROR;
}
else if(level=="FATAL" or level=="fatal")
{
loguru::g_stderr_verbosity = loguru::Verbosity_FATAL;
}
else
{
loguru::g_stderr_verbosity = loguru::Verbosity_WARNING;
loguru::g_stderr_verbosity = loguru::Verbosity_ERROR;
return false;
}

Expand Down
17 changes: 17 additions & 0 deletions src/pybind/nlp_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ namespace andromeda_py
public:

nlp_model();
nlp_model(std::string loglevel, bool text_ordering, bool normalise_chars, bool normalise_text);

~nlp_model();

bool initialise(const nlohmann::json config_);
Expand Down Expand Up @@ -66,6 +68,21 @@ namespace andromeda_py
char_normaliser(andromeda::text_element::create_char_normaliser(false)),
text_normaliser(andromeda::text_element::create_text_normaliser(false))
{}

// Construct an nlp_model from explicit settings: `loglevel` is forwarded to
// the base_log constructor, `text_ordering` initialises order_text, and the
// two normalise_* flags are passed to the char/text normaliser factories.
nlp_model::nlp_model(std::string loglevel, bool text_ordering, bool normalise_chars, bool normalise_text):
base_log::base_log(loglevel),
base_resources::base_resources(),

config(nlohmann::json::object({})),

order_text(text_ordering),
models({}),

char_normaliser(andromeda::text_element::create_char_normaliser(normalise_chars)),
text_normaliser(andromeda::text_element::create_text_normaliser(normalise_text))
{
// Record the ordering flag in the (initially empty) JSON config.
config["order-text"] = order_text;
}

nlp_model::~nlp_model()
{}
Expand Down
10 changes: 9 additions & 1 deletion src/pybind/nlp_modules.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,15 @@ PYBIND11_MODULE(andromeda_nlp, m) {

pybind11::class_<andromeda_py::nlp_model>(m, "nlp_model")
.def(pybind11::init())

.def(pybind11::init<std::string, bool, bool, bool>(),
pybind11::arg("loglevel"),
pybind11::arg("text_ordering") = true,
pybind11::arg("normalise_chars") = true,
pybind11::arg("normalise_text") = true,
R"(
Initialise the NLP models with standard parameters.)"
)

.def("set_loglevel", &andromeda_py::nlp_model::set_loglevel)
.def("get_resources_path", &andromeda_py::nlp_model::get_resources_path)

Expand Down
24 changes: 24 additions & 0 deletions tests/test_simple_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import json

from deepsearch_glm.andromeda_nlp import nlp_model


def test_simple_interface_v1():
    """Smoke-test the default-constructed model configured through a config dict.

    The model is built with the no-argument constructor, its first apply-config
    is edited (no models, no subject filters) and passed to ``initialise``,
    and the model is then applied to a sample document. The test passes if
    none of these calls raises.
    """
    model = nlp_model()
    model.set_loglevel("WARNING")

    config = model.get_apply_configs()[0]
    config["models"] = ""
    config["subject-filters"] = []

    model.initialise(config)

    # Use a context manager so the file handle is closed deterministically,
    # and pin the encoding so the test does not depend on the platform locale.
    with open("tests/data/docs/1806.02284.json", encoding="utf-8") as fr:
        doc = json.load(fr)

    # Only checks that applying the model does not raise.
    model.apply_on_doc(doc)

def test_simple_interface_v2():
    """Smoke-test the keyword-argument constructor of ``nlp_model``.

    The model is built directly with ``loglevel`` and ``text_ordering`` and
    applied to a sample document without a separate ``initialise`` call.
    The test passes if none of these calls raises.
    """
    model = nlp_model(loglevel="warning", text_ordering=True)

    # Use a context manager so the file handle is closed deterministically,
    # and pin the encoding so the test does not depend on the platform locale.
    with open("tests/data/docs/1806.02284.json", encoding="utf-8") as fr:
        doc = json.load(fr)

    # Only checks that applying the model does not raise.
    model.apply_on_doc(doc)
1 change: 0 additions & 1 deletion tests/test_structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import json

import pandas as pd
from tabulate import tabulate

from deepsearch_glm.andromeda_structs import ds_document, ds_table, ds_text
from deepsearch_glm.nlp_utils import init_nlp_model
Expand Down

0 comments on commit 69318d8

Please sign in to comment.