Skip to content

Commit 13bd0c8

Browse files
gustavocidornelas, whoseoyster
authored and committed
Completes UNB-2555 - Specifying a tokenizer on the client fails validation
1 parent b89b531 commit 13bd0c8

File tree

1 file changed

+33
-0
lines changed

1 file changed

+33
-0
lines changed

unboxapi/__init__.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ def add_model(
218218
dependent_dir: Optional[str] = None,
219219
commit_message: str = None,
220220
project_id: str = None,
221+
explainability_tokenizer: Optional[callable] = None,
221222
**kwargs,
222223
) -> Model:
223224
"""Uploads a model to the Unbox platform.
@@ -275,6 +276,12 @@ def add_model(
275276
is :obj:`ModelType.custom`.
276277
commit_message : str, default None
277278
Commit message for this version.
279+
explainability_tokenizer : callable, default None
280+
Optional custom tokenizer function that will be used exclusively by the explainability techniques.
281+
This is **not** the tokenizer used by `function` (in case your `function` has a tokenizer as a kwarg).
282+
The `explainability_tokenizer` should receive a list of sentences as input and return a list of lists of tokens.
283+
E.g. `["Hello world", "San Francisco is hot today"]` as input and `[["Hello", "world"], ["San Francisco", "is", "hot", "today"]]`
284+
as output.
278285
**kwargs
279286
Any additional keyword args you would like to pass to your ``predict_proba`` function.
280287
@@ -589,6 +596,32 @@ def add_model(
589596
+ "you may find it useful to debug it on the Jupyter notebook, to ensure it is working correctly before uploading it.",
590597
) from None
591598

599+
# explainability tokenizer
600+
if not isinstance(explainability_tokenizer, Callable):
601+
raise UnboxValidationError(
602+
f"- `{explainability_tokenizer}` specified as `explainability_tokenizer` is not callable. \n"
603+
) from None
604+
605+
if model_type != ModelType.custom:
606+
try:
607+
if task_type in [TaskType.TextClassification]:
608+
test_input = [
609+
"Unbox is great!",
610+
"Let's see if this function is ready for some error analysis",
611+
]
612+
with HidePrints():
613+
function(model, test_input, **kwargs)
614+
except Exception as e:
615+
exception_stack = "".join(
616+
traceback.format_exception(type(e), e, e.__traceback__)
617+
)
618+
raise UnboxResourceError(
619+
context="There is an issue with the specified `explainability_tokenizer`. \n",
620+
message=f"It is failing with the following error: \n{exception_stack}",
621+
mitigation="Make sure your `explainability_tokenizer` receives a list of sentences as input and returns a list of lists of tokens "
622+
+ "as output. Additionally, you may find it useful to debug it on the Jupyter notebook, to ensure it is working correctly before uploading it.",
623+
) from None
624+
592625
# Transformers resources
593626
if model_type is ModelType.transformers:
594627
if "tokenizer" not in kwargs:

0 commit comments

Comments
 (0)