Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test: Show current pipeline run issues (DO NOT MERGE) #8695

Closed
wants to merge 9 commits into from
Previous commit
Next commit
chore: format
mathislucka committed Jan 9, 2025
commit 438ffaaadbf325f689bf421d1072ae191df9b250
2 changes: 1 addition & 1 deletion haystack/components/audio/whisper_local.py
Original file line number Diff line number Diff line change
@@ -72,7 +72,7 @@ def __init__(
whisper_import.check()
if model not in get_args(WhisperLocalModel):
raise ValueError(
f"Model name '{model}' not recognized. Choose one among: " f"{', '.join(get_args(WhisperLocalModel))}."
f"Model name '{model}' not recognized. Choose one among: {', '.join(get_args(WhisperLocalModel))}."
)
self.model = model
self.whisper_params = whisper_params or {}
3 changes: 1 addition & 2 deletions haystack/components/converters/openapi_functions.py
Original file line number Diff line number Diff line change
@@ -249,8 +249,7 @@ def _parse_openapi_spec(self, content: str) -> Dict[str, Any]:
open_api_spec_content = yaml.safe_load(content)
except yaml.YAMLError:
error_message = (
"Failed to parse the OpenAPI specification. "
"The content does not appear to be valid JSON or YAML.\n\n"
"Failed to parse the OpenAPI specification. The content does not appear to be valid JSON or YAML.\n\n"
)
raise RuntimeError(error_message, content)

2 changes: 1 addition & 1 deletion haystack/components/generators/chat/hugging_face_local.py
Original file line number Diff line number Diff line change
@@ -149,7 +149,7 @@ def __init__( # pylint: disable=too-many-positional-arguments

if task not in PIPELINE_SUPPORTED_TASKS:
raise ValueError(
f"Task '{task}' is not supported. " f"The supported tasks are: {', '.join(PIPELINE_SUPPORTED_TASKS)}."
f"Task '{task}' is not supported. The supported tasks are: {', '.join(PIPELINE_SUPPORTED_TASKS)}."
)
huggingface_pipeline_kwargs["task"] = task

4 changes: 2 additions & 2 deletions haystack/components/rankers/lost_in_the_middle.py
Original file line number Diff line number Diff line change
@@ -51,7 +51,7 @@ def __init__(self, word_count_threshold: Optional[int] = None, top_k: Optional[i
"""
if isinstance(word_count_threshold, int) and word_count_threshold <= 0:
raise ValueError(
f"Invalid value for word_count_threshold: {word_count_threshold}. " f"word_count_threshold must be > 0."
f"Invalid value for word_count_threshold: {word_count_threshold}. word_count_threshold must be > 0."
)
if isinstance(top_k, int) and top_k <= 0:
raise ValueError(f"top_k must be > 0, but got {top_k}")
@@ -78,7 +78,7 @@ def run(
"""
if isinstance(word_count_threshold, int) and word_count_threshold <= 0:
raise ValueError(
f"Invalid value for word_count_threshold: {word_count_threshold}. " f"word_count_threshold must be > 0."
f"Invalid value for word_count_threshold: {word_count_threshold}. word_count_threshold must be > 0."
)
if isinstance(top_k, int) and top_k <= 0:
raise ValueError(f"top_k must be > 0, but got {top_k}")
10 changes: 5 additions & 5 deletions haystack/core/component/component.py
Original file line number Diff line number Diff line change
@@ -268,9 +268,9 @@ def __call__(cls, *args, **kwargs):
try:
pre_init_hook.in_progress = True
named_positional_args = ComponentMeta._positional_to_kwargs(cls, args)
assert (
set(named_positional_args.keys()).intersection(kwargs.keys()) == set()
), "positional and keyword arguments overlap"
assert set(named_positional_args.keys()).intersection(kwargs.keys()) == set(), (
"positional and keyword arguments overlap"
)
kwargs.update(named_positional_args)
pre_init_hook.callback(cls, kwargs)
instance = super().__call__(**kwargs)
@@ -309,8 +309,8 @@ def _component_repr(component: Component) -> str:
# We're explicitly ignoring the type here because we're sure that the component
# has the __haystack_input__ and __haystack_output__ attributes at this point
return (
f'{result}\n{getattr(component, "__haystack_input__", "<invalid_input_sockets>")}'
f'\n{getattr(component, "__haystack_output__", "<invalid_output_sockets>")}'
f"{result}\n{getattr(component, '__haystack_input__', '<invalid_input_sockets>')}"
f"\n{getattr(component, '__haystack_output__', '<invalid_output_sockets>')}"
)


6 changes: 3 additions & 3 deletions haystack/core/pipeline/draw.py
Original file line number Diff line number Diff line change
@@ -124,7 +124,7 @@ def _to_mermaid_text(graph: networkx.MultiDiGraph) -> str:
}

states = {
comp: f"{comp}[\"<b>{comp}</b><br><small><i>{type(data['instance']).__name__}{optional_inputs[comp]}</i></small>\"]:::component" # noqa
comp: f'{comp}["<b>{comp}</b><br><small><i>{type(data["instance"]).__name__}{optional_inputs[comp]}</i></small>"]:::component' # noqa
for comp, data in graph.nodes(data=True)
if comp not in ["input", "output"]
}
@@ -139,11 +139,11 @@ def _to_mermaid_text(graph: networkx.MultiDiGraph) -> str:
connections_list.append(conn_string)

input_connections = [
f"i{{&ast;}}--\"{conn_data['label']}<br><small><i>{conn_data['conn_type']}</i></small>\"--> {states[to_comp]}"
f'i{{&ast;}}--"{conn_data["label"]}<br><small><i>{conn_data["conn_type"]}</i></small>"--> {states[to_comp]}'
for _, to_comp, conn_data in graph.out_edges("input", data=True)
]
output_connections = [
f"{states[from_comp]}--\"{conn_data['label']}<br><small><i>{conn_data['conn_type']}</i></small>\"--> o{{&ast;}}"
f'{states[from_comp]}--"{conn_data["label"]}<br><small><i>{conn_data["conn_type"]}</i></small>"--> o{{&ast;}}'
for from_comp, _, conn_data in graph.in_edges("output", data=True)
]
connections = "\n".join(connections_list + input_connections + output_connections)
9 changes: 3 additions & 6 deletions haystack/document_stores/in_memory/document_store.py
Original file line number Diff line number Diff line change
@@ -396,8 +396,7 @@ def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Doc
if filters:
if "operator" not in filters and "conditions" not in filters:
raise ValueError(
"Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering "
"for details."
"Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
)
return [doc for doc in self.storage.values() if document_matches_filter(filters=filters, document=doc)]
return list(self.storage.values())
@@ -506,8 +505,7 @@ def bm25_retrieval(
if filters:
if "operator" not in filters:
raise ValueError(
"Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering "
"for details."
"Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
)
filters = {"operator": "AND", "conditions": [content_type_filter, filters]}
else:
@@ -574,8 +572,7 @@ def embedding_retrieval( # pylint: disable=too-many-positional-arguments
return []
elif len(documents_with_embeddings) < len(all_documents):
logger.info(
"Skipping some Documents that don't have an embedding. "
"To generate embeddings, use a DocumentEmbedder."
"Skipping some Documents that don't have an embedding. To generate embeddings, use a DocumentEmbedder."
)

scores = self._compute_query_embedding_similarity_scores(
3 changes: 1 addition & 2 deletions haystack/marshal/yaml.py
Original file line number Diff line number Diff line change
@@ -31,8 +31,7 @@ def marshal(self, dict_: Dict[str, Any]) -> str:
return yaml.dump(dict_, Dumper=YamlDumper)
except yaml.representer.RepresenterError as e:
raise TypeError(
"Error dumping pipeline to YAML - Ensure that all pipeline "
"components only serialize basic Python types"
"Error dumping pipeline to YAML - Ensure that all pipeline components only serialize basic Python types"
) from e

def unmarshal(self, data_: Union[str, bytes, bytearray]) -> Dict[str, Any]:
3 changes: 1 addition & 2 deletions haystack/utils/filters.py
Original file line number Diff line number Diff line change
@@ -112,8 +112,7 @@ def _less_than_equal(document_value: Any, filter_value: Any) -> bool:
def _in(document_value: Any, filter_value: Any) -> bool:
if not isinstance(filter_value, list):
msg = (
f"Filter value must be a `list` when using operator 'in' or 'not in', "
f"received type '{type(filter_value)}'"
f"Filter value must be a `list` when using operator 'in' or 'not in', received type '{type(filter_value)}'"
)
raise FilterError(msg)
return any(_equal(e, document_value) for e in filter_value)
2 changes: 1 addition & 1 deletion haystack/utils/hf.py
Original file line number Diff line number Diff line change
@@ -205,7 +205,7 @@ def resolve_hf_pipeline_kwargs( # pylint: disable=too-many-positional-arguments
task = model_info(huggingface_pipeline_kwargs["model"], token=huggingface_pipeline_kwargs["token"]).pipeline_tag

if task not in supported_tasks:
raise ValueError(f"Task '{task}' is not supported. " f"The supported tasks are: {', '.join(supported_tasks)}.")
raise ValueError(f"Task '{task}' is not supported. The supported tasks are: {', '.join(supported_tasks)}.")
huggingface_pipeline_kwargs["task"] = task
return huggingface_pipeline_kwargs

12 changes: 6 additions & 6 deletions test/components/audio/test_whisper_local.py
Original file line number Diff line number Diff line change
@@ -190,14 +190,14 @@ def test_whisper_local_transcriber(self, test_files_path):
docs = output["documents"]
assert len(docs) == 3

assert all(
word in docs[0].content.strip().lower() for word in {"content", "the", "document"}
), f"Expected words not found in: {docs[0].content.strip().lower()}"
assert all(word in docs[0].content.strip().lower() for word in {"content", "the", "document"}), (
f"Expected words not found in: {docs[0].content.strip().lower()}"
)
assert test_files_path / "audio" / "this is the content of the document.wav" == docs[0].meta["audio_file"]

assert all(
word in docs[1].content.strip().lower() for word in {"context", "answer"}
), f"Expected words not found in: {docs[1].content.strip().lower()}"
assert all(word in docs[1].content.strip().lower() for word in {"context", "answer"}), (
f"Expected words not found in: {docs[1].content.strip().lower()}"
)
path = test_files_path / "audio" / "the context for this answer is here.wav"
assert path.absolute() == docs[1].meta["audio_file"]

6 changes: 3 additions & 3 deletions test/components/converters/test_docx_file_to_document.py
Original file line number Diff line number Diff line change
@@ -176,9 +176,9 @@ def test_run_with_table(self, test_files_path):
table_index = next(i for i, part in enumerate(content_parts) if "| This | Is | Just a |" in part)
# check that natural order of the document is preserved
assert any("Donald Trump" in part for part in content_parts[:table_index]), "Text before table not found"
assert any(
"Now we are in Page 2" in part for part in content_parts[table_index + 1 :]
), "Text after table not found"
assert any("Now we are in Page 2" in part for part in content_parts[table_index + 1 :]), (
"Text after table not found"
)

def test_run_with_store_full_path_false(self, test_files_path):
"""
6 changes: 3 additions & 3 deletions test/components/embedders/test_openai_document_embedder.py
Original file line number Diff line number Diff line change
@@ -251,8 +251,8 @@ def test_run(self):
assert len(doc.embedding) == 1536
assert all(isinstance(x, float) for x in doc.embedding)

assert (
"text" in result["meta"]["model"] and "ada" in result["meta"]["model"]
), "The model name does not contain 'text' and 'ada'"
assert "text" in result["meta"]["model"] and "ada" in result["meta"]["model"], (
"The model name does not contain 'text' and 'ada'"
)

assert result["meta"]["usage"] == {"prompt_tokens": 15, "total_tokens": 15}, "Usage information does not match"
6 changes: 3 additions & 3 deletions test/components/embedders/test_openai_text_embedder.py
Original file line number Diff line number Diff line change
@@ -130,8 +130,8 @@ def test_run(self):
assert len(result["embedding"]) == 1536
assert all(isinstance(x, float) for x in result["embedding"])

assert (
"text" in result["meta"]["model"] and "ada" in result["meta"]["model"]
), "The model name does not contain 'text' and 'ada'"
assert "text" in result["meta"]["model"] and "ada" in result["meta"]["model"], (
"The model name does not contain 'text' and 'ada'"
)

assert result["meta"]["usage"] == {"prompt_tokens": 6, "total_tokens": 6}, "Usage information does not match"
6 changes: 3 additions & 3 deletions test/components/joiners/test_document_joiner.py
Original file line number Diff line number Diff line change
@@ -302,6 +302,6 @@ def test_test_score_norm_with_rrf(self):
for i in range(len(join_results["documents"]) - 1)
)

assert (
is_sorted
), "Documents are not sorted in descending order by score, there is an issue with rff ranking"
assert is_sorted, (
"Documents are not sorted in descending order by score, there is an issue with rff ranking"
)
8 changes: 2 additions & 6 deletions test/components/preprocessors/test_document_cleaner.py
Original file line number Diff line number Diff line change
@@ -71,7 +71,7 @@ def test_remove_whitespaces(self):
)
assert len(result["documents"]) == 1
assert result["documents"][0].content == (
"This is a text with some words. " "" "There is a second sentence. " "" "And there is a third sentence.\f"
"This is a text with some words. There is a second sentence. And there is a third sentence.\f"
)

def test_remove_substrings(self):
@@ -210,11 +210,7 @@ def test_ascii_only(self):
def test_other_document_fields_are_not_lost(self):
cleaner = DocumentCleaner(keep_id=True)
document = Document(
content="This is a text with some words. \n"
""
"There is a second sentence. \n"
""
"And there is a third sentence.\n",
content="This is a text with some words. \nThere is a second sentence. \nAnd there is a third sentence.\n",
dataframe=DataFrame({"col1": [1], "col2": [2]}),
blob=ByteStream.from_string("some_data"),
meta={"data": 1},
6 changes: 3 additions & 3 deletions test/components/routers/test_conditional_router.py
Original file line number Diff line number Diff line change
@@ -436,9 +436,9 @@ def test_router_with_optional_parameters(self):

# Test pipeline without path parameter
result = pipe.run(data={"router": {"question": "What?"}})
assert result["router"] == {
"fallback": "What?"
}, "Default route should work in pipeline when 'path' is not provided"
assert result["router"] == {"fallback": "What?"}, (
"Default route should work in pipeline when 'path' is not provided"
)

# Test pipeline with path parameter
result = pipe.run(data={"router": {"question": "What?", "path": "followup_short"}})