From 235370d8e730d9a5cc2c799589e90adf6c5a3004 Mon Sep 17 00:00:00 2001 From: zaryabmakram Date: Tue, 31 Mar 2026 15:39:50 +0500 Subject: [PATCH 1/6] fix: adjust input arguments for CausalAgent --- run_cais_new.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/run_cais_new.py b/run_cais_new.py index 93854df..d3b03a4 100644 --- a/run_cais_new.py +++ b/run_cais_new.py @@ -161,13 +161,13 @@ def main(): print('Starting run!') - cais = CausalAgent() + cais = CausalAgent( + dataset_path=data_path, + dataset_description=desc, + ) cais.run_analysis( query=row["natural_language_query"], - dataset_path=data_path, - dataset_description=desc, - use_decision_tree=True ) except Exception as e: From fca74a0d94a0a126c65ac741b299f90299511215 Mon Sep 17 00:00:00 2001 From: zaryabmakram Date: Tue, 31 Mar 2026 15:40:41 +0500 Subject: [PATCH 2/6] fix: remove passed invalid input argument --- cais/agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cais/agent.py b/cais/agent.py index dee2aff..2579c21 100644 --- a/cais/agent.py +++ b/cais/agent.py @@ -341,7 +341,7 @@ def run_causal_analysis(query: str, dataset_path: str, instrument_hints=input_parsing_result["extracted_variables"].get("instruments_mentioned") ) - query_interpreter_output = query_interpreter_tool.func(query_info=query_info, dataset_analysis=dataset_analysis_result, dataset_description=input_parsing_result["dataset_description"], original_query = input_parsing_result["original_query"]).variables + query_interpreter_output = query_interpreter_tool.func(dataset_analysis=dataset_analysis_result, dataset_description=input_parsing_result["dataset_description"], original_query=input_parsing_result["original_query"]).variables # print('LOG RESULTS') # print(input_parsing_result['extracted_variables']) From b093ec9dac2e23378c4ae03bc76bd827562d216b Mon Sep 17 00:00:00 2001 From: zaryabmakram Date: Tue, 31 Mar 2026 15:41:06 +0500 Subject: [PATCH 3/6] write result per index to an output file --- run_cais_new.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/run_cais_new.py b/run_cais_new.py index d3b03a4..dde4dba 100644 --- a/run_cais_new.py +++ b/run_cais_new.py @@ -166,9 +166,21 @@ def main(): dataset_description=desc, ) - cais.run_analysis( + res = cais.run_analysis( query=row["natural_language_query"], ) + + # write result to file + formatted_result = { + "query": row["natural_language_query"], + "method": row["method"], + "answer": row["answer"], + "dataset_description": desc, + "dataset_path": data_path, + "keywords": row.get("keywords", "Causality, Average treatment effect"), + "final_result": res + } + file.write(json.dumps({idx: formatted_result}) + "\n") except Exception as e: logging.error(f"[row {idx}] Error: {e}") From 87e7055b5fec4461bd1f6b701de15b3953b7ea56 Mon Sep 17 00:00:00 2001 From: zaryabmakram Date: Tue, 31 Mar 2026 15:41:50 +0500 Subject: [PATCH 4/6] fallback to original datafile if cleaning fails --- cais/components/dataset_cleaner.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cais/components/dataset_cleaner.py b/cais/components/dataset_cleaner.py index e8531ce..b1f184d 100644 --- a/cais/components/dataset_cleaner.py +++ b/cais/components/dataset_cleaner.py @@ -279,6 +279,10 @@ def run_cleaning_stage(dataset_path: str, if ("Traceback" in stderr_all) or ("Error" in stderr_all): report.append("\n⚠️ LLM pipeline produced errors. Check stderr; artifacts may be missing or partial.") + if not os.path.exists(cleaned_path): + report.append(f"\n⚠️ Cleaned file not found at expected path; falling back to original dataset.") + cleaned_path = dataset_path + return { "cleaned_dataset_path": cleaned_path, "cleaning_report_md": "\n".join(report), From a06ceb7e7f0b04e67fcd7916e79905bf902b8eea Mon Sep 17 00:00:00 2001 From: zaryabmakram Date: Tue, 31 Mar 2026 15:42:20 +0500 Subject: [PATCH 5/6] store query_info in result --- cais/agent.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cais/agent.py b/cais/agent.py index 2579c21..1858576 100644 --- a/cais/agent.py +++ b/cais/agent.py @@ -447,6 +447,16 @@ def run_causal_analysis(query: str, dataset_path: str, dataset_description=input_parsing_result["dataset_description"], original_query = input_parsing_result["original_query"]) result = explainer_output + + # include query_info in result + result["query_info"] = { + "query_text": input_parsing_result["original_query"], + "potential_treatments": input_parsing_result["extracted_variables"].get("treatment"), + "potential_outcomes": input_parsing_result["extracted_variables"].get("outcome"), + "covariates_hints": input_parsing_result["extracted_variables"].get("covariates_mentioned"), + "instrument_hints": input_parsing_result["extracted_variables"].get("instruments_mentioned") + } + #result['results']['results']["method_used"] = method_validator_output.get('method') logger.debug(result) logger.info("Causal analysis run finished.") From a8bd74a1f98ab2a9f3081047f4616a2fbc4ac03f Mon Sep 17 00:00:00 2001 From: zaryabmakram Date: Tue, 31 Mar 2026 15:42:48 +0500 Subject: [PATCH 6/6] skip removing data files --- cais/agent.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cais/agent.py b/cais/agent.py index 1858576..3577b5c 100644 --- a/cais/agent.py +++ b/cais/agent.py @@ -234,7 +234,7 @@ def execute_method(self, query=None, remove_cleaned=True): if self.cleaned_dataset_path and remove_cleaned: if isinstance(self.load_dataset(cleaned=True), pd.DataFrame): - os.remove(self.cleaned_dataset_path) + # os.remove(self.cleaned_dataset_path) self.cleaned_dataset_path=None logger.info("Succesfully Removed Cleaned Dataset.") @@ -462,8 +462,8 @@ def run_causal_analysis(query: str, dataset_path: str, logger.info("Causal analysis run finished.") # Remove the cleaned csv - logger.info("Removing cleaned csv.") - os.remove(cleaned_path) + # logger.info("Removing cleaned csv.") + # os.remove(cleaned_path) # Ensure result is a dict and extract the 'output' part if isinstance(result, dict):