diff --git a/src/sast_agent_workflow/configs/config.yml b/src/sast_agent_workflow/configs/config.yml
index 77c48c49..52380083 100644
--- a/src/sast_agent_workflow/configs/config.yml
+++ b/src/sast_agent_workflow/configs/config.yml
@@ -77,3 +77,11 @@ general:
       level: INFO
   front_end:
     type: console
+  telemetry:
+    tracing:
+      galileo:
+        _type: galileo
+        endpoint: galileo_endpoint
+        project: sast-workflow-project-name
+        logstream: default
+        api_key: ${GALILEO_API_KEY}
diff --git a/src/sast_agent_workflow/register.py b/src/sast_agent_workflow/register.py
index dc394568..9c5efcbd 100644
--- a/src/sast_agent_workflow/register.py
+++ b/src/sast_agent_workflow/register.py
@@ -129,7 +129,7 @@ def convert_str_to_sast_tracker(input_str: str) -> SASTWorkflowTracker:
         return empty_state
 
 def convert_sast_tracker_to_str(tracker: SASTWorkflowTracker) -> str:
-    """Convert SASTWorkflowTracker to summary statistics string"""
+    """Convert SASTWorkflowTracker to summary statistics string including metrics"""
     logger.debug("Converting SASTWorkflowTracker to summary stats")
     try:
         # For debug, print the tracker to the console
@@ -140,14 +140,43 @@ def convert_sast_tracker_to_str(tracker: SASTWorkflowTracker) -> str:
 
         # Calculate summary statistics
         counter = categorize_issues_by_status(tracker.issues)
-        return json.dumps(counter, indent=2)
-
+        # Include metrics if available (only accuracy, recall, precision, f1_score)
+        output = dict(counter)
+        if tracker.metrics:
+            # Filter to only show key performance metrics
+            filtered_metrics = {
+                "accuracy": tracker.metrics.get("accuracy"),
+                "recall": tracker.metrics.get("recall"),
+                "precision": tracker.metrics.get("precision"),
+                "f1_score": tracker.metrics.get("f1_score")
+            }
+            output["metrics"] = _make_json_serializable(filtered_metrics)
+
+        return json.dumps(output, indent=2)
     except Exception as e:
         logger.error("Failed to convert SASTWorkflowTracker to summary stats: %s", e)
         raise e
+
+
+def _make_json_serializable(obj):
+    """Recursively convert sets to lists and numpy types to native Python types for JSON serialization"""
+    import numpy as np
+
+    if isinstance(obj, dict):
+        return {k: _make_json_serializable(v) for k, v in obj.items()}
+    elif isinstance(obj, (list, tuple)):
+        return [_make_json_serializable(item) for item in obj]
+    elif isinstance(obj, set):
+        return list(obj)
+    elif isinstance(obj, (np.integer, np.int64, np.int32)):
+        return int(obj)
+    elif isinstance(obj, (np.floating, np.float64, np.float32)):
+        return float(obj)
+    else:
+        return obj
 
 async def _response_fn(input_message: SASTWorkflowTracker) -> SASTWorkflowTracker:
-    """Main response function that runs the LangGraph workflow"""
+    """Main response function that runs the LangGraph workflow"""
     results = await graph.ainvoke(input_message)
     graph_output = SASTWorkflowTracker(**results)
     return graph_output
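Reviewer note: a minimal sketch of what the new `_make_json_serializable` helper does with the value types `tracker.metrics` may carry. The import path and the sample payload below are assumptions for illustration, not taken from the repo:

```python
import json

import numpy as np

# Assumed import path for the module-level helper added above.
from sast_agent_workflow.register import _make_json_serializable

# Hypothetical payload: numpy scalars and a set, neither of which
# json.dumps can serialize directly.
payload = {
    "accuracy": np.float64(0.92),
    "f1_score": np.float32(0.88),
    "matched_issue_ids": {"def1", "def2"},
}

# Numpy scalars become native ints/floats and sets become lists,
# so the dump no longer raises TypeError.
print(json.dumps(_make_json_serializable(payload), indent=2))
```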
diff --git a/src/sast_agent_workflow/tools/write_results.py b/src/sast_agent_workflow/tools/write_results.py
index 7ae6e029..bdc09ec5 100644
--- a/src/sast_agent_workflow/tools/write_results.py
+++ b/src/sast_agent_workflow/tools/write_results.py
@@ -163,8 +163,12 @@ def _create_mock_evaluation_summary(summary_data, config, metrics):
         mock_summary.tn = confusion_matrix.get(METRICS_FIELD_TRUE_NEGATIVES, 0)
         mock_summary.fp = confusion_matrix.get(METRICS_FIELD_FALSE_POSITIVES, 0)
         mock_summary.fn = confusion_matrix.get(METRICS_FIELD_FALSE_NEGATIVES, 0)
+        # CRITICAL: Set ground_truth to non-None if we have confusion matrix metrics.
+        # This signals to the Excel writer that GT comparison was performed.
+        mock_summary.ground_truth = "calculated"
     else:
         mock_summary.tp = mock_summary.tn = mock_summary.fp = mock_summary.fn = 0
+        mock_summary.ground_truth = None
 
     # ExcelWriter expects these to be collections that support len()
     # Map from our stored sets/lists back to the expected attributes
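The `ground_truth = "calculated"` sentinel only needs to be non-None. A sketch of the distinction it makes visible downstream; the gate shown here is illustrative, the real check lives in the ExcelWriter code, not in this diff:

```python
from types import SimpleNamespace

# Illustrative only: the sentinel distinguishes "GT comparison ran and
# happened to produce zero counts" from "no GT comparison at all", which
# raw tp/tn/fp/fn values of 0 cannot express on their own.
with_gt = SimpleNamespace(tp=0, tn=0, fp=0, fn=0, ground_truth="calculated")
without_gt = SimpleNamespace(tp=0, tn=0, fp=0, fn=0, ground_truth=None)

for summary in (with_gt, without_gt):
    # The kind of condition the Excel writer can now apply.
    print("write GT columns:", summary.ground_truth is not None)
```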
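On the config side, the new `telemetry.tracing.galileo` block resolves `api_key` from the `${GALILEO_API_KEY}` environment variable. A small preflight sketch, illustrative and not part of this change, to fail fast when the variable is unset:

```python
import os
import sys

# GALILEO_API_KEY is the variable named in the new config.yml block;
# without it the ${GALILEO_API_KEY} interpolation has nothing to resolve.
if not os.environ.get("GALILEO_API_KEY"):
    sys.exit("GALILEO_API_KEY is not set; the galileo tracing exporter will likely fail to authenticate")
```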