OrderLab
diff --git a/‎mldaikon/infer_engine.py‎
Lines changed: 62 additions & 1 deletion b/‎mldaikon/infer_engine.py‎
Lines changed: 62 additions & 1 deletion
diff --git a/‎mldaikon/invariant/DistinctArgumentRelation.py‎
Lines changed: 85 additions & 65 deletions b/‎mldaikon/invariant/DistinctArgumentRelation.py‎
Lines changed: 85 additions & 65 deletions
@@ -7,7 +7,13 @@
 import time
 
 import mldaikon.config.config as config
-from mldaikon.invariant.base_cls import FailedHypothesis, Invariant, Relation
+from mldaikon.invariant.base_cls import (
+    FailedHypothesis,
+    Hypothesis,
+    Invariant,
+    Relation,
+)
+from mldaikon.invariant.precondition import find_precondition
 from mldaikon.invariant.relation_pool import relation_pool
 from mldaikon.trace import MDNONEJSONEncoder, select_trace_implementation
 from mldaikon.utils import register_custom_excepthook
@@ -47,6 +53,61 @@ def infer(self, disabled_relations: list[Relation]):
         )
         return all_invs, all_failed_hypos
 
+    def infer_multi_trace(self, disabled_relations: list[Relation]):
+        """Run inference across all traces in three stages.
+
+        The stages are: generate hypotheses, collect examples for them, then
+        infer preconditions.
+
+        Args:
+            disabled_relations: Relations forwarded to ``generate_hypothesis``.
+
+        Returns:
+            The ``(invariants, failed_hypos)`` pair produced by
+            ``infer_precondition``.
+        """
+        candidate_hypotheses = self.generate_hypothesis(disabled_relations)
+        self.collect_examples(candidate_hypotheses)
+        return self.infer_precondition(candidate_hypotheses)
+
+    def generate_hypothesis(
+        self, disabled_relations: list[Relation]
+    ) -> list[list[Hypothesis]]:
+        """Generate hypotheses for every enabled relation on every trace.
+
+        Args:
+            disabled_relations: Relations to skip; ``None`` disables nothing.
+
+        Returns:
+            One list per entry of ``self.traces``, in the same order, holding
+            the hypotheses the enabled relations generated from that trace.
+            A trace for which every relation is disabled yields an empty list.
+        """
+        hypotheses: list[list[Hypothesis]] = []
+        for trace in self.traces:
+            # Group by originating trace so the outer index lines up with the
+            # position of the trace in ``self.traces``.
+            trace_hypotheses: list[Hypothesis] = []
+            for relation in relation_pool:
+                if disabled_relations is not None and relation in disabled_relations:
+                    logger.info(
+                        f"Skipping relation {relation.__name__} as it is disabled"
+                    )
+                    continue
+                logger.info(f"Infering invariants for relation: {relation.__name__}")
+                # NOTE(review): assumes relation.generate_hypothesis returns a
+                # list of Hypothesis, as the return annotation implies -- confirm.
+                relation_hypotheses = relation.generate_hypothesis(trace)
+                logger.info(
+                    f"Found {len(relation_hypotheses)} hypotheses for relation: {relation.__name__}"
+                )
+                trace_hypotheses.extend(relation_hypotheses)
+            hypotheses.append(trace_hypotheses)
+        return hypotheses
+
+    def collect_examples(self, hypotheses: list[list[Hypothesis]]):
+        """Collect examples for each hypothesis on the traces that did not generate it.
+
+        Args:
+            hypotheses: Groups of hypotheses. The group at index ``k`` is
+                treated as generated from ``self.traces[k]`` and is skipped
+                when visiting that trace.
+
+        Each hypothesis is passed, together with the trace, to the
+        ``collect_examples`` method of its invariant's relation.
+        """
+        for trace_idx, trace in enumerate(self.traces):
+            for source_idx, trace_hypotheses in enumerate(hypotheses):
+                if source_idx == trace_idx:
+                    # already collected examples for these hypotheses on the
+                    # same trace that generated them
+                    continue
+                for hypothesis in trace_hypotheses:
+                    hypothesis.invariant.relation.collect_examples(trace, hypothesis)
+
+    def infer_precondition(self, hypotheses: list[list[Hypothesis]]):
+        """Find a precondition for every hypothesis and sort the outcomes.
+
+        Args:
+            hypotheses: Groups of hypotheses; the grouping is flattened first.
+
+        Returns:
+            A pair ``(invariants, failed_hypos)``. ``invariants`` holds the
+            invariant of each hypothesis for which ``find_precondition``
+            returned a value (stored on the invariant). ``failed_hypos`` holds
+            a ``FailedHypothesis`` for each one where it returned ``None``.
+        """
+        flattened = [hypo for group in hypotheses for hypo in group]
+
+        invariants = []
+        failed_hypos = []
+        for hypo in flattened:
+            found = find_precondition(hypo, self.traces)
+            if found is not None:
+                hypo.invariant.precondition = found
+                invariants.append(hypo.invariant)
+                continue
+            failed_hypos.append(FailedHypothesis(hypo))
+
+        return invariants, failed_hypos
+
 
 def save_invs(invs: list[Invariant], output_file: str):
     with open(output_file, "w") as f:
 
@@ -1,36 +1,34 @@
 import logging
 from itertools import combinations
-from typing import Any, Dict, List, Set, Tuple, Iterable
+from typing import Any, Dict, Iterable, List, Set, Tuple
 
 from tqdm import tqdm
 
-from mldaikon.invariant.base_cls import (
-    Param,
+from mldaikon.invariant.base_cls import (  # GroupedPreconditions,
     APIParam,
     CheckerResult,
     Example,
     ExampleList,
     FailedHypothesis,
-    # GroupedPreconditions,
     Hypothesis,
     Invariant,
     Relation,
 )
-
 from mldaikon.invariant.precondition import find_precondition
 from mldaikon.trace.trace import Trace
 
 EXP_GROUP_NAME = "distinct_arg"
-MAX_FUNC_NUM_CONSECUTIVE_CALL = 6 
-IOU_THRESHHOLD = 0.1 # pre-defined threshhold for IOU
+MAX_FUNC_NUM_CONSECUTIVE_CALL = 6
+IOU_THRESHHOLD = 0.1  # pre-defined threshhold for IOU
+
 
 def calculate_IOU(list1, list2):
-            set1 = set(list1)
-            set2 = set(list2)
-            intersection = set1.intersection(set2)
-            union = set1.union(set2)
-            iou = len(intersection) / len(union) if len(union) != 0 else 0
-            return iou
+    """Return the intersection-over-union of the distinct items of two iterables.
+
+    Duplicates are ignored because both inputs are converted to sets. When
+    both inputs are empty the union is empty and 0 is returned.
+    """
+    first, second = set(list1), set(list2)
+    combined = first | second
+    if not combined:
+        return 0
+    return len(first & second) / len(combined)
 
 
 def get_func_names_to_deal_with(trace: Trace) -> List[str]:
@@ -54,16 +52,20 @@ def get_func_names_to_deal_with(trace: Trace) -> List[str]:
 
     return list(function_pool)
 
+
 def get_event_data_per_function_per_step(trace: Trace, function_pool: Set[Any]):
-    listed_arguments: Dict[str, Dict[int, Dict[Tuple[str, str], List[dict[str, Any]]]]] = (
-        {}
-    )
+    listed_arguments: Dict[
+        str, Dict[int, Dict[Tuple[str, str], List[dict[str, Any]]]]
+    ] = {}
     for func_name in function_pool.copy():
         func_call_ids = trace.get_func_call_ids(func_name)
         keep_this_func = False
         for func_call_id in func_call_ids:
             event = trace.query_func_call_event(func_call_id)
-            if (event.pre_record["meta_vars.step"] is None or "args" not in event.pre_record):
+            if (
+                event.pre_record["meta_vars.step"] is None
+                or "args" not in event.pre_record
+            ):
                 continue
             keep_this_func = True
             process_id = event.pre_record["process_id"]
@@ -73,24 +75,26 @@ def get_event_data_per_function_per_step(trace: Trace, function_pool: Set[Any]):
                 listed_arguments[func_name] = {}
                 listed_arguments[func_name][step] = {}
                 listed_arguments[func_name][step][(process_id, thread_id)] = []
-            
+
             if step not in listed_arguments[func_name]:
                 listed_arguments[func_name][step] = {}
                 listed_arguments[func_name][step][(process_id, thread_id)] = []
-            
+
             if (process_id, thread_id) not in listed_arguments[func_name][step]:
                 listed_arguments[func_name][step][(process_id, thread_id)] = []
-            
-            listed_arguments[func_name][step][(process_id, thread_id)].append(event.pre_record)
-        
+
+            listed_arguments[func_name][step][(process_id, thread_id)].append(
+                event.pre_record
+            )
+
         if not keep_this_func:
             function_pool.remove(func_name)
 
     return function_pool, listed_arguments
 
 
 def get_event_list(trace: Trace, function_pool: Iterable[str]):
-    listed_events: List[dict[str, Any]]= []
+    listed_events: List[dict[str, Any]] = []
     # for all func_ids, get their corresponding events
     for func_name in function_pool:
         func_call_ids = trace.get_func_call_ids(func_name)
@@ -109,11 +113,16 @@ def get_event_list(trace: Trace, function_pool: Iterable[str]):
 
     return listed_events
 
-def compare_argument(value1, value2, IOU_criteria = True):
+
+def compare_argument(value1, value2, IOU_criteria=True):
     if type(value1) != type(value2):
         return False
     if isinstance(value1, list):
-        if IOU_criteria and all(isinstance(item, int) for item in value1) and all(isinstance(item, int) for item in value2):
+        if (
+            IOU_criteria
+            and all(isinstance(item, int) for item in value1)
+            and all(isinstance(item, int) for item in value2)
+        ):
             return calculate_IOU(value1, value2) >= IOU_THRESHHOLD
         if len(value1) != len(value2):
             return False
@@ -134,6 +143,7 @@ def compare_argument(value1, value2, IOU_criteria = True):
         return abs(value1 - value2) < 1e-8
     return value1 == value2
 
+
 def is_arguments_list_same(args1: list, args2: list):
     if len(args1) != len(args2):
         return False
@@ -144,6 +154,7 @@ def is_arguments_list_same(args1: list, args2: list):
             return False
     return True
 
+
 # class APIArgsParam(Param):
 #     def __init__(
 #         self, api_full_name: str, arg_name: str
@@ -181,7 +192,9 @@ def infer(trace: Trace) -> Tuple[List[Invariant], List[FailedHypothesis]]:
 
         # 1. Pre-process all the events
         print("Start preprocessing....")
-        listed_arguments: Dict[str, Dict[int, Dict[Tuple[str, str], List[dict[str, Any]]]]] = {}
+        listed_arguments: Dict[
+            str, Dict[int, Dict[Tuple[str, str], List[dict[str, Any]]]]
+        ] = {}
         function_pool: Set[Any] = set()
 
         function_pool = set(get_func_names_to_deal_with(trace))
@@ -205,9 +218,7 @@ def infer(trace: Trace) -> Tuple[List[Invariant], List[FailedHypothesis]]:
             func_name: Hypothesis(
                 invariant=Invariant(
                     relation=DistinctArgumentRelation,
-                    params=[
-                        APIParam(func_name)
-                    ],
+                    params=[APIParam(func_name)],
                     precondition=None,
                     text_description=f"{func_name} has distinct input arguments on difference PT for each step",
                 ),
@@ -226,29 +237,39 @@ def infer(trace: Trace) -> Tuple[List[Invariant], List[FailedHypothesis]]:
                 for PT_pair1, PT_pair2 in combinations(records.keys(), 2):
                     for event1 in records[PT_pair1]:
                         for event2 in records[PT_pair2]:
-                            if(not is_arguments_list_same(event1["args"], event2["args"])):
+                            if not is_arguments_list_same(
+                                event1["args"], event2["args"]
+                            ):
                                 flag = True
                                 pos = Example()
                                 pos.add_group(EXP_GROUP_NAME, [event1, event2])
-                                hypothesis_with_examples[func_name].positive_examples.add_example(pos)
+                                hypothesis_with_examples[
+                                    func_name
+                                ].positive_examples.add_example(pos)
                             else:
                                 neg = Example()
                                 neg.add_group(EXP_GROUP_NAME, [event1, event2])
-                                hypothesis_with_examples[func_name].negative_examples.add_example(neg)
-                    
+                                hypothesis_with_examples[
+                                    func_name
+                                ].negative_examples.add_example(neg)
+
                 for PT_pair in records.keys():
                     for event1, event2 in combinations(records[PT_pair], 2):
-                        if(not is_arguments_list_same(event1["args"], event2["args"])):
+                        if not is_arguments_list_same(event1["args"], event2["args"]):
                             flag = True
                             pos = Example()
                             pos.add_group(EXP_GROUP_NAME, [event1, event2])
-                            hypothesis_with_examples[func_name].positive_examples.add_example(pos)
+                            hypothesis_with_examples[
+                                func_name
+                            ].positive_examples.add_example(pos)
                         else:
                             neg = Example()
                             neg.add_group(EXP_GROUP_NAME, [event1, event2])
-                            hypothesis_with_examples[func_name].negative_examples.add_example(neg)
-            
-            if(not flag):
+                            hypothesis_with_examples[
+                                func_name
+                            ].negative_examples.add_example(neg)
+
+            if not flag:
                 hypothesis_with_examples.pop(func_name)
 
         print("End adding examples")
@@ -261,13 +282,11 @@ def infer(trace: Trace) -> Tuple[List[Invariant], List[FailedHypothesis]]:
             logger.debug(
                 f"Finding Precondition for {hypo}: {hypothesis_with_examples[hypo].invariant.text_description}"
             )
-            preconditions = find_precondition(hypothesis_with_examples[hypo], trace)
+            preconditions = find_precondition(hypothesis_with_examples[hypo], [trace])
             logger.debug(f"Preconditions for {hypo}:\n{str(preconditions)}")
 
             if preconditions is not None:
-                hypothesis_with_examples[hypo].invariant.precondition = (
-                    preconditions
-                )
+                hypothesis_with_examples[hypo].invariant.precondition = preconditions
             else:
                 logger.debug(f"Precondition not found for {hypo}")
                 failed_hypothesis.append(
@@ -281,13 +300,10 @@ def infer(trace: Trace) -> Tuple[List[Invariant], List[FailedHypothesis]]:
         print("End precondition inference")
 
         return (
-            list(
-                [hypo.invariant for hypo in hypothesis_with_examples.values()]
-            ),
+            list([hypo.invariant for hypo in hypothesis_with_examples.values()]),
             failed_hypothesis,
         )
 
-
     @staticmethod
     def evaluate(value_group: list) -> bool:
         """Given a group of values, should return a boolean value
@@ -317,13 +333,13 @@ def static_check_all(
 
         # 1. Pre-process all the events
         print("Start preprocessing....")
-        listed_arguments: Dict[str, Dict[int, Dict[Tuple[str, str], List[dict[str, Any]]]]] = {}
+        listed_arguments: Dict[
+            str, Dict[int, Dict[Tuple[str, str], List[dict[str, Any]]]]
+        ] = {}
         function_pool: Set[Any] = set()
-        func= inv.params[0]
+        func = inv.params[0]
 
-        assert isinstance(
-            func, APIParam
-        ), "Invariant parameters should be APIParam."
+        assert isinstance(func, APIParam), "Invariant parameters should be APIParam."
 
         func_name = func.api_full_name
         function_pool.add(func_name)
@@ -352,30 +368,34 @@ def static_check_all(
             )
 
         for step, records in listed_arguments[func_name].items():
-                for PT_pair1, PT_pair2 in combinations(records.keys(), 2):
-                    for event1 in records[PT_pair1]:
-                        for event2 in records[PT_pair2]:
-                            if(is_arguments_list_same(event1["args"], event2["args"])):
-                                return CheckerResult(
-                                    trace=[event1, event2],
-                                    invariant=inv,
-                                    check_passed=True,
-                                    triggered=True,
-                                )
-         
-                for PT_pair in records.keys():
-                    for event1, event2 in combinations(records[PT_pair], 2):
-                        if(is_arguments_list_same(event1["args"], event2["args"])):
+            for PT_pair1, PT_pair2 in combinations(records.keys(), 2):
+                for event1 in records[PT_pair1]:
+                    for event2 in records[PT_pair2]:
+                        if is_arguments_list_same(event1["args"], event2["args"]):
                             return CheckerResult(
                                 trace=[event1, event2],
                                 invariant=inv,
                                 check_passed=True,
                                 triggered=True,
                             )
 
+            for PT_pair in records.keys():
+                for event1, event2 in combinations(records[PT_pair], 2):
+                    if is_arguments_list_same(event1["args"], event2["args"]):
+                        return CheckerResult(
+                            trace=[event1, event2],
+                            invariant=inv,
+                            check_passed=True,
+                            triggered=True,
+                        )
+
         return CheckerResult(
             trace=None,
             invariant=inv,
             check_passed=True,
             triggered=True,
         )
+
+    @staticmethod
+    def get_precondition_infer_keys_to_skip(hypothesis: Hypothesis) -> list[str]:
+        """Return an empty list for any hypothesis: no keys are reported to skip."""
+        keys_to_skip: list[str] = []
+        return keys_to_skip