@@ -146,7 +146,9 @@ def analyze_source(
146
146
logger .debug (error_msg )
147
147
raise HeuristicAnalyzerValueError (error_msg ) from error
148
148
149
- def evaluate_heuristic_results (self , heuristic_results : dict [Heuristics , HeuristicResult ]) -> float | None :
149
+ def evaluate_heuristic_results (
150
+ self , heuristic_results : dict [Heuristics , HeuristicResult ]
151
+ ) -> tuple [float , JsonType ]:
150
152
"""Analyse the heuristic results to determine the maliciousness of the package.
151
153
152
154
Parameters
@@ -156,18 +158,17 @@ def evaluate_heuristic_results(self, heuristic_results: dict[Heuristics, Heurist
156
158
157
159
Returns
158
160
-------
159
- float | None
160
- Returns the confidence associated with the detected malicious combination, otherwise None if no associated
161
- malicious combination was triggered.
161
+ tuple[float, JsonType]
162
+ Returns the summed confidence of the queried malicious combinations, and the associated rule IDs detailing
163
+ which rules were triggered (a single "No malicious rules triggered" entry when the confidence is zero).
162
164
"""
163
165
facts_list : list [str ] = []
164
166
for heuristic , result in heuristic_results .items ():
165
- if result == HeuristicResult .SKIP :
166
- facts_list .append (f"0.0::{ heuristic .value } ." )
167
- elif result == HeuristicResult .PASS :
167
+ if result == HeuristicResult .PASS :
168
168
facts_list .append (f"{ heuristic .value } :- true." )
169
- else : # HeuristicResult.FAIL
169
+ elif result == HeuristicResult .FAIL :
170
170
facts_list .append (f"{ heuristic .value } :- false." )
171
+ # Do not define for HeuristicResult.SKIP
171
172
172
173
facts = "\n " .join (facts_list )
173
174
problog_code = f"{ facts } \n \n { self .malware_rules_problog_model } "
@@ -176,10 +177,12 @@ def evaluate_heuristic_results(self, heuristic_results: dict[Heuristics, Heurist
176
177
problog_model = PrologString (problog_code )
177
178
problog_results : dict [Term , float ] = get_evaluatable ().create_from (problog_model ).evaluate ()
178
179
179
- confidence : float | None = problog_results .get (Term (self .problog_result_access ))
180
- if confidence == 0.0 :
181
- return None # no rules were triggered
182
- return confidence
180
+ confidence = sum (conf for conf in problog_results .values () if conf is not None )
181
+ triggered_rules : JsonType = ["No malicious rules triggered" ]
182
+ if confidence > 0 :
183
+ triggered_rules = [term .args [0 ] for term in problog_results ]
184
+
185
+ return confidence , triggered_rules
183
186
184
187
def run_heuristics (
185
188
self , pypi_package_json : PyPIPackageJsonAsset
@@ -299,9 +302,10 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
299
302
except HeuristicAnalyzerValueError :
300
303
return CheckResultData (result_tables = [], result_type = CheckResultType .UNKNOWN )
301
304
302
- confidence = self .evaluate_heuristic_results (heuristic_results )
305
+ confidence , triggered_rules = self .evaluate_heuristic_results (heuristic_results )
306
+ heuristics_detail_info ["triggered_rules" ] = triggered_rules
303
307
result_type = CheckResultType .FAILED
304
- if confidence is None :
308
+ if not confidence :
305
309
confidence = Confidence .HIGH
306
310
result_type = CheckResultType .PASSED
307
311
@@ -353,51 +357,61 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
353
357
AnomalousVersionAnalyzer ,
354
358
]
355
359
356
- problog_result_access = "result"
357
-
358
360
malware_rules_problog_model = f"""
359
- % Heuristic groupings
361
+ % ----- Wrappers ------
362
+ % These should be used to logically check for a pass or fail on a heuristic for the rest of the model. They exist since,
363
+ % when a heuristic is skipped, it is omitted from being defined in the ProbLog model, and as such these try_call statements
364
+ % are needed to handle referencing an undefined fact.
365
+ passed(H) :- try_call(H).
366
+ failed(H) :- try_call(not H).
367
+
368
+ % ----- Heuristic groupings -----
360
369
% These are common combinations of heuristics that are used in many of the rules, thus themselves representing
361
370
% certain behaviors. When changing or adding rules here, if there are frequent combinations of particular
362
- % heuristics, group them together here.
371
+ % heuristics, group them together here. Note, these should only be used to check if a grouping statement
372
+ % is true. Evaluating 'not quickUndetailed' would be true if empty project link and closer release join
373
+ % date passed, or if they were both skipped, which is not desired behaviour.
363
374
364
375
% Maintainer has recently joined, publishing an undetailed page with no links.
365
- quickUndetailed :- not { Heuristics .EMPTY_PROJECT_LINK .value } , not { Heuristics .CLOSER_RELEASE_JOIN_DATE .value } .
376
+ quickUndetailed :- failed( { Heuristics .EMPTY_PROJECT_LINK .value } ), failed( { Heuristics .CLOSER_RELEASE_JOIN_DATE .value } ) .
366
377
367
378
% Maintainer releases a suspicious setup.py and forces it to run by omitting a .whl file.
368
- forceSetup :- not { Heuristics .SUSPICIOUS_SETUP .value } , not { Heuristics .WHEEL_ABSENCE .value } .
379
+ forceSetup :- failed( { Heuristics .SUSPICIOUS_SETUP .value } ), failed( { Heuristics .WHEEL_ABSENCE .value } ) .
369
380
370
- % Suspicious Combinations
381
+ % ----- Suspicious Combinations -----
371
382
372
383
% Package released recently with little detail, forcing the setup.py to run.
373
- { Confidence .HIGH .value } ::high :- quickUndetailed, forceSetup, not { Heuristics .ONE_RELEASE .value } .
374
- { Confidence .HIGH .value } ::high :- quickUndetailed, forceSetup, not { Heuristics .HIGH_RELEASE_FREQUENCY .value } .
384
+ { Confidence .HIGH .value } ::result("high_confidence_1") :-
385
+ quickUndetailed, forceSetup, failed({ Heuristics .ONE_RELEASE .value } ).
386
+ { Confidence .HIGH .value } ::result("high_confidence_2") :-
387
+ quickUndetailed, forceSetup, failed({ Heuristics .HIGH_RELEASE_FREQUENCY .value } ).
375
388
376
389
% Package released recently with little detail, with some more refined trust markers introduced: project links,
377
390
% multiple different releases, but there is no source code repository matching it and the setup is suspicious.
378
- { Confidence .HIGH .value } ::high :- not { Heuristics .SOURCE_CODE_REPO .value } ,
379
- not { Heuristics .HIGH_RELEASE_FREQUENCY .value } ,
380
- not { Heuristics .CLOSER_RELEASE_JOIN_DATE .value } ,
381
- { Heuristics .UNCHANGED_RELEASE .value } ,
391
+ { Confidence .HIGH .value } ::result("high_confidence_3") :-
392
+ failed({ Heuristics .SOURCE_CODE_REPO .value } ),
393
+ failed({ Heuristics .HIGH_RELEASE_FREQUENCY .value } ),
394
+ passed({ Heuristics .UNCHANGED_RELEASE .value } ),
395
+ failed({ Heuristics .CLOSER_RELEASE_JOIN_DATE .value } ),
382
396
forceSetup.
383
397
384
398
% Package released recently with little detail, with multiple releases as a trust marker, but frequent and with
385
399
% the same code.
386
- { Confidence .MEDIUM .value } ::medium :- quickUndetailed,
387
- not { Heuristics .HIGH_RELEASE_FREQUENCY .value } ,
388
- not { Heuristics .UNCHANGED_RELEASE .value } ,
389
- { Heuristics .SUSPICIOUS_SETUP .value } .
400
+ { Confidence .MEDIUM .value } ::result("medium_confidence_1") :-
401
+ quickUndetailed,
402
+ failed({ Heuristics .HIGH_RELEASE_FREQUENCY .value } ),
403
+ failed({ Heuristics .UNCHANGED_RELEASE .value } ),
404
+ passed({ Heuristics .SUSPICIOUS_SETUP .value } ).
390
405
391
406
% Package released recently with little detail and an anomalous version number for a single-release package.
392
- { Confidence .MEDIUM .value } ::medium :- quickUndetailed,
393
- not { Heuristics .ONE_RELEASE .value } ,
394
- { Heuristics .WHEEL_ABSENCE .value } ,
395
- not { Heuristics .ANOMALOUS_VERSION .value } .
396
-
397
- { problog_result_access } :- high.
398
- { problog_result_access } :- medium.
399
-
400
- query({ problog_result_access } ).
407
+ { Confidence .MEDIUM .value } ::result("medium_confidence_2") :-
408
+ quickUndetailed,
409
+ failed({ Heuristics .ONE_RELEASE .value } ),
410
+ passed({ Heuristics .WHEEL_ABSENCE .value } ),
411
+ failed({ Heuristics .ANOMALOUS_VERSION .value } ).
412
+
413
+ % ----- Evaluation -----
414
+ query(result(_)).
401
415
"""
402
416
403
417
0 commit comments