fix: iteration/matchidx for incremental scan (refactoring reducer)

dobin · Aug 29, 2023 · 94319d0 · 94319d0
1 parent a184161
commit 94319d0
Show file tree

Hide file tree

Showing 20 changed files with 88 additions and 76 deletions.
diff --git a/avred.py b/avred.py
@@ -24,9 +24,10 @@
 from plugins.dotnet.plugin_dotnet import PluginDotNet
 from plugins.pe.plugin_pe import PluginPe
 from plugins.office.plugin_office import PluginOffice
-from plugins.model import Plugin
+from model.plugin_model import Plugin
 from verifier import verify
 from minimizer import minimizeMatches
+from reducer import Reducer
 
 
 def handler(signum, frame):
@@ -132,6 +133,7 @@ def handleFile(filename, args, serverName):
         logging.error("Unknown filetype, aborting")
         exit(1)
 
+    # scanner is the connection to the AV-oracle
     scanner = None
     # load existing outcome
     if os.path.exists(filenameOutcome):
@@ -157,7 +159,6 @@ def handleFile(filename, args, serverName):
 
         fileInfo = getFileInfo(file)
         outcome = Outcome.nullOutcome(fileInfo)
-        analyzerOptions['scanSpeed'] = ScanSpeed(args.scanspeed)
 
     hashCache.load()
     # scan
@@ -191,6 +192,7 @@ def handleFile(filename, args, serverName):
         isDetected = True  # we now know that the file is being detected
         filePlay = deepcopy(file)  # leave original unmodified, apply matches for iterative scanning here
         iteration = 0
+        reducer = Reducer(filePlay, scanner, scanSpeed=ScanSpeed(args.scanspeed), iteration=iteration)
         MAX_ITERATIONS = 6
         while isDetected:
             if iteration > MAX_ITERATIONS:
@@ -199,7 +201,7 @@ def handleFile(filename, args, serverName):
 
             # get matches
             logging.info("Scanning for matches...")
-            matches, scanInfo = plugin.analyzeFile(filePlay, scanner, iteration, analyzerOptions)
+            matches, scanInfo = plugin.analyzeFile(filePlay, scanner, reducer, analyzerOptions)
             outcome.matches += matches
             logging.info("Result: {} matches".format(len(matches)))
             outcome.scanInfo = scanInfo
@@ -213,11 +215,13 @@ def handleFile(filename, args, serverName):
                 logging.info("Still detected on iteration {}, apply {} matches and do again".format(
                     iteration, len(matches)
                 ))
+                iteration += 1
+                # fresh reducer per iteration: resets scan chunk size and similar state;
+                # keyword args so scanSpeed cannot land in the `iteration` slot
+                reducer = Reducer(filePlay, scanner, scanSpeed=ScanSpeed(args.scanspeed), iteration=iteration)
             else:
                break
 
-            iteration += 1
-
         outcome.sections = filePlay.sectionsBag.sections
 
     hashCache.save()

diff --git a/filehelper.py b/filehelper.py
@@ -4,7 +4,7 @@
 from typing import List, Set, Dict, Tuple, Optional
 from enum import Enum
 
-from plugins.model import BaseFile
+from model.file_model import BaseFile
 from model.model_base import FileInfo
 
 

diff --git a/minimizer.py b/minimizer.py
@@ -1,7 +1,7 @@
 from typing import List
 from copy import deepcopy
 
-from plugins.model import BaseFile
+from model.file_model import BaseFile
 from model.model_base import Match, ScanSpeed
 from scanner import Scanner
 from reducer import Reducer

diff --git a/plugins/model.py → model/file_model.py b/plugins/model.py → model/file_model.py
@@ -1,12 +1,7 @@
-from dataclasses import dataclass
 import os
 import copy
-from abc import abstractmethod
-from typing import List, Tuple, Set
 
 from model.model_data import Match, Data
-from model.model_verification import MatchConclusion
-from model.model_base import Scanner, OutflankPatch, ScanInfo
 
 
 class BaseFile():
@@ -57,26 +52,3 @@ def loadFromMem(self, data: bytes, filename: str) -> bool:
         self.filename = filename
         self.fileData = Data(data)
         return self.parseFile()
-
-
-class Plugin():
-    def __init__(self):
-        pass
-
-    @abstractmethod
-    def makeFile(self, filepath: str):
-        pass
-
-    @abstractmethod
-    def analyzeFile(self, file: BaseFile, scanner: Scanner, iteration: int = 0, analyzerOptions={}) -> Tuple[Match, ScanInfo]:
-        pass
-
-    @abstractmethod
-    def augmentMatches(self, file: BaseFile, matches: List[Match]) -> str:
-        pass
-
-    @abstractmethod
-    def outflankFile(
-        self, filePe: BaseFile, matches: List[Match], matchConclusion: MatchConclusion, scanner: Scanner = None
-    ) -> List[OutflankPatch]:
-        pass
diff --git a/model/plugin_model.py b/model/plugin_model.py
@@ -0,0 +1,31 @@
+from abc import abstractmethod
+from typing import List, Tuple, Set
+
+from model.model_data import Match, Data
+from model.model_verification import MatchConclusion
+from model.model_base import Scanner, OutflankPatch, ScanInfo
+from reducer import Reducer
+from model.file_model import BaseFile
+
+
+class Plugin():
+    def __init__(self):
+        pass
+
+    @abstractmethod
+    def makeFile(self, filepath: str):
+        pass
+
+    @abstractmethod
+    def analyzeFile(self, file: BaseFile, scanner: Scanner, reducer: Reducer, analyzerOptions={}) -> Tuple[Match, ScanInfo]:
+        pass
+
+    @abstractmethod
+    def augmentMatches(self, file: BaseFile, matches: List[Match]) -> str:
+        pass
+
+    @abstractmethod
+    def outflankFile(
+        self, filePe: BaseFile, matches: List[Match], matchConclusion: MatchConclusion, scanner: Scanner = None
+    ) -> List[OutflankPatch]:
+        pass
diff --git a/plugins/dotnet/plugin_dotnet.py b/plugins/dotnet/plugin_dotnet.py
@@ -3,11 +3,13 @@
 from model.model_base import Scanner, OutflankPatch, ScanInfo
 from model.model_data import Match
 from model.model_verification import MatchConclusion
-from plugins.model import Plugin, BaseFile
+from model.plugin_model import Plugin
+from model.file_model import BaseFile
 from plugins.pe.analyzer_pe import analyzeFilePe
 from plugins.dotnet.augment_dotnet import augmentFileDotnet
 from plugins.dotnet.outflank_dotnet import outflankDotnet
 from plugins.pe.file_pe import FilePe
+from reducer import Reducer
 
 
 class PluginDotNet(Plugin):
@@ -18,9 +20,9 @@ def makeFile(self, filepath: str):
         return file
 
 
-    def analyzeFile(self, file: BaseFile, scanner: Scanner, iteration: int = 0, analyzerOptions={}) -> Tuple[Match, ScanInfo]:
+    def analyzeFile(self, file: BaseFile, scanner: Scanner, reducer: Reducer, analyzerOptions={}) -> Tuple[Match, ScanInfo]:
         # We use the simple PE analyzer
-        return analyzeFilePe(file, scanner, iteration, analyzerOptions)
+        return analyzeFilePe(file, scanner, reducer, analyzerOptions)
 
 
     def augmentFile(self, file: BaseFile, matches: List[Match]) -> str:

diff --git a/plugins/office/analyzer_office.py b/plugins/office/analyzer_office.py
@@ -10,10 +10,9 @@
 from plugins.office.file_office import FileOffice
 
 
-def analyzeFileWord(fileOffice: FileOffice, scanner: Scanner, analyzerOptions={}) -> Tuple[Match, ScanInfo]:
+def analyzeFileWord(fileOffice: FileOffice, scanner: Scanner, reducer: Reducer, analyzerOptions={}) -> Tuple[Match, ScanInfo]:
     # Scans a office file given with fileOffice with Scanner scanner. 
     # Returns all matches.
-    reducer = Reducer(fileOffice, scanner)
     scanInfo = ScanInfo(scanner.scanner_name, ScanSpeed.Normal)
 
     timeStart = time.time()

diff --git a/plugins/office/file_office.py b/plugins/office/file_office.py
@@ -5,7 +5,7 @@
 from typing import List
 
 from model.model_data import Data
-from plugins.model import BaseFile
+from model.file_model import BaseFile
 
 MAKRO_PATH = 'word/vbaProject.bin'
 

diff --git a/plugins/office/plugin_office.py b/plugins/office/plugin_office.py
@@ -1,12 +1,13 @@
-from plugins.model import Plugin, BaseFile
+from model.plugin_model import Plugin
 from model.model_base import Scanner, Match, OutflankPatch, ScanInfo
 from model.model_data import Match
 from model.model_verification import MatchConclusion
 from typing import List, Tuple, Set
-
+from reducer import Reducer
 from plugins.office.analyzer_office import analyzeFileWord
 from plugins.office.augment_office import augmentFileWord
 from plugins.office.file_office import FileOffice
+from model.file_model import BaseFile
 
 
 class PluginOffice(Plugin):
@@ -17,9 +18,9 @@ def makeFile(self, filepath: str):
         return file
 
 
-    def analyzeFile(self, file: BaseFile, scanner: Scanner, iteration: int = 0, analyzerOptions={}) -> Tuple[Match, ScanInfo]:
+    def analyzeFile(self, file: BaseFile, scanner: Scanner, reducer: Reducer, analyzerOptions={}) -> Tuple[Match, ScanInfo]:
         # We use the simple PE analyzer
-        return analyzeFileWord(file, scanner, iteration, analyzerOptions)
+        return analyzeFileWord(file, scanner, reducer, analyzerOptions)
 
 
     def augmentFile(self, file: BaseFile, matches: List[Match]) -> str:

diff --git a/plugins/pe/analyzer_pe.py b/plugins/pe/analyzer_pe.py
@@ -13,14 +13,13 @@
 from scanning import scanIsHash
 
 
-def analyzeFilePe(filePe: FilePe, scanner: Scanner, iteration: int = 0, analyzerOptions={}) -> Tuple[Match, ScanInfo]:
+def analyzeFilePe(filePe: FilePe, scanner: Scanner, reducer: Reducer, analyzerOptions={}) -> Tuple[Match, ScanInfo]:
     """Scans a PE file given with filePe with Scanner scanner. Returns all matches and ScanInfo"""
     isolate = analyzerOptions.get("isolate", False)
     scanSpeed = analyzerOptions.get("scanSpeed", ScanSpeed.Normal)
     scanInfo = ScanInfo(scanner.scanner_name, scanSpeed)
 
     # prepare the reducer with the file
-    reducer = Reducer(filePe, scanner, scanSpeed)
     timeStart = time.time()
     matches, scanPipe = scanForMatchesInPe(filePe, scanner, reducer, isolate)
     scanInfo.scanDuration = round(time.time() - timeStart)

diff --git a/plugins/pe/file_pe.py b/plugins/pe/file_pe.py
@@ -4,7 +4,7 @@
 import inspect
 
 from model.model_code import Section, SectionsBag
-from plugins.model import BaseFile
+from model.file_model import BaseFile
 
 from dotnetfile import DotNetPE
 from dotnetfile.util import FileLocation

diff --git a/plugins/pe/plugin_pe.py b/plugins/pe/plugin_pe.py
@@ -1,4 +1,4 @@
-from plugins.model import Plugin, BaseFile
+from model.plugin_model import Plugin
 from model.model_base import Scanner, OutflankPatch, ScanInfo
 from model.model_data import Match
 from model.model_verification import MatchConclusion
@@ -9,6 +9,8 @@
 from plugins.pe.augment_pe import augmentFilePe
 from plugins.pe.outflank_pe import outflankPe
 from plugins.pe.file_pe import FilePe
+from reducer import Reducer
+from model.file_model import BaseFile
 
 
 class PluginPe(Plugin):
@@ -19,9 +21,9 @@ def makeFile(self, filepath: str):
         return file
 
 
-    def analyzeFile(self, file: BaseFile, scanner: Scanner, iteration: int = 0, analyzerOptions={}) -> Tuple[Match, ScanInfo]:
+    def analyzeFile(self, file: BaseFile, scanner: Scanner, reducer: Reducer, analyzerOptions={}) -> Tuple[Match, ScanInfo]:
         # We use the simple PE analyzer
-        return analyzeFilePe(file, scanner, iteration, analyzerOptions)
+        return analyzeFilePe(file, scanner, reducer, analyzerOptions)
 
 
     def augmentFile(self, file: BaseFile, matches: List[Match]) -> str:

diff --git a/plugins/plain/analyzer_plain.py b/plugins/plain/analyzer_plain.py
@@ -9,8 +9,7 @@
 
 
 # no PE file, just check its content
-def analyzeFilePlain(filePlain: FilePlain, scanner, iteration, analyzerOptions) -> Tuple[Match, ScanInfo]:
-    reducer = Reducer(filePlain, scanner)
+def analyzeFilePlain(filePlain: FilePlain, scanner, reducer, analyzerOptions) -> Tuple[Match, ScanInfo]:
     scanInfo = ScanInfo(scanner.scanner_name, ScanSpeed.Normal)
 
     timeStart = time.time()

diff --git a/plugins/plain/file_plain.py b/plugins/plain/file_plain.py
@@ -1,6 +1,6 @@
 import logging
 import os
-from plugins.model import BaseFile
+from model.file_model import BaseFile
 
 
 class FilePlain(BaseFile):

diff --git a/plugins/plain/plugin_plain.py b/plugins/plain/plugin_plain.py
@@ -1,9 +1,10 @@
-from plugins.model import Plugin, BaseFile
+from model.plugin_model import Plugin
 from model.model_base import Scanner, OutflankPatch, ScanInfo
 from model.model_data import Match
 from model.model_verification import MatchConclusion
+from model.file_model import BaseFile
 from typing import List, Tuple, Set
-
+from reducer import Reducer
 from plugins.plain.analyzer_plain import analyzeFilePlain
 from plugins.plain.file_plain import FilePlain
 
@@ -16,9 +17,9 @@ def makeFile(self, filepath: str):
         return file
 
 
-    def analyzeFile(self, file: BaseFile, scanner: Scanner, iteration: int = 0, analyzerOptions={}) -> Tuple[Match, ScanInfo]:
+    def analyzeFile(self, file: BaseFile, scanner: Scanner, reducer: Reducer, analyzerOptions={}) -> Tuple[Match, ScanInfo]:
         # We use the simple PE analyzer
-        return analyzeFilePlain(file, scanner, iteration, analyzerOptions)
+        return analyzeFilePlain(file, scanner, reducer, analyzerOptions)
 
 
     def augmentFile(self, file: BaseFile, matches: List[Match]) -> str:

diff --git a/reducer.py b/reducer.py
@@ -6,7 +6,7 @@
 
 from model.model_base import Scanner, ScanSpeed
 from model.model_data import Data, Match
-from plugins.model import BaseFile
+from model.file_model import BaseFile
 
 from myutils import *
 
@@ -16,14 +16,14 @@
 class Reducer():
     """Reducer will scan data in file with scanner, and return List of matches"""
 
-    def __init__(self, file: BaseFile, scanner: Scanner, scanSpeed=ScanSpeed.Normal, iteration: int = 0):
+    def __init__(self, file: BaseFile, scanner: Scanner, iteration: int = 0, scanSpeed=ScanSpeed.Normal):
         self.file: BaseFile = file
         self.scanner: Scanner = scanner
         self.scanSpeed: ScanSpeed = scanSpeed
+        self.iteration: int = iteration
 
         self.matchesAdded: int = 0
         self.chunks_tested: int = 0
-        self.iteration: int = iteration
         self.matchIdx: int = 0
 
         self.minMatchSize: int = 4

diff --git a/scanning.py b/scanning.py
@@ -1,5 +1,5 @@
 from model.model_data import Data
-from plugins.model import BaseFile
+from model.file_model import BaseFile
 import logging
 
 
@@ -23,7 +23,6 @@ def scanIsHash(file: BaseFile, scanner, start=0, size=0) -> bool:
         d: Data = file.DataCopy()
         d.patchDataFill(offset, 1)
         detected = scanner.scannerDetectsBytes(d.getBytes(), file.filename)
-        logging.info("CheckHash: Offset:{} -> Detected:{}".format(offset, detected))
         scanResults.append(detected)
 
     # if all modifications result in not-detected, its hash based

diff --git a/tests/test_model.py b/tests/test_model.py
@@ -1,5 +1,5 @@
 import unittest
-from plugins.model import BaseFile
+from model.file_model import BaseFile
 
 
 class ModelTest(unittest.TestCase):