5858FILTERED_DIR_EXT = { # Folder endings to skip
5959 ".egg-info"
6060}
61- FILTERED_EXT = { # File extensions to skip
61+ FILTERED_EXT = [ # File extensions to skip
6262 ".1" , ".2" , ".3" , ".4" , ".5" , ".6" , ".7" , ".8" , ".9" , ".ac" , ".adoc" , ".am" ,
6363 ".asciidoc" , ".bmp" , ".build" , ".cfg" , ".chm" , ".class" , ".cmake" , ".cnf" ,
6464 ".conf" , ".config" , ".contributors" , ".copying" , ".crt" , ".csproj" , ".css" ,
7878 # File endings
7979 "-doc" , "changelog" , "config" , "copying" , "license" , "authors" , "news" , "licenses" , "notice" ,
8080 "readme" , "swiftdoc" , "texidoc" , "todo" , "version" , "ignore" , "manifest" , "sqlite" , "sqlite3"
81- }
81+ ]
8282FILTERED_FILES = { # Files to skip
8383 "gradlew" , "gradlew.bat" , "mvnw" , "mvnw.cmd" , "gradle-wrapper.jar" , "maven-wrapper.jar" ,
8484 "thumbs.db" , "babel.config.js" , "license.txt" , "license.md" , "copying.lib" , "makefile"
@@ -100,12 +100,17 @@ def __init__(self, wfp: str = None, scan_output: str = None, output_format: str
100100 all_extensions : bool = False , all_folders : bool = False , hidden_files_folders : bool = False ,
101101 scan_options : int = 7 , sc_timeout : int = 600 , sc_command : str = None , grpc_url : str = None ,
102102 obfuscate : bool = False , ignore_cert_errors : bool = False , proxy : str = None , grpc_proxy : str = None ,
103- ca_cert : str = None , pac : PACFile = None , retry : int = 5 , hpsm : bool = False
103+ ca_cert : str = None , pac : PACFile = None , retry : int = 5 , hpsm : bool = False ,
104+ skip_size : int = 0 , skip_extensions = None , skip_folders = None
104105 ):
105106 """
106107 Initialise scanning class, including Winnowing, ScanossApi and ThreadedScanning
107108 """
108109 super ().__init__ (debug , trace , quiet )
110+ if skip_folders is None :
111+ skip_folders = []
112+ if skip_extensions is None :
113+ skip_extensions = []
109114 self .wfp = wfp if wfp else "scanner_output.wfp"
110115 self .scan_output = scan_output
111116 self .output_format = output_format
@@ -117,6 +122,8 @@ def __init__(self, wfp: str = None, scan_output: str = None, output_format: str
117122 self .scan_options = scan_options
118123 self ._skip_snippets = True if not scan_options & ScanType .SCAN_SNIPPETS .value else False
119124 self .hpsm = hpsm
125+ self .skip_folders = skip_folders
126+ self .skip_size = skip_size
120127 ver_details = Scanner .version_details ()
121128
122129 self .winnowing = Winnowing (debug = debug , quiet = quiet , skip_snippets = self ._skip_snippets ,
@@ -143,6 +150,9 @@ def __init__(self, wfp: str = None, scan_output: str = None, output_format: str
143150 self .post_file_count = post_size if post_size > 0 else 32 # Max number of files for any given POST (default 32)
144151 if self ._skip_snippets :
145152 self .max_post_size = 8 * 1024 # 8k Max post size if we're skipping snippets
153+ self .skip_extensions = FILTERED_EXT
154+ if skip_extensions : # Append extra file extensions to skip
155+ self .skip_extensions .extend (skip_extensions )
146156
147157 def __filter_files (self , files : list ) -> list :
148158 """
@@ -160,8 +170,8 @@ def __filter_files(self, files: list) -> list:
160170 if f_lower in FILTERED_FILES : # Check for exact files to ignore
161171 ignore = True
162172 if not ignore :
163- for ending in FILTERED_EXT : # Check for file endings to ignore
164- if f_lower .endswith (ending ):
173+ for ending in self . skip_extensions : # Check for file endings to ignore (static and user supplied)
174+ if ending and f_lower .endswith (ending ):
165175 ignore = True
166176 break
167177 if not ignore :
@@ -181,10 +191,12 @@ def __filter_dirs(self, dirs: list) -> list:
181191 ignore = True
182192 if not ignore and not self .all_folders : # Skip this check if we're allowing all folders
183193 d_lower = d .lower ()
184- if d_lower in FILTERED_DIRS : # Ignore specific folders
194+ if d_lower in FILTERED_DIRS : # Ignore specific folders (case insensitive)
195+ ignore = True
196+ elif self .skip_folders and d in self .skip_folders : # Ignore user-supplied folders (case sensitive)
185197 ignore = True
186198 if not ignore :
187- for de in FILTERED_DIR_EXT : # Ignore specific folder endings
199+ for de in FILTERED_DIR_EXT : # Ignore specific folder endings (case insensitive)
188200 if d_lower .endswith (de ):
189201 ignore = True
190202 break
@@ -385,7 +397,8 @@ def scan_folder(self, scan_dir: str) -> bool:
385397 except Exception as e :
386398 self .print_trace (
387399 f'Ignoring missing symlink file: { file } ({ e } )' ) # Can fail if there is a broken symlink
388- if f_size > 0 : # Ignore broken links and empty files
400+ # Skip broken links and empty files; if a user-specified size limit is set, also skip files at or below that size
401+ if f_size > 0 and (self .skip_size <= 0 or f_size > self .skip_size ):
389402 self .print_trace (f'Fingerprinting { path } ...' )
390403 if spinner :
391404 spinner .next ()
@@ -598,7 +611,7 @@ def scan_file(self, file: str) -> bool:
598611 success = False
599612 return success
600613
601- def scan_files (self , files : list [ str ]) -> bool :
614+ def scan_files (self , files : [ ]) -> bool :
602615 """
603616 Scan the specified list of files, producing fingerprints, send to the SCANOSS API and return results
604617 Please note that by providing an explicit list you bypass any exclusions that may be defined on the scanner
@@ -637,7 +650,7 @@ def scan_files(self, files: list[str]) -> bool:
637650 spinner .next ()
638651 wfp = self .winnowing .wfp_for_file (file , file )
639652 if wfp is None or wfp == '' :
640- self .print_stderr (f'Warning: No WFP returned for { path } ' )
653+ self .print_stderr (f'Warning: No WFP returned for { file } ' )
641654 continue
642655 if save_wfps_for_print :
643656 wfp_list .append (wfp )
@@ -681,12 +694,12 @@ def scan_files(self, files: list[str]) -> bool:
681694 if self .threaded_scan :
682695 success = self .__run_scan_threaded (scan_started , file_count )
683696 else :
684- Scanner .print_stderr (f'Warning: No files found to scan in folder : { scan_dir } ' )
697+ Scanner .print_stderr (f'Warning: No files found to scan from : { files } ' )
685698 return success
686699
687- def scan_files_with_options (self , files : list [ str ], deps_file : str = None , file_map : dict = None ) -> bool :
700+ def scan_files_with_options (self , files : [ ], deps_file : str = None , file_map : dict = None ) -> bool :
688701 """
689- Scan the given folder for whatever scaning options that have been configured
702+ Scan the given list of files using whatever scanning options have been configured
690703 :param files: list of files to scan
691704 :param deps_file: pre-parsed dependency file to decorate
692705 :param file_map: mapping of obfuscated files back into originals
@@ -697,7 +710,7 @@ def scan_files_with_options(self, files: list[str], deps_file: str = None, file_
697710 raise Exception (f"ERROR: Please specify a list of files to scan" )
698711 if not self .is_file_or_snippet_scan ():
699712 raise Exception (f"ERROR: file or snippet scan options have to be set to scan files: { files } " )
700- if self .is_dependency_scan ():
713+ if self .is_dependency_scan () or deps_file :
701714 raise Exception (f"ERROR: The dependency scan option is currently not supported when scanning a list of files" )
702715 if self .scan_output :
703716 self .print_msg (f'Writing results to { self .scan_output } ...' )
@@ -852,26 +865,25 @@ def scan_wfp_with_options(self, wfp: str, deps_file: str, file_map: dict = None)
852865 raise Exception (f"ERROR: Specified WFP file does not exist or is not a file: { wfp_file } " )
853866
854867 if not self .is_file_or_snippet_scan () and not self .is_dependency_scan ():
855- raise Exception (f"ERROR: No scan options defined to scan folder : { scan_dir } " )
868+ raise Exception (f"ERROR: No scan options defined to scan WFP : { wfp } " )
856869
857870 if self .scan_output :
858871 self .print_msg (f'Writing results to { self .scan_output } ...' )
859872 if self .is_dependency_scan ():
860873 if not self .threaded_deps .run (deps_file = deps_file , wait = False ): # Kick off a background dependency scan
861874 success = False
862875 if self .is_file_or_snippet_scan ():
863- if not self .scan_wfp_file_threaded (wfp_file , file_map ):
876+ if not self .scan_wfp_file_threaded (wfp_file ):
864877 success = False
865878 if self .threaded_scan :
866879 if not self .__finish_scan_threaded (file_map ):
867880 success = False
868881 return success
869882
870- def scan_wfp_file_threaded (self , file : str = None , file_map : dict = None ) -> bool :
883+ def scan_wfp_file_threaded (self , file : str = None ) -> bool :
871884 """
872885 Scan the contents of the specified WFP file (threaded)
873886 :param file: WFP file to scan (optional)
874- :param file_map: mapping of obfuscated files back into originals (optional)
875887 return: True if successful, False otherwise
876888 """
877889 success = True
0 commit comments