-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathauto_gen_files.py
111 lines (90 loc) · 4.75 KB
/
auto_gen_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import re
import os
def snake_to_camel(word):
    """Convert a snake_case identifier into CamelCase.

    Every underscore-separated piece goes through ``str.capitalize``, which
    upper-cases the first character and lower-cases the rest. Inner capitals
    are therefore lost: "indexOf" becomes "Indexof" and "URL" becomes "Url".
    """
    pieces = word.split('_')
    return ''.join(map(str.capitalize, pieces))
if __name__ == '__main__':
    # Script entry point. Scans the detector modules under
    # <cwd>/codegex/detect and produces:
    #   * visitor.txt                - comma-separated SpotBugs visitor names
    #   * spotbugs-includeFilter.xml - a filter with one <Match> per pattern
    # It also prints the number and names of the collected patterns.

    # Captures the class name from declarations such as `class Foo(Detector)`.
    p_detector = re.compile(r'class\s*([^\s()]+)\(Detector\)')
    # Captures quoted upper-case identifiers that contain an underscore,
    # e.g. 'SA_SELF_ASSIGNMENT'.
    p = re.compile(r'[\'"]([A-Z0-9]+_+[A-Z_0-9]+)[\'"]')
    detector_path = os.path.join(os.getcwd(), 'codegex/detect')

    all_pattern_set = set()  # Pattern names of Codegex, including synthetic patterns

    # ========= gen_detectors.py is used to register detectors, i.e. it lets
    # the engine know which detectors exist. The three lists below are only
    # consumed by the (currently disabled) gen_detectors.py writer further down.
    file_names = []      # CamelCase names of the detector files in Codegex
    detector_names = []  # '"Name": Name' entries for DETECTOR_DICT
    import_list = []     # import statements for gen_detectors.py

    # os.listdir returns entries in arbitrary order; sort so that the
    # generated files are identical from run to run.
    for filename in sorted(os.listdir(detector_path)):
        path = os.path.join(detector_path, filename)
        if not os.path.isfile(path) or filename == '__init__.py':
            continue
        # Only Python modules can hold detectors; without this check the
        # [:-3] slice below would mangle the name of any other file.
        if not filename.endswith('.py'):
            continue

        strip_filename = filename[:-3]  # Remove '.py'
        file_names.append(snake_to_camel(strip_filename))

        # Read as UTF-8 explicitly instead of relying on the platform default.
        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()

        all_pattern_set.update(p.findall(content))

        detectors = p_detector.findall(content)
        if detectors:
            joined = ', '.join(detectors)
            import_list.append(f'from codegex.detect.{strip_filename} import {joined}\n')
            detector_names.extend(f'"{name}": {name}' for name in detectors)

    # print('[Number of detector files in Codegex]', len(file_names))
    # print('[Number of original patterns in Codegex]', len(all_pattern_set))
    # print('[Number of detector classes in codegex]', len(detector_names))
    # print(all_pattern_set)

    # Write the visitors used to configure the SpotBugs maven plugin, i.e. to
    # decide which detectors to run. Codegex file names that do not match a
    # SpotBugs visitor one-to-one are translated through this table.
    visitor_overrides = {
        'FindSelfAssignment': ('FindFieldSelfAssignment', 'FindLocalSelfAssignment2'),
        'FindSelfComparison': ('FindSelfComparison', 'FindSelfComparison2'),
        'FindBadCast': ('FindBadCast2',),
        # "URL".capitalize() returns "Url"; this pattern also appears in
        # DumbMethods.java.
        'UrlProblems': ('URLProblems',),
    }
    visitors = []
    for name in file_names:
        visitors.extend(visitor_overrides.get(name, (name,)))
    # DMI_DOH appears in both FindRefComparison and DumbMethods.
    # NM_METHOD_NAMING_CONVENTION appears in both TestASM.java and Naming.java.
    visitors.append('TestASM')

    with open('visitor.txt', 'w', encoding='utf-8') as out:
        out.write(','.join(visitors))

    # # This writing operation should be enabled only when the old gen_detectors.py is not maintained after implementing new patterns
    # with open('gen_detectors.py', 'w') as f:
    #     if import_list:
    #         f.writelines(import_list)
    #     if detector_names:
    #         f.write('\nDETECTOR_DICT = {\n ' + ',\n '.join(detector_names) + '\n}')

    # patterns_to_write matches codegex patterns to spotbugs patterns, and is
    # used to generate warning filter files in the comparison experiment with
    # spotbugs. Each Codegex pattern listed here is replaced by a FIELD and a
    # LOCAL SpotBugs pattern.
    split_patterns = {
        'SA_SELF_ASSIGNMENT': ('SA_FIELD_SELF_ASSIGNMENT', 'SA_LOCAL_SELF_ASSIGNMENT'),
        'SA_SELF_COMPARISON': ('SA_FIELD_SELF_COMPARISON', 'SA_LOCAL_SELF_COMPARISON'),
        'SA_SELF_COMPUTATION': ('SA_FIELD_SELF_COMPUTATION', 'SA_LOCAL_SELF_COMPUTATION'),
        'SA_DOUBLE_ASSIGNMENT': ('SA_FIELD_DOUBLE_ASSIGNMENT', 'SA_LOCAL_DOUBLE_ASSIGNMENT'),
    }
    patterns_to_write = set()
    for pattern in all_pattern_set:
        patterns_to_write.update(split_patterns.get(pattern, (pattern,)))

    # Set iteration order is not stable across runs; sort once and reuse the
    # result for both the console listing and the XML filter.
    sorted_patterns = sorted(patterns_to_write)

    print(f'Number of implemented SpotBugs patterns by Codegex = {len(patterns_to_write)}')
    print('\n'.join(sorted_patterns))

    # Generate warning filter files used in comparison experiment with spotbugs
    with open('spotbugs-includeFilter.xml', 'w', encoding='utf-8') as f:
        f.write('<FindBugsFilter>\n')
        f.writelines(
            f'\t<Match>\n\t\t<Bug pattern="{pattern}"/>\n\t</Match>\n'
            for pattern in sorted_patterns
        )
        f.write('</FindBugsFilter>')