# file_enumerator.py
# Print the contents of every text file under a directory, skipping paths
# matched by the directory's .gitignore / .dockerignore patterns.
import os
import argparse
import fnmatch
import magic
import re
def load_ignore_patterns(directory):
    """Collect ignore patterns from the ignore files found in *directory*.

    Reads ``.gitignore`` and ``.dockerignore`` when they exist as regular
    files. Every non-blank line that does not start with ``#`` is kept,
    stripped of surrounding whitespace. A ``.git/`` pattern is always
    appended at the end.

    Args:
        directory: Directory expected to contain the ignore files.

    Returns:
        A list of pattern strings in file order, ending with ``'.git/'``.
    """
    ignore_patterns = []

    for ignore_file in ('.gitignore', '.dockerignore'):
        ignore_path = os.path.join(directory, ignore_file)
        # isfile rather than exists: a *directory* carrying one of these
        # names would otherwise make open() raise.
        if not os.path.isfile(ignore_path):
            continue
        # Explicit encoding keeps the result independent of the locale;
        # undecodable bytes are replaced instead of aborting the whole run.
        with open(ignore_path, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                stripped = line.strip()
                if stripped and not line.startswith('#'):
                    ignore_patterns.append(stripped)

    # Always ignore the .git directory, whatever the ignore files say.
    ignore_patterns.append('.git/')
    return ignore_patterns
def should_ignore(path, base_path, ignore_patterns):
    """Decide whether *path* is excluded by the ignore patterns.

    The path is made relative to *base_path* and then checked as follows:

    * Anything with a ``.git`` path component is always ignored.
    * A pattern ending in ``/`` is matched against the relative path and
      against its parent directory, each with a trailing ``/``.
    * Any other pattern is matched against the whole relative path.
    * A pattern with no interior ``/`` (for example ``__pycache__/`` or
      ``.env``) is additionally matched against every path component, so
      it applies at any depth rather than only at the top level.

    Matching uses ``fnmatch``. Files and directories are not told apart,
    so a directory pattern also matches a file of the same name.

    Args:
        path: Path of the file or directory to test.
        base_path: Root directory that the patterns are relative to.
        ignore_patterns: Iterable of pattern strings.

    Returns:
        True if the path should be skipped, False otherwise.
    """
    rel_path = os.path.relpath(path, base_path)
    components = rel_path.split(os.sep)

    # Ignore the .git directory and everything inside it.
    if '.git' in components:
        return True

    parent_dir = os.path.dirname(rel_path)

    for pattern in ignore_patterns:
        if pattern.endswith('/'):
            if (fnmatch.fnmatch(rel_path + '/', pattern)
                    or fnmatch.fnmatch(parent_dir + '/', pattern)):
                return True
        elif fnmatch.fnmatch(rel_path, pattern):
            return True

        # A bare name (no separator besides an optional trailing one) is
        # not anchored to the root: test it against each path component so
        # nested occurrences are ignored too.
        bare = pattern.rstrip('/')
        if bare and '/' not in bare:
            if any(fnmatch.fnmatch(part, bare) for part in components):
                return True

    return False
def is_text_file(file_path):
    """Report whether *file_path* looks like a text file by MIME type.

    The MIME type is obtained from ``magic``. A file counts as text when
    its type starts with ``text/`` or is one of a few structured-text
    application types (JSON, XML, YAML, JavaScript).

    Any exception raised during detection is printed and treated as
    "not text".

    Args:
        file_path: Path of the file to inspect.

    Returns:
        True if the file is considered text, False otherwise.
    """
    # Application types that are text despite not being under text/.
    textual_app_types = [
        'application/json',
        'application/xml',
        'application/x-yaml',
        'application/javascript',
    ]
    try:
        detector = magic.Magic(mime=True)
        detected = detector.from_file(file_path)
        return detected.startswith('text/') or detected in textual_app_types
    except Exception as e:
        print(f"Error checking file type: {e}")
        return False
def enumerate_files(directory):
    """Print every non-ignored text file under *directory* with its contents.

    Walks the tree rooted at *directory*, skipping directories and files
    rejected by ``should_ignore`` and files that ``is_text_file`` does not
    accept. For each remaining file it prints a ``File: <relative path>``
    header, a separator line, the UTF-8 decoded contents with everything
    outside printable ASCII, newline, carriage return and tab removed,
    then two separator lines and a blank line. Read and decode failures
    are reported inline instead of stopping the walk.

    Args:
        directory: Root directory to enumerate.
    """
    patterns = load_ignore_patterns(directory)
    separator = "-" * 40
    # Matches every character that is not printable ASCII, \n, \r or \t.
    non_printable = re.compile(r'[^\x20-\x7E\n\r\t]')

    for current_dir, subdirs, filenames in os.walk(directory):
        # Prune in place so os.walk does not descend into ignored directories.
        kept = []
        for name in subdirs:
            if not should_ignore(os.path.join(current_dir, name), directory, patterns):
                kept.append(name)
        subdirs[:] = kept

        for name in filenames:
            full_path = os.path.join(current_dir, name)
            if should_ignore(full_path, directory, patterns):
                continue
            if not is_text_file(full_path):
                continue

            shown_path = os.path.relpath(full_path, directory)
            print(f"File: {shown_path}")
            print(separator)
            try:
                with open(full_path, 'r', encoding='utf-8') as handle:
                    text = handle.read()
                    print(non_printable.sub('', text))
            except UnicodeDecodeError:
                print("Error: Unable to decode file as UTF-8")
            except Exception as e:
                print(f"Error reading file: {e}")
            print(separator)
            print(separator)
            print()
def main():
    """Command-line entry point.

    Accepts one optional positional argument, the directory to enumerate,
    defaulting to the current working directory. The path is made
    absolute; if it is not a directory an error message is printed and
    nothing else happens, otherwise the directory is enumerated.
    """
    cli = argparse.ArgumentParser(description="Directory File Enumerator with Ignore Support")
    cli.add_argument(
        "directory",
        nargs='?',
        default=os.getcwd(),
        help="Path to the directory to enumerate (default: current directory)",
    )
    target = os.path.abspath(cli.parse_args().directory)

    if os.path.isdir(target):
        enumerate_files(target)
    else:
        print(f"Error: {target} is not a valid directory")
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()