Skip to content

Commit 26b52e8

Browse files
committed
Ignore files based on name (glob), size, and whether they're binary files
1 parent 9c9c397 commit 26b52e8

File tree

11 files changed

+274
-95
lines changed

11 files changed

+274
-95
lines changed

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ After you have Python and (optionally) PostgreSQL installed, follow these steps:
8585
- LLM Provider (OpenAI/Azure/Openrouter)
8686
- Your API key
8787
- database settings: SQLite/PostgreSQL (to change from SQLite to PostgreSQL, just set `DATABASE_TYPE=postgres`)
88-
- optionally set IGNORE_FOLDERS for the folders which shouldn't be tracked by GPT Pilot in workspace, useful to ignore folders created by compilers (i.e. `IGNORE_FOLDERS=folder1,folder2,folder3`)
88+
- optionally set IGNORE_PATHS for the files and folders which shouldn't be tracked by GPT Pilot in workspace, useful to ignore folders created by compilers (e.g. `IGNORE_PATHS=folder1,folder2,folder3`)
8989
9. `python db_init.py` (initialize the database)
9090
10. `python main.py` (start GPT Pilot)
9191

docker-compose.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ services:
1818
- DB_USER=pilot
1919
- DB_PASSWORD=pilot
2020
# Files and folders which shouldn't be tracked in workspace (useful to ignore folders created by compiler)
21-
# IGNORE_FOLDERS=folder1,folder2
21+
# IGNORE_PATHS=folder1,folder2
2222
volumes:
2323
- ~/gpt-pilot-workspace:/usr/src/app/workspace
2424
build:

pilot/.env.example

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ MODEL_NAME=gpt-4-1106-preview
1616
MAX_TOKENS=8192
1717

1818
# Files and folders which shouldn't be tracked in workspace (useful to ignore folders created by compiler)
19-
# IGNORE_FOLDERS=folder1,folder2
19+
# IGNORE_PATHS=folder1,folder2
2020

2121
# Database
2222
# DATABASE_TYPE=postgres

pilot/const/common.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,7 @@
2121
'finished'
2222
]
2323

24-
additional_ignore_folders = os.environ.get('IGNORE_FOLDERS', '').split(',')
25-
26-
# TODO: rename to IGNORE_PATHS as it also contains files
27-
IGNORE_FOLDERS = [
24+
DEFAULT_IGNORE_PATHS = [
2825
'.git',
2926
'.gpt-pilot',
3027
'.idea',
@@ -36,7 +33,16 @@
3633
'venv',
3734
'dist',
3835
'build',
39-
'target'
40-
] + [folder for folder in additional_ignore_folders if folder]
41-
36+
'target',
37+
"*.min.js",
38+
"*.min.css",
39+
"*.svg",
40+
"*.csv",
41+
]
42+
IGNORE_PATHS = DEFAULT_IGNORE_PATHS + [
43+
folder for folder
44+
in os.environ.get('IGNORE_PATHS', '').split(',')
45+
if folder
46+
]
47+
IGNORE_SIZE_THRESHOLD = 102400 # 100K+ files are ignored by default
4248
PROMPT_DATA_TO_IGNORE = {'directory_tree', 'name'}

pilot/helpers/Project.py

+12-23
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from const.messages import CHECK_AND_CONTINUE, AFFIRMATIVE_ANSWERS, NEGATIVE_ANSWERS
1010
from utils.style import color_yellow_bold, color_cyan, color_white_bold, color_green
11-
from const.common import IGNORE_FOLDERS, STEPS
11+
from const.common import STEPS
1212
from database.database import delete_unconnected_steps_from, delete_all_app_development_data, update_app_status
1313
from const.ipc import MESSAGE_TYPE
1414
from prompts.prompts import ask_user
@@ -28,6 +28,7 @@
2828
from logger.logger import logger
2929
from utils.dot_gpt_pilot import DotGptPilot
3030
from utils.llm_connection import test_api_access
31+
from utils.ignore import IgnoreMatcher
3132

3233
from utils.telemetry import telemetry
3334

@@ -176,12 +177,7 @@ def get_directory_tree(self, with_descriptions=False):
176177
Returns:
177178
dict: The directory tree.
178179
"""
179-
# files = {}
180-
# if with_descriptions and False:
181-
# files = File.select().where(File.app_id == self.args['app_id'])
182-
# files = {snapshot.name: snapshot for snapshot in files}
183-
# return build_directory_tree_with_descriptions(self.root_path, ignore=IGNORE_FOLDERS, files=files, add_descriptions=False)
184-
return build_directory_tree(self.root_path, ignore=IGNORE_FOLDERS)
180+
return build_directory_tree(self.root_path)
185181

186182
def get_test_directory_tree(self):
187183
"""
@@ -191,7 +187,7 @@ def get_test_directory_tree(self):
191187
dict: The directory tree of tests.
192188
"""
193189
# TODO remove hardcoded path
194-
return build_directory_tree(self.root_path + '/tests', ignore=IGNORE_FOLDERS)
190+
return build_directory_tree(self.root_path + '/tests')
195191

196192
def get_all_coded_files(self):
197193
"""
@@ -209,18 +205,7 @@ def get_all_coded_files(self):
209205
)
210206
)
211207

212-
files = self.get_files([file.path + '/' + file.name for file in files])
213-
214-
# Don't send contents of binary files
215-
for file in files:
216-
if not isinstance(file["content"], str):
217-
file["content"] = f"<<binary file, {len(file['content'])} bytes>>"
218-
219-
# TODO temporary fix to eliminate files that are not in the project
220-
files = [file for file in files if file['content'] != '']
221-
# TODO END
222-
223-
return files
208+
return self.get_files([file.path + '/' + file.name for file in files])
224209

225210
def get_files(self, files):
226211
"""
@@ -232,16 +217,20 @@ def get_files(self, files):
232217
Returns:
233218
list: A list of files with content.
234219
"""
220+
matcher = IgnoreMatcher(root_path=self.root_path)
235221
files_with_content = []
236222
for file_path in files:
237223
try:
238224
# TODO path is sometimes relative and sometimes absolute - fix at some point
239225
_, full_path = self.get_full_file_path(file_path, file_path)
240226
file_data = get_file_contents(full_path, self.root_path)
241227
except ValueError:
228+
full_path = None
242229
file_data = {"path": file_path, "name": os.path.basename(file_path), "content": ''}
243230

244-
files_with_content.append(file_data)
231+
if full_path and file_data["content"] != "" and not matcher.ignore(full_path):
232+
files_with_content.append(file_data)
233+
245234
return files_with_content
246235

247236
def find_input_required_lines(self, file_content):
@@ -395,7 +384,7 @@ def normalize_path(path: str) -> Tuple[str, str]:
395384

396385

397386
def save_files_snapshot(self, development_step_id):
398-
files = get_directory_contents(self.root_path, ignore=IGNORE_FOLDERS)
387+
files = get_directory_contents(self.root_path)
399388
development_step, created = DevelopmentSteps.get_or_create(id=development_step_id)
400389

401390
total_files = 0
@@ -431,7 +420,7 @@ def restore_files(self, development_step_id):
431420
development_step = DevelopmentSteps.get(DevelopmentSteps.id == development_step_id)
432421
file_snapshots = FileSnapshot.select().where(FileSnapshot.development_step == development_step)
433422

434-
clear_directory(self.root_path, IGNORE_FOLDERS + self.files)
423+
clear_directory(self.root_path, ignore=self.files)
435424
for file_snapshot in file_snapshots:
436425
update_file(file_snapshot.file.full_path, file_snapshot.content, project=self)
437426
if file_snapshot.file.full_path not in self.files:

pilot/helpers/cli.py

+23-43
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
from logger.logger import logger
1212
from utils.style import color_yellow, color_green, color_red, color_yellow_bold
13+
from utils.ignore import IgnoreMatcher
1314
from database.database import get_saved_command_run, save_command_run
1415
from helpers.exceptions.TooDeepRecursionError import TooDeepRecursionError
1516
from helpers.exceptions.TokenLimitError import TokenLimitError
@@ -340,32 +341,41 @@ def check_if_command_successful(convo, command, cli_response, response, exit_cod
340341

341342
return response
342343

343-
def build_directory_tree(path, prefix='', is_root=True, ignore=None):
344+
def build_directory_tree(path, prefix='', root_path=None) -> str:
344345
"""Build the directory tree structure in a simplified format.
345346
346-
Args:
347-
- path: The starting directory path.
348-
- prefix: Prefix for the current item, used for recursion.
349-
- is_root: Flag to indicate if the current item is the root directory.
350-
- ignore: a list of directories to ignore
351-
352-
Returns:
353-
- A string representation of the directory tree.
347+
:param path: The starting directory path.
348+
:param prefix: Prefix for the current item, used for recursion.
349+
:param root_path: The root directory path.
350+
:return: A string representation of the directory tree.
354351
"""
355352
output = ""
356353
indent = ' '
357354

355+
if root_path is None:
356+
root_path = path
357+
358+
matcher = IgnoreMatcher(root_path=root_path)
359+
358360
if os.path.isdir(path):
359-
if is_root:
361+
if root_path == path:
360362
output += '/'
361363
else:
362364
dir_name = os.path.basename(path)
363365
output += f'{prefix}/{dir_name}'
364366

365367
# List items in the directory
366368
items = os.listdir(path)
367-
dirs = [item for item in items if os.path.isdir(os.path.join(path, item)) and item not in ignore]
368-
files = [item for item in items if os.path.isfile(os.path.join(path, item))]
369+
dirs = []
370+
files = []
371+
for item in items:
372+
item_path = os.path.join(path, item)
373+
if matcher.ignore(item_path):
374+
continue
375+
if os.path.isdir(item_path):
376+
dirs.append(item)
377+
elif os.path.isfile(item_path):
378+
files.append(item)
369379
dirs.sort()
370380
files.sort()
371381

@@ -374,7 +384,7 @@ def build_directory_tree(path, prefix='', is_root=True, ignore=None):
374384
for index, dir_item in enumerate(dirs):
375385
item_path = os.path.join(path, dir_item)
376386
new_prefix = prefix + indent # Updated prefix for recursion
377-
output += build_directory_tree(item_path, new_prefix, is_root=False, ignore=ignore)
387+
output += build_directory_tree(item_path, new_prefix, root_path)
378388

379389
if files:
380390
output += f"{prefix} {', '.join(files)}\n"
@@ -387,36 +397,6 @@ def build_directory_tree(path, prefix='', is_root=True, ignore=None):
387397
return output
388398

389399

390-
def res_for_build_directory_tree(path, files=None):
391-
return ' - ' + files[os.path.basename(path)].description + ' ' if files and os.path.basename(path) in files else ''
392-
393-
394-
def build_directory_tree_with_descriptions(path, prefix="", ignore=None, is_last=False, files=None):
395-
"""Build the directory tree structure in tree-like format.
396-
Args:
397-
- path: The starting directory path.
398-
- prefix: Prefix for the current item, used for recursion.
399-
- ignore: List of directory names to ignore.
400-
- is_last: Flag to indicate if the current item is the last in its parent directory.
401-
Returns:
402-
- A string representation of the directory tree.
403-
"""
404-
ignore |= []
405-
if os.path.basename(path) in ignore:
406-
return ""
407-
output = ""
408-
indent = '| ' if not is_last else ' '
409-
# It's a directory, add its name to the output and then recurse into it
410-
output += prefix + f"|-- {os.path.basename(path)}{res_for_build_directory_tree(path, files)}/\n"
411-
if os.path.isdir(path):
412-
# List items in the directory
413-
items = os.listdir(path)
414-
for index, item in enumerate(items):
415-
item_path = os.path.join(path, item)
416-
output += build_directory_tree(item_path, prefix + indent, ignore, index == len(items) - 1, files)
417-
return output
418-
419-
420400
def execute_command_and_check_cli_response(convo, command: dict):
421401
"""
422402
Execute a command and check its CLI response.

pilot/helpers/files.py

+19-13
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from typing import Optional, Union
44

55
from utils.style import color_green
6-
6+
from utils.ignore import IgnoreMatcher
77

88
def update_file(path: str, new_content: Union[str, bytes], project=None):
99
"""
@@ -87,7 +87,8 @@ def get_file_contents(
8787

8888

8989
def get_directory_contents(
90-
directory: str, ignore: Optional[list[str]] = None
90+
directory: str,
91+
ignore: Optional[list[str]] = None,
9192
) -> list[dict[str, Union[str, bytes]]]:
9293
"""
9394
Get the content of all files in the given directory.
@@ -101,19 +102,22 @@ def get_directory_contents(
101102
"""
102103
return_array = []
103104

104-
if ignore is None:
105-
ignore = []
105+
matcher = IgnoreMatcher(ignore, root_path=directory)
106106

107107
# TODO: Convert to use pathlib.Path.walk()
108108
for dpath, dirs, files in os.walk(directory):
109109
# In-place update of dirs so that os.walk() doesn't traverse them
110-
dirs[:] = [d for d in dirs if d not in ignore]
110+
dirs[:] = [
111+
d for d in dirs
112+
if not matcher.ignore(os.path.join(dpath, d))
113+
]
111114

112115
for file in files:
113-
if file in ignore:
116+
full_path = os.path.join(dpath, file)
117+
if matcher.ignore(full_path):
114118
continue
115119

116-
return_array.append(get_file_contents(os.path.join(dpath, file), directory))
120+
return_array.append(get_file_contents(full_path, directory))
117121

118122
return return_array
119123

@@ -125,20 +129,22 @@ def clear_directory(directory: str, ignore: Optional[list[str]] = None):
125129
:param directory: Full path to the directory to clear
126130
:param ignore: List of files or folders to ignore (optional)
127131
"""
128-
if ignore is None:
129-
ignore = []
132+
matcher = IgnoreMatcher(ignore, root_path=directory)
130133

131134
# TODO: Convert to use pathlib.Path.walk()
132135
for dpath, dirs, files in os.walk(directory, topdown=True):
133136
# In-place update of dirs so that os.walk() doesn't traverse them
134-
dirs[:] = [d for d in dirs if d not in ignore]
137+
dirs[:] = [
138+
d for d in dirs
139+
if not matcher.ignore(os.path.join(dpath, d))
140+
]
135141

136142
for file in files:
137-
if file in ignore or os.path.join(directory, file) in ignore:
143+
full_path = os.path.join(dpath, file)
144+
if matcher.ignore(full_path):
138145
continue
139146

140-
path = os.path.join(dpath, file)
141-
os.remove(path)
147+
os.remove(full_path)
142148

143149
# Delete empty subdirectories not in ignore list
144150
for d in dirs:

pilot/helpers/test_Project.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ def setup_method(self):
363363
'user_review_goal': 'Test User Review Goal',
364364
}]
365365

366-
# with directories including common.IGNORE_FOLDERS
366+
# with directories including common.IGNORE_PATHS
367367
src = os.path.join(project.root_path, 'src')
368368
foo = os.path.join(project.root_path, 'src/foo')
369369
files_no_folders = os.path.join(foo, 'files_no_folders')

pilot/test/helpers/test_files.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -147,14 +147,13 @@ def test_get_directory_contents_live():
147147
assert isinstance(this_file["content"], str)
148148
assert "test_get_directory_contents_live()" in this_file["content"]
149149

150-
# Check that the Python cache was loaded as a binary file
151-
print("FILES", [(f["path"], f["name"]) for f in files])
152-
pycache_file = [
150+
# Check that the binary file was ignored
151+
image_files = [
153152
f
154153
for f in files
155154
if f["path"] == "helpers" and f["name"] == "testlogo.png"
156-
][0]
157-
assert isinstance(pycache_file["content"], bytes)
155+
]
156+
assert image_files == []
158157

159158
# Check that the ignore list works
160159
assert all(file["name"] != "__init__.py" for file in files)

0 commit comments

Comments
 (0)