File tree Expand file tree Collapse file tree 3 files changed +6
-15
lines changed Expand file tree Collapse file tree 3 files changed +6
-15
lines changed Original file line number Diff line number Diff line change @@ -49,19 +49,10 @@ ehthumbs.db
49
49
Thumbs.db
50
50
51
51
# Data files (these be large treasures that shouldn't go in git)
52
- data /kaggle_so_2023 /
53
- # But keep the zip file for distribution
52
+ # Ignore everything in data/ directory
53
+ data/*
54
+ # But keep the zip file for distribution, arrr!
54
55
!data/kaggle_so_2023_data.zip
55
- data /* .csv
56
- data /* .json
57
- data /* .xlsx
58
-
59
- # Large data files - too big for GitHub's hold
60
- data /kaggle_so_2023_data /
61
- data /kaggle_so_2023_data.zip
62
- data /kaggle_so_2023 /
63
- * .csv
64
- * .zip
65
56
66
57
# Test coverage
67
58
.coverage
Original file line number Diff line number Diff line change @@ -138,8 +138,8 @@ def _setup_data_sources(self):
138
138
discovered = self ._discover_data_sources ()
139
139
140
140
# Configure Stack Overflow 2023 Survey if found
141
- if "kaggle_so_2023" in discovered:
142
- so_2023 = discovered["kaggle_so_2023"]
141
+ if "kaggle_so_2023_data" in discovered:
142
+ so_2023 = discovered["kaggle_so_2023_data"]
143
143
self .register_data_source (
144
144
DataSource (
145
145
name = "stackoverflow_2023" ,
@@ -172,7 +172,7 @@ def _setup_data_sources(self):
172
172
173
173
# Auto-configure other discovered data sources with generic settings
174
174
for dir_name , files_info in discovered .items ():
175
- if dir_name != "kaggle_so_2023":  # Skip already configured ones
175
+ if dir_name not in ["kaggle_so_2023_data"]:  # Skip already configured ones
176
176
# Try to detect common column patterns by loading a sample
177
177
try :
178
178
sample_df = pd.read_csv(files_info["data_file"], nrows=1)
You can’t perform that action at this time.
0 commit comments