5
5
import 'dart:math' as math;
6
6
7
7
import 'package:_pub_shared/search/search_form.dart' ;
8
+ import 'package:bit_array/bit_array.dart' ;
8
9
import 'package:clock/clock.dart' ;
9
10
import 'package:collection/collection.dart' ;
10
11
import 'package:logging/logging.dart' ;
@@ -29,10 +30,9 @@ class InMemoryPackageIndex {
29
30
late final TokenIndex <IndexedApiDocPage > _apiSymbolIndex;
30
31
late final _scorePool = ScorePool (_packageNameIndex._packageNames);
31
32
32
- /// Maps the tag strings to a list of document index values
33
- /// (`PackageDocument doc.tags -> List<_documents.indexOf(doc)>` ).
34
- final _tagDocumentIndices = < String , List <int >> {};
35
- final _documentTagIds = < List <int >> [];
33
+ /// Maps the tag strings to a list of document index values using bit arrays.
34
+ /// - (`PackageDocument doc.tags -> BitArray(List<_documents.indexOf(doc)>)` ).
35
+ final _tagBitArrays = < String , BitArray > {};
36
36
37
37
/// Adjusted score takes the overall score and transforms
38
38
/// it linearly into the [0.4-1.0] range.
@@ -63,12 +63,11 @@ class InMemoryPackageIndex {
63
63
_documentsByName[doc.package] = doc;
64
64
65
65
// transform tags into numerical IDs
66
- final tagIds = < int > [];
67
66
for (final tag in doc.tags) {
68
- _tagDocumentIndices.putIfAbsent (tag, () => []).add (i);
67
+ _tagBitArrays
68
+ .putIfAbsent (tag, () => BitArray (_documents.length))
69
+ .setBit (i);
69
70
}
70
- tagIds.sort ();
71
- _documentTagIds.add (tagIds);
72
71
73
72
final apiDocPages = doc.apiDocPages;
74
73
if (apiDocPages != null ) {
@@ -137,62 +136,54 @@ class InMemoryPackageIndex {
137
136
return PackageSearchResult .empty ();
138
137
}
139
138
return _scorePool.withScore (
140
- value: 1 .0 ,
139
+ value: 0 .0 ,
141
140
fn: (score) {
142
141
return _search (query, score);
143
142
},
144
143
);
145
144
}
146
145
147
146
PackageSearchResult _search (
148
- ServiceSearchQuery query, IndexedScore <String > packageScores) {
149
- // filter on package prefix
150
- if (query.parsedQuery.packagePrefix != null ) {
151
- final String prefix = query.parsedQuery.packagePrefix! .toLowerCase ();
152
- packageScores.retainWhere (
153
- (i, _) => _documents[i].packageNameLowerCased.startsWith (prefix),
154
- );
155
- }
147
+ ServiceSearchQuery query,
148
+ IndexedScore <String > packageScores,
149
+ ) {
150
+ // TODO: implement pooling of this object similarly to [ScorePool].
151
+ final packages = BitArray (_documents.length)
152
+ ..setRange (0 , _documents.length);
156
153
157
154
// filter on tags
158
155
final combinedTagsPredicate =
159
156
query.tagsPredicate.appendPredicate (query.parsedQuery.tagsPredicate);
160
157
if (combinedTagsPredicate.isNotEmpty) {
161
158
for (final entry in combinedTagsPredicate.entries) {
162
- final docIndexes = _tagDocumentIndices[entry.key];
163
-
159
+ final tagBits = _tagBitArrays[entry.key];
164
160
if (entry.value) {
165
- // predicate is required, zeroing the gaps between index values
166
- if (docIndexes == null ) {
167
- // the predicate is required, no document will match it
161
+ if (tagBits == null ) {
162
+ // the predicate is not matched by any document
168
163
return PackageSearchResult .empty ();
169
164
}
170
-
171
- for (var i = 0 ; i < docIndexes.length; i++ ) {
172
- if (i == 0 ) {
173
- packageScores.fillRange (0 , docIndexes[i], 0.0 );
174
- continue ;
175
- }
176
- packageScores.fillRange (docIndexes[i - 1 ] + 1 , docIndexes[i], 0.0 );
177
- }
178
- packageScores.fillRange (docIndexes.last + 1 , _documents.length, 0.0 );
165
+ packages.and (tagBits);
179
166
} else {
180
- // predicate is prohibited, zeroing the values
181
-
182
- if (docIndexes == null ) {
183
- // the predicate is prohibited, no document has it, always a match
167
+ if (tagBits == null ) {
168
+ // a negative predicate without an index entry matches every document
184
169
continue ;
185
170
}
186
- for (final i in docIndexes) {
187
- packageScores.setValue (i, 0.0 );
188
- }
171
+ packages.andNot (tagBits);
189
172
}
190
173
}
191
174
}
192
175
176
+ // filter on package prefix
177
+ if (query.parsedQuery.packagePrefix != null ) {
178
+ final prefix = query.parsedQuery.packagePrefix! .toLowerCase ();
179
+ packages.clearWhere (
180
+ (i) => ! _documents[i].packageNameLowerCased.startsWith (prefix),
181
+ );
182
+ }
183
+
193
184
// filter on dependency
194
185
if (query.parsedQuery.hasAnyDependency) {
195
- packageScores. removeWhere ((i, _ ) {
186
+ packages. clearWhere ((i) {
196
187
final doc = _documents[i];
197
188
if (doc.dependencies.isEmpty) return true ;
198
189
for (final dependency in query.parsedQuery.allDependencies) {
@@ -208,22 +199,29 @@ class InMemoryPackageIndex {
208
199
209
200
// filter on points
210
201
if (query.minPoints != null && query.minPoints! > 0 ) {
211
- packageScores. removeWhere (
212
- (i, _ ) => _documents[i].grantedPoints < query.minPoints! );
202
+ packages
203
+ . clearWhere ((i ) => _documents[i].grantedPoints < query.minPoints! );
213
204
}
214
205
215
206
// filter on updatedDuration
216
207
final updatedDuration = query.parsedQuery.updatedDuration;
217
208
if (updatedDuration != null && updatedDuration > Duration .zero) {
218
209
final now = clock.now ();
219
- packageScores.removeWhere (
220
- (i, _) => now.difference (_documents[i].updated) > updatedDuration);
210
+ packages.clearWhere (
211
+ (i) => now.difference (_documents[i].updated) > updatedDuration);
212
+ }
213
+
214
+ // TODO: find a better way to handle predicate-only filtering and scoring
215
+ for (final index in packages.asIntIterable ()) {
216
+ if (index >= _documents.length) break ;
217
+ packageScores.setValue (index, 1.0 );
221
218
}
222
219
223
220
// do text matching
224
221
final parsedQueryText = query.parsedQuery.text;
225
222
final textResults = _searchText (
226
223
packageScores,
224
+ packages,
227
225
parsedQueryText,
228
226
includeNameMatches: (query.offset ?? 0 ) == 0 ,
229
227
textMatchExtent: query.textMatchExtent ?? TextMatchExtent .api,
@@ -334,6 +332,7 @@ class InMemoryPackageIndex {
334
332
335
333
_TextResults ? _searchText (
336
334
IndexedScore <String > packageScores,
335
+ BitArray packages,
337
336
String ? text, {
338
337
required bool includeNameMatches,
339
338
required TextMatchExtent textMatchExtent,
@@ -345,12 +344,14 @@ class InMemoryPackageIndex {
345
344
final sw = Stopwatch ()..start ();
346
345
final words = splitForQuery (text);
347
346
if (words.isEmpty) {
347
+ // packages.clearAll();
348
348
packageScores.fillRange (0 , packageScores.length, 0 );
349
349
return _TextResults .empty ();
350
350
}
351
351
352
352
final matchName = textMatchExtent.shouldMatchName ();
353
353
if (! matchName) {
354
+ // packages.clearAll();
354
355
packageScores.fillRange (0 , packageScores.length, 0 );
355
356
return _TextResults .empty (
356
357
errorMessage:
@@ -373,12 +374,6 @@ class InMemoryPackageIndex {
373
374
nameMatches.add (text);
374
375
}
375
376
376
- // Multiple words are scored separately, and then the individual scores
377
- // are multiplied. We can use a package filter that is applied after each
378
- // word to reduce the scope of the later words based on the previous results.
379
- /// However, API docs search should be filtered on the original list.
380
- final indexedPositiveList = packageScores.toIndexedPositiveList ();
381
-
382
377
final matchDescription = textMatchExtent.shouldMatchDescription ();
383
378
final matchReadme = textMatchExtent.shouldMatchReadme ();
384
379
final matchApi = textMatchExtent.shouldMatchApi ();
@@ -419,7 +414,7 @@ class InMemoryPackageIndex {
419
414
if (value < 0.01 ) continue ;
420
415
421
416
final doc = symbolPages.keys[i];
422
- if (! indexedPositiveList [doc.index]) continue ;
417
+ if (! packages [doc.index]) continue ;
423
418
424
419
// skip if the previously found pages are better than the current one
425
420
final pages =
0 commit comments