Skip to content

Commit f4cf87f

Browse files
authored
Reduce memory usage during index construction. (#8834)
1 parent 3235aa1 commit f4cf87f

File tree

5 files changed

+67
-41
lines changed

5 files changed

+67
-41
lines changed

app/bin/tools/isolate_search_benchmark.dart

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// for details. All rights reserved. Use of this source code is governed by a
33
// BSD-style license that can be found in the LICENSE file.
44

5+
import 'dart:io';
56
import 'dart:math';
67

78
import 'package:_pub_shared/search/search_form.dart';
@@ -23,13 +24,15 @@ final queries = [
2324
];
2425

2526
Future<void> main(List<String> args) async {
26-
print('Loading...');
27+
print('Started. Current memory: ${ProcessInfo.currentRss ~/ 1024} KiB, '
28+
'max memory: ${ProcessInfo.maxRss ~/ 1024} KiB');
2729
final primaryRunner = await startSearchIsolate(snapshot: args.first);
2830
final reducedRunner = await startSearchIsolate(
2931
snapshot: args.first,
3032
removeTextContent: true,
3133
);
32-
print('Loaded.');
34+
print('Loaded. Current memory: ${ProcessInfo.currentRss ~/ 1024} KiB, '
35+
'max memory: ${ProcessInfo.maxRss ~/ 1024} KiB');
3336

3437
for (var i = 0; i < 5; i++) {
3538
await _benchmark(primaryRunner, primaryRunner);
@@ -39,6 +42,8 @@ Future<void> main(List<String> args) async {
3942

4043
await primaryRunner.close();
4144
await reducedRunner.close();
45+
print('Done. Current memory: ${ProcessInfo.currentRss ~/ 1024} KiB, '
46+
'max memory: ${ProcessInfo.maxRss ~/ 1024} KiB');
4247
}
4348

4449
Future<void> _benchmark(IsolateRunner primary, IsolateRunner reduced) async {

app/bin/tools/sdk_search_benchmark.dart

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,17 @@
22
// for details. All rights reserved. Use of this source code is governed by a
33
// BSD-style license that can be found in the LICENSE file.
44

5+
import 'dart:io';
6+
57
import 'package:pub_dev/search/sdk_mem_index.dart';
68

79
/// Loads a Dart SDK search snapshot and executes queries on it, benchmarking their total time to complete.
810
Future<void> main() async {
11+
print('Started. Current memory: ${ProcessInfo.currentRss ~/ 1024} KiB, '
12+
'max memory: ${ProcessInfo.maxRss ~/ 1024} KiB');
913
final index = await createSdkMemIndex();
14+
print('Loaded. Current memory: ${ProcessInfo.currentRss ~/ 1024} KiB, '
15+
'max memory: ${ProcessInfo.maxRss ~/ 1024} KiB');
1016

1117
// NOTE: please add more queries to this list, especially if there is a performance bottleneck.
1218
final queries = [
@@ -25,4 +31,6 @@ Future<void> main() async {
2531
}
2632
sw.stop();
2733
print('${(sw.elapsedMilliseconds / count).toStringAsFixed(2)} ms/request');
34+
print('Done. Current memory: ${ProcessInfo.currentRss ~/ 1024} KiB, '
35+
'max memory: ${ProcessInfo.maxRss ~/ 1024} KiB');
2836
}

app/bin/tools/search_benchmark.dart

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,21 @@
22
// for details. All rights reserved. Use of this source code is governed by a
33
// BSD-style license that can be found in the LICENSE file.
44

5+
import 'dart:async';
6+
import 'dart:io';
7+
58
import 'package:_pub_shared/search/search_form.dart';
69
import 'package:pub_dev/search/search_service.dart';
710
import 'package:pub_dev/search/updater.dart';
811

912
/// Loads a search snapshot and executes queries on it, benchmarking their total time to complete.
1013
Future<void> main(List<String> args) async {
14+
print('Started. Current memory: ${ProcessInfo.currentRss ~/ 1024} KiB, '
15+
'max memory: ${ProcessInfo.maxRss ~/ 1024} KiB');
1116
// Assumes that the first argument is a search snapshot file.
1217
final index = await loadInMemoryPackageIndexFromFile(args.first);
18+
print('Loaded. Current memory: ${ProcessInfo.currentRss ~/ 1024} KiB, '
19+
'max memory: ${ProcessInfo.maxRss ~/ 1024} KiB');
1320

1421
// NOTE: please add more queries to this list, especially if there is a performance bottleneck.
1522
final queries = [
@@ -34,4 +41,6 @@ Future<void> main(List<String> args) async {
3441
}
3542
sw.stop();
3643
print('${(sw.elapsedMilliseconds / count).toStringAsFixed(2)} ms/request');
44+
print('Done. Current memory: ${ProcessInfo.currentRss ~/ 1024} KiB, '
45+
'max memory: ${ProcessInfo.maxRss ~/ 1024} KiB');
3746
}

app/lib/search/mem_index.dart

Lines changed: 12 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import 'package:collection/collection.dart';
1010
import 'package:logging/logging.dart';
1111
import 'package:meta/meta.dart';
1212
import 'package:pub_dev/search/heap.dart';
13-
import 'package:pub_dev/service/topics/models.dart';
1413
import 'package:pub_dev/third_party/bit_array/bit_array.dart';
1514

1615
import 'models.dart';
@@ -23,7 +22,6 @@ final _textSearchTimeout = Duration(milliseconds: 500);
2322

2423
class InMemoryPackageIndex {
2524
final List<PackageDocument> _documents;
26-
final _documentsByName = <String, PackageDocument>{};
2725
final _nameToIndex = <String, int>{};
2826
late final PackageNameIndex _packageNameIndex;
2927
late final TokenIndex<String> _descrIndex;
@@ -47,23 +45,15 @@ class InMemoryPackageIndex {
4745
late final List<IndexedPackageHit> _pointsOrderedHits;
4846
late final List<IndexedPackageHit> _trendingOrderedHits;
4947

50-
// Contains all of the topics the index had seen so far.
51-
// TODO: consider moving this into a separate index
52-
// TODO: get the list of topics from the bucket
53-
final _topics = <String>{
54-
...canonicalTopics.aliasToCanonicalMap.values,
55-
};
56-
5748
late final DateTime _lastUpdated;
5849

5950
InMemoryPackageIndex({
6051
required Iterable<PackageDocument> documents,
6152
}) : _documents = [...documents] {
6253
final apiDocPageKeys = <IndexedApiDocPage>[];
63-
final apiDocPageValues = <String>[];
54+
final apiDocPageValues = <List<String>>[];
6455
for (var i = 0; i < _documents.length; i++) {
6556
final doc = _documents[i];
66-
_documentsByName[doc.package] = doc;
6757
_nameToIndex[doc.package] = i;
6858

6959
// transform tags into numerical IDs
@@ -78,16 +68,10 @@ class InMemoryPackageIndex {
7868
for (final page in apiDocPages) {
7969
if (page.symbols != null && page.symbols!.isNotEmpty) {
8070
apiDocPageKeys.add(IndexedApiDocPage(i, page));
81-
apiDocPageValues.add(page.symbols!.join(' '));
71+
apiDocPageValues.add(page.symbols!);
8272
}
8373
}
8474
}
85-
86-
// Note: we are not removing topics from this set, only adding them, no
87-
// need for tracking the current topic count.
88-
_topics.addAll(doc.tags
89-
.where((t) => t.startsWith('topic:'))
90-
.map((t) => t.split('topic:').last));
9175
}
9276

9377
final packageKeys = _documents.map((d) => d.package).toList();
@@ -101,7 +85,7 @@ class InMemoryPackageIndex {
10185
packageKeys,
10286
_documents.map((d) => d.readme).toList(),
10387
);
104-
_apiSymbolIndex = TokenIndex(apiDocPageKeys, apiDocPageValues);
88+
_apiSymbolIndex = TokenIndex.fromValues(apiDocPageKeys, apiDocPageValues);
10589

10690
// update download scores only if they were not set (should happen on old runtime's snapshot and local tests)
10791
if (_documents.any((e) => e.downloadScore == null)) {
@@ -131,7 +115,7 @@ class InMemoryPackageIndex {
131115
IndexInfo indexInfo() {
132116
return IndexInfo(
133117
isReady: true,
134-
packageCount: _documentsByName.length,
118+
packageCount: _documents.length,
135119
lastUpdated: _lastUpdated,
136120
);
137121
}
@@ -363,7 +347,7 @@ class InMemoryPackageIndex {
363347
}
364348

365349
// exact package name
366-
if (_documentsByName.containsKey(parsedQueryText)) {
350+
if (_nameToIndex.containsKey(parsedQueryText)) {
367351
return parsedQueryText;
368352
}
369353

@@ -377,12 +361,9 @@ class InMemoryPackageIndex {
377361
}
378362
// Note: to keep it simple, we select the most downloaded one from competing matches.
379363
return matches.reduce((a, b) {
380-
if (_documentsByName[a]!.downloadCount >
381-
_documentsByName[b]!.downloadCount) {
382-
return a;
383-
} else {
384-
return b;
385-
}
364+
final aDoc = _documents[_nameToIndex[a]!];
365+
final bDoc = _documents[_nameToIndex[b]!];
366+
return aDoc.downloadCount > bDoc.downloadCount ? a : b;
386367
});
387368
}
388369

@@ -487,7 +468,7 @@ class InMemoryPackageIndex {
487468
packageScores.setValueMaxOf(doc.index, value);
488469

489470
// add the page and re-sort the current results
490-
pages.add(MapEntry(doc.page.relativePath, value));
471+
pages.add(MapEntry(doc.relativePath, value));
491472
if (pages.length > 1) {
492473
pages.sort((a, b) => -a.value.compareTo(b.value));
493474
}
@@ -750,7 +731,8 @@ class IndexedPackageHit {
750731

751732
class IndexedApiDocPage {
752733
final int index;
753-
final ApiDocPage page;
734+
final String relativePath;
754735

755-
IndexedApiDocPage(this.index, this.page);
736+
IndexedApiDocPage(this.index, ApiDocPage page)
737+
: relativePath = page.relativePath;
756738
}

app/lib/search/token_index.dart

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -51,21 +51,43 @@ class TokenIndex<K> {
5151
if (text == null) {
5252
continue;
5353
}
54-
final tokens = tokenize(text);
55-
if (tokens == null || tokens.isEmpty) {
54+
_build(i, text, skipDocumentWeight);
55+
}
56+
}
57+
58+
TokenIndex.fromValues(
59+
List<K> ids,
60+
List<List<String>?> values, {
61+
bool skipDocumentWeight = false,
62+
}) : _ids = ids {
63+
assert(ids.length == values.length);
64+
final length = values.length;
65+
for (var i = 0; i < length; i++) {
66+
final parts = values[i];
67+
68+
if (parts == null || parts.isEmpty) {
5669
continue;
5770
}
58-
// Document weight is a highly scaled-down proxy of the length.
59-
final dw =
60-
skipDocumentWeight ? 1.0 : 1 + math.log(1 + tokens.length) / 100;
61-
for (final e in tokens.entries) {
62-
final token = e.key;
63-
final weights = _inverseIds.putIfAbsent(token, () => {});
64-
weights[i] = math.max(weights[i] ?? 0.0, e.value / dw);
71+
for (final text in parts) {
72+
_build(i, text, skipDocumentWeight);
6573
}
6674
}
6775
}
6876

77+
void _build(int i, String text, bool skipDocumentWeight) {
78+
final tokens = tokenize(text);
79+
if (tokens == null || tokens.isEmpty) {
80+
return;
81+
}
82+
// Document weight is a highly scaled-down proxy of the length.
83+
final dw = skipDocumentWeight ? 1.0 : 1 + math.log(1 + tokens.length) / 100;
84+
for (final e in tokens.entries) {
85+
final token = e.key;
86+
final weights = _inverseIds.putIfAbsent(token, () => {});
87+
weights[i] = math.max(weights[i] ?? 0.0, e.value / dw);
88+
}
89+
}
90+
6991
factory TokenIndex.fromMap(Map<K, String> map) {
7092
final keys = map.keys.toList();
7193
final values = map.values.toList();

0 commit comments

Comments
 (0)