Skip to content

Commit 2c49cd5

Browse files
authored
Improved lexical's index efficiency (#534)
For lexical, the index was using 450MB of disk and consumed about a gig in memory. This commit includes the following changes:
* We stopped duplicating entries in a couple of the records, and now only store them once in the 'by_ref' key. This decreased the index size by 50%, and had no noticeable effect on performance.
* Several of the processes had cache entries lying around that caused them to consume quite a bit (hundreds of megs) of memory. I introduced a cache cleaning block and added it to the call sites, and this reduced the memory usage of those processes back to extremely low levels.
* We were storing the Erlang and Elixir versions on each entry. Now we create separate indexes for each Elixir/Erlang version pair.
In total, lexical was taking around 1GB of memory before these changes, and now it takes around 400MB. Disk space dropped from 450 megs to 250.
1 parent dc85552 commit 2c49cd5

File tree

13 files changed

+165
-126
lines changed

13 files changed

+165
-126
lines changed

apps/remote_control/lib/lexical/remote_control/commands/reindex.ex

+4-1
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@ defmodule Lexical.RemoteControl.Commands.Reindex do
22
defmodule State do
33
alias Lexical.Ast.Analysis
44
alias Lexical.Document
5+
alias Lexical.ProcessCache
56
alias Lexical.RemoteControl.Search
67
alias Lexical.RemoteControl.Search.Indexer
78

89
require Logger
10+
require ProcessCache
11+
912
defstruct reindex_fun: nil, index_task: nil, pending_updates: %{}
1013

1114
def new(reindex_fun) do
@@ -53,7 +56,7 @@ defmodule Lexical.RemoteControl.Commands.Reindex do
5356
defp entries_for_uri(uri) do
5457
with {:ok, %Document{} = document, %Analysis{} = analysis} <-
5558
Document.Store.fetch(uri, :analysis),
56-
{:ok, entries} <- Indexer.Quoted.index(analysis) do
59+
{:ok, entries} <- Indexer.Quoted.index_with_cleanup(analysis) do
5760
{:ok, document.path, entries}
5861
else
5962
error ->

apps/remote_control/lib/lexical/remote_control/search/indexer.ex

+18-7
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,32 @@
11
defmodule Lexical.RemoteControl.Search.Indexer do
2+
alias Lexical.ProcessCache
23
alias Lexical.Project
34
alias Lexical.RemoteControl.Search.Indexer
45

56
import Lexical.RemoteControl.Progress
7+
require ProcessCache
8+
69
@indexable_extensions "*.{ex,exs}"
710

811
def create_index(%Project{} = project) do
9-
entries =
10-
project
11-
|> indexable_files()
12-
|> async_chunks(&index_path/1)
13-
|> List.flatten()
14-
15-
{:ok, entries}
12+
ProcessCache.with_cleanup do
13+
entries =
14+
project
15+
|> indexable_files()
16+
|> async_chunks(&index_path/1)
17+
|> List.flatten()
18+
19+
{:ok, entries}
20+
end
1621
end
1722

1823
def update_index(%Project{} = project, existing_entries) do
24+
ProcessCache.with_cleanup do
25+
do_update_index(project, existing_entries)
26+
end
27+
end
28+
29+
defp do_update_index(%Project{} = project, existing_entries) do
1930
path_to_last_index_at =
2031
existing_entries
2132
|> Enum.group_by(& &1.path, & &1.updated_at)

apps/remote_control/lib/lexical/remote_control/search/indexer/entry.ex

-9
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@ defmodule Lexical.RemoteControl.Search.Indexer.Entry do
77

88
defstruct [
99
:application,
10-
:elixir_version,
11-
:erlang_version,
1210
:parent,
1311
:path,
1412
:range,
@@ -21,8 +19,6 @@ defmodule Lexical.RemoteControl.Search.Indexer.Entry do
2119

2220
@type t :: %__MODULE__{
2321
application: module(),
24-
elixir_version: version(),
25-
erlang_version: version(),
2622
subject: subject(),
2723
parent: entry_reference(),
2824
path: Path.t(),
@@ -34,7 +30,6 @@ defmodule Lexical.RemoteControl.Search.Indexer.Entry do
3430
}
3531

3632
alias Lexical.StructAccess
37-
alias Lexical.VM.Versions
3833

3934
use StructAccess
4035

@@ -47,12 +42,8 @@ defmodule Lexical.RemoteControl.Search.Indexer.Entry do
4742
end
4843

4944
defp new(path, ref, parent, subject, type, subtype, range, application) do
50-
versions = Versions.current()
51-
5245
%__MODULE__{
5346
application: application,
54-
elixir_version: versions.elixir,
55-
erlang_version: versions.erlang,
5647
subject: subject,
5748
parent: parent,
5849
path: path,

apps/remote_control/lib/lexical/remote_control/search/indexer/extractors/function_definition.ex

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ defmodule Lexical.RemoteControl.Search.Indexer.Extractors.FunctionDefinition do
3939
entry =
4040
Entry.definition(
4141
reducer.analysis.document.path,
42-
block.ref,
42+
make_ref(),
4343
block.parent_ref,
4444
mfa,
4545
type,

apps/remote_control/lib/lexical/remote_control/search/indexer/extractors/function_reference.ex

+1-1
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ defmodule Lexical.RemoteControl.Search.Indexer.Extractors.FunctionReference do
140140

141141
Entry.reference(
142142
reducer.analysis.document.path,
143-
block.ref,
143+
make_ref(),
144144
block.parent_ref,
145145
mfa,
146146
:function,

apps/remote_control/lib/lexical/remote_control/search/indexer/quoted.ex

+9
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,16 @@
11
defmodule Lexical.RemoteControl.Search.Indexer.Quoted do
22
alias Lexical.Ast.Analysis
3+
alias Lexical.ProcessCache
34
alias Lexical.RemoteControl.Search.Indexer.Source.Reducer
45

6+
require ProcessCache
7+
8+
def index_with_cleanup(%Analysis{} = analysis) do
9+
ProcessCache.with_cleanup do
10+
index(analysis)
11+
end
12+
end
13+
514
def index(%Analysis{valid?: true} = analysis) do
615
{_, reducer} =
716
Macro.prewalk(analysis.ast, Reducer.new(analysis), fn elem, reducer ->

apps/remote_control/lib/lexical/remote_control/search/store/backends/ets/schema.ex

+4-1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ defmodule Lexical.RemoteControl.Search.Store.Backends.Ets.Schema do
5555
end
5656

5757
alias Lexical.Project
58+
alias Lexical.VM.Versions
5859

5960
def load(%Project{} = project, schema_order) do
6061
ensure_unique_versions(schema_order)
@@ -89,7 +90,9 @@ defmodule Lexical.RemoteControl.Search.Store.Backends.Ets.Schema do
8990
end
9091

9192
def index_root(%Project{} = project) do
92-
Project.workspace_path(project, "indexes")
93+
versions = Versions.current()
94+
index_path = ["indexes", versions.erlang, versions.elixir]
95+
Project.workspace_path(project, index_path)
9396
end
9497

9598
def index_file_path(%Project{} = project, schema) do

apps/remote_control/lib/lexical/remote_control/search/store/backends/ets/schemas/v1.ex

+5-11
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,12 @@ defmodule Lexical.RemoteControl.Search.Store.Backends.Ets.Schemas.V1 do
1414

1515
use Schema, version: 1
1616

17-
defkey :by_id, [:id, :type, :subtype, :elixir_version, :erlang_version]
17+
defkey :by_id, [:id, :type, :subtype]
1818

1919
defkey :by_subject, [
2020
:subject,
2121
:type,
2222
:subtype,
23-
:elixir_version,
24-
:erlang_version,
2523
:path
2624
]
2725

@@ -31,7 +29,7 @@ defmodule Lexical.RemoteControl.Search.Store.Backends.Ets.Schemas.V1 do
3129
migrated =
3230
entries
3331
|> Stream.filter(fn
34-
{_, %_{elixir_version: _, erlang_version: _, type: _, subtype: _, ref: _}} -> true
32+
{_, %_{type: _, subtype: _, ref: _}} -> true
3533
_ -> false
3634
end)
3735
|> Stream.map(fn {_, entry} -> entry end)
@@ -53,8 +51,6 @@ defmodule Lexical.RemoteControl.Search.Store.Backends.Ets.Schemas.V1 do
5351
def to_rows(%Entry{} = entry) do
5452
subject_key =
5553
by_subject(
56-
elixir_version: entry.elixir_version,
57-
erlang_version: entry.erlang_version,
5854
subject: to_subject(entry.subject),
5955
type: entry.type,
6056
subtype: entry.subtype,
@@ -65,18 +61,16 @@ defmodule Lexical.RemoteControl.Search.Store.Backends.Ets.Schemas.V1 do
6561
by_id(
6662
id: entry.ref,
6763
type: entry.type,
68-
subtype: entry.subtype,
69-
elixir_version: entry.elixir_version,
70-
erlang_version: entry.erlang_version
64+
subtype: entry.subtype
7165
)
7266

7367
path_key = by_path(path: entry.path)
7468

75-
[{subject_key, entry}, {id_key, entry}, {path_key, id_key}]
69+
[{subject_key, id_key}, {id_key, entry}, {path_key, id_key}]
7670
end
7771

7872
# This case will handle any namespaced entries
79-
def to_rows(%{elixir_version: _, erlang_version: _, type: _, subtype: _, ref: _} = entry) do
73+
def to_rows(%{type: _, subtype: _, ref: _} = entry) do
8074
map = Map.delete(entry, :__struct__)
8175

8276
Entry

apps/remote_control/lib/lexical/remote_control/search/store/backends/ets/state.ex

+9-14
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ defmodule Lexical.RemoteControl.Search.Store.Backends.Ets.State do
88
alias Lexical.Project
99
alias Lexical.RemoteControl.Search.Store.Backends.Ets.Schema
1010
alias Lexical.RemoteControl.Search.Store.Backends.Ets.Schemas
11-
alias Lexical.VM.Versions
1211

1312
@schema_order [
1413
Schemas.LegacyV0,
@@ -60,28 +59,26 @@ defmodule Lexical.RemoteControl.Search.Store.Backends.Ets.State do
6059
end
6160

6261
def find_by_subject(%__MODULE__{} = state, subject, type, subtype) do
63-
versions = Versions.current()
64-
6562
match_pattern =
6663
query_by_subject(
6764
subject: to_subject(subject),
6865
type: type,
69-
subtype: subtype,
70-
elixir_version: versions.elixir,
71-
erlang_version: versions.erlang
66+
subtype: subtype
7267
)
7368

7469
state.table_name
7570
|> :ets.match_object({match_pattern, :_})
76-
|> Enum.flat_map(fn {_, match} -> match end)
71+
|> Enum.flat_map(fn {_, id_keys} ->
72+
id_keys
73+
end)
74+
|> MapSet.new()
75+
|> Enum.flat_map(&:ets.lookup_element(state.table_name, &1, 2))
7776
end
7877

7978
def find_by_references(%__MODULE__{} = state, references, type, subtype)
8079
when is_list(references) do
81-
versions = Versions.current()
82-
8380
for reference <- references,
84-
match_pattern = match_id_key(reference, versions, type, subtype),
81+
match_pattern = match_id_key(reference, type, subtype),
8582
{_key, entry} <- :ets.match_object(state.table_name, match_pattern) do
8683
entry
8784
end
@@ -133,13 +130,11 @@ defmodule Lexical.RemoteControl.Search.Store.Backends.Ets.State do
133130
state
134131
end
135132

136-
defp match_id_key(reference, versions, type, subtype) do
133+
defp match_id_key(reference, type, subtype) do
137134
{query_by_id(
138135
id: reference,
139136
type: type,
140-
subtype: subtype,
141-
elixir_version: versions.elixir,
142-
erlang_version: versions.erlang
137+
subtype: subtype
143138
), :_}
144139
end
145140

apps/remote_control/test/lexical/remote_control/search/store_test.exs

-43
Original file line numberDiff line numberDiff line change
@@ -62,29 +62,6 @@ defmodule Lexical.RemoteControl.Search.StoreTest do
6262
assert ref.subtype == :reference
6363
end
6464

65-
test "matching can exclude on elixir version" do
66-
Store.replace([
67-
reference(subject: Enum, elixir_version: "1.0.0"),
68-
reference(subject: Enum)
69-
])
70-
71-
assert {:ok, [ref]} = Store.exact("Enum", subtype: :reference)
72-
assert ref.subject == Enum
73-
refute ref.elixir_version == "1.0.0"
74-
end
75-
76-
test "matching can exclude on erlang version" do
77-
Store.replace([
78-
reference(subject: Enum, erlang_version: "1.0.0"),
79-
reference(subject: Enum)
80-
])
81-
82-
assert {:ok, [ref]} = Store.exact("Enum", subtype: :reference)
83-
84-
assert ref.subject == Enum
85-
refute ref.erlang_version == "1.0.0"
86-
end
87-
8865
test "matching with queries can exclude on type" do
8966
Store.replace([
9067
reference(subject: Foo.Bar.Baz),
@@ -124,26 +101,6 @@ defmodule Lexical.RemoteControl.Search.StoreTest do
124101
assert entry_1.subject in [Foo.Bar.Baz, Foo.Bar.Bak]
125102
assert entry_2.subject in [Foo.Bar.Baz, Foo.Bar.Bak]
126103
end
127-
128-
test "matching only returns entries specific to our elixir version" do
129-
Store.replace([
130-
definition(ref: 1, subject: Foo.Bar.Baz, elixir_version: "1.1"),
131-
definition(ref: 2, subject: Foo.Bar.Baz)
132-
])
133-
134-
assert {:ok, [entry]} = Store.fuzzy("Foo.Bar.", type: :module, subtype: :definition)
135-
assert entry.ref == 2
136-
end
137-
138-
test "matching only returns entries specific to our erlang version" do
139-
Store.replace([
140-
definition(ref: 1, subject: Foo.Bar.Baz, erlang_version: "14.3.2.8"),
141-
definition(ref: 2, subject: Foo.Bar.Baz)
142-
])
143-
144-
assert {:ok, [entry]} = Store.fuzzy("Foo.Bar.", type: :module, subtype: :definition)
145-
assert entry.ref == 2
146-
end
147104
end
148105

149106
describe "#{backend_name} :: updating entries in a file" do

apps/remote_control/test/support/lexical/test/entry/entry_builder.ex

-5
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,15 @@
11
defmodule Lexical.Test.Entry.Builder do
22
alias Lexical.Document.Range
33
alias Lexical.RemoteControl.Search.Indexer.Entry
4-
alias Lexical.VM.Versions
54

65
import Lexical.Test.PositionSupport
76

87
def entry(fields \\ []) do
9-
versions = Versions.current()
10-
118
defaults = [
129
subject: Module,
1310
ref: make_ref(),
1411
path: "/foo/bar/baz.ex",
1512
range: range(1, 1, 1, 5),
16-
elixir_version: versions.elixir,
17-
erlang_version: versions.erlang,
1813
type: :module
1914
]
2015

projects/lexical_shared/lib/lexical/process_cache.ex

+32
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,12 @@ defmodule Lexical.ProcessCache do
5858
end
5959
end
6060

61+
def clear_keys do
62+
keys()
63+
|> MapSet.put(all_keys_key())
64+
|> Enum.each(&Process.delete/1)
65+
end
66+
6167
@doc """
6268
Retrieves and optionally sets a value in the cache.
6369
@@ -76,9 +82,35 @@ defmodule Lexical.ProcessCache do
7682
end
7783
end
7884

85+
defmacro with_cleanup(do: block) do
86+
quote do
87+
try do
88+
unquote(block)
89+
after
90+
unquote(__MODULE__).clear_keys()
91+
end
92+
end
93+
end
94+
7995
defp set(key, timeout_ms, compute_fn) do
8096
value = compute_fn.()
97+
98+
add_key(key)
8199
Process.put(key, Entry.new(value, timeout_ms))
100+
82101
value
83102
end
103+
104+
defp add_key(key) do
105+
updated_keys = MapSet.put(keys(), key)
106+
Process.put(all_keys_key(), updated_keys)
107+
end
108+
109+
defp all_keys_key do
110+
{__MODULE__, :all_keys}
111+
end
112+
113+
defp keys do
114+
Process.get(all_keys_key(), MapSet.new())
115+
end
84116
end

0 commit comments

Comments
 (0)