Skip to content

Commit 136010d

Browse files
authored
Replaced refs with snowflakes (#536)
I originally had the idea of using refs to represent the IDs of syntactic elements, but refs are not globally unique and can be reused when the VM restarts. This makes them inappropriate for long-lived IDs like the ones we need in the index. I've replaced them with snowflakes, which I benchmarked at 800k qps; they are unique and also encode the time they were created, which means we won't need to store dates in the index. After the change, Lexical's index was 50 MB smaller.
1 parent 2c49cd5 commit 136010d

File tree

30 files changed

+212
-117
lines changed

30 files changed

+212
-117
lines changed

apps/common/lib/lexical/ast/analysis.ex

+3-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ defmodule Lexical.Ast.Analysis do
1212
alias Lexical.Document
1313
alias Lexical.Document
1414
alias Lexical.Document.Position
15+
alias Lexical.Identifier
1516
alias Sourceror.Zipper
1617

1718
defstruct [:ast, :document, :parse_error, scopes: [], valid?: true]
@@ -146,7 +147,8 @@ defmodule Lexical.Ast.Analysis do
146147

147148
# add a unique ID to 3-element tuples
148149
defp with_scope_id({_, _, _} = quoted) do
149-
Macro.update_meta(quoted, &Keyword.put(&1, @scope_id, make_ref()))
150+
id = Identifier.next_global!()
151+
Macro.update_meta(quoted, &Keyword.put(&1, @scope_id, id))
150152
end
151153

152154
defp with_scope_id(quoted) do

apps/common/lib/lexical/identifier.ex

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
defmodule Lexical.Identifier do
2+
@doc """
3+
Returns the next globally unique identifier.
4+
Raises a MatchError if this cannot be computed.
5+
"""
6+
def next_global! do
7+
{:ok, next_id} = Snowflake.next_id()
8+
next_id
9+
end
10+
end

apps/common/mix.exs

+1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ defmodule Common.MixProject do
3434
[
3535
{:lexical_shared, path: "../../projects/lexical_shared"},
3636
{:lexical_test, path: "../../projects/lexical_test", only: :test},
37+
{:snowflake, "~> 1.0"},
3738
{:sourceror, "~> 0.14.1"},
3839
{:stream_data, "~> 0.6", only: [:test], runtime: false},
3940
{:patch, "~> 0.12", only: [:test], optional: true, runtime: false}

apps/common/test/test_helper.exs

+1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1+
Application.ensure_all_started(:snowflake)
12
ExUnit.start()
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:e8b938d9c25331df2752ecf44b1b7823f80e68a0c34489aa0df58edd75d096ee
3-
size 99882388
2+
oid sha256:6aa5313736d6431963476b7b0b38cc20f2c95b252b3358ca6fba17b8d116d6a0
3+
size 221782947

apps/remote_control/benchmarks/ets_bench.exs

+11-9
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ alias Lexical.RemoteControl
33
alias Lexical.RemoteControl.Search.Store.Backends.Ets
44
alias Lexical.RemoteControl.Search.Store.Backends.Ets.Schema
55
alias Lexical.RemoteControl.Search.Store.Backends.Ets.Schemas
6+
alias Lexical.VM.Versions
67

78
defmodule BenchHelper do
89
def wait_for_registration do
@@ -20,14 +21,14 @@ defmodule BenchHelper do
2021
Enum.random(entries).path
2122
end
2223

23-
def random_ref(entries) do
24-
Enum.random(entries).ref
24+
def random_id(entries) do
25+
Enum.random(entries).id
2526
end
2627

27-
def random_refs(entries, count) do
28+
def random_ids(entries, count) do
2829
entries
2930
|> Enum.take_random(count)
30-
|> Enum.map(& &1.ref)
31+
|> Enum.map(& &1.id)
3132
end
3233
end
3334

@@ -37,7 +38,8 @@ project = Project.new("file://#{cwd}")
3738
RemoteControl.set_project(project)
3839
Project.ensure_workspace(project)
3940

40-
indexes_path = Project.workspace_path(project, "indexes")
41+
versions = Versions.current()
42+
indexes_path = Project.workspace_path(project, ["indexes", versions.erlang, versions.elixir])
4143
data_dir = Path.join(cwd, "data")
4244

4345
File.mkdir_p!(indexes_path)
@@ -64,17 +66,17 @@ Benchee.run(
6466
"find_by_subject, two wildcards" => fn _ ->
6567
Ets.find_by_subject(Enum, :_, :_)
6668
end,
67-
"find_by_references" => fn %{refs: refs} ->
68-
Ets.find_by_refs(refs, :module, :_)
69+
"find_by_references" => fn %{ids: ids} ->
70+
Ets.find_by_ids(ids, :module, :_)
6971
end,
7072
"delete_by_path" => fn %{path: path} ->
7173
Ets.delete_by_path(path)
7274
end
7375
},
7476
before_each: fn _ ->
75-
refs = BenchHelper.random_refs(entries, 50)
77+
ids = BenchHelper.random_ids(entries, 50)
7678
path = BenchHelper.random_path(entries)
77-
%{path: path, refs: refs}
79+
%{path: path, ids: ids}
7880
end
7981
)
8082

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Benchee.run(
2+
%{
3+
"next_id" => fn ->
4+
Snowflake.next_id()
5+
end
6+
},
7+
profile_after: true
8+
)

apps/remote_control/lib/lexical/remote_control/search/fuzzy.ex

+8-8
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ defmodule Lexical.RemoteControl.Search.Fuzzy do
3939
@spec from_entries([Entry.t()]) :: t
4040
def from_entries(entries) do
4141
mapper = fn %Entry{} = entry ->
42-
{entry.subject, entry.path, entry.ref}
42+
{entry.subject, entry.path, entry.id}
4343
end
4444

4545
new(entries, mapper, &stringify/1)
@@ -85,13 +85,13 @@ defmodule Lexical.RemoteControl.Search.Fuzzy do
8585
in descending order of the match relevance. Items at the beginning of the list
8686
will have a higher score than items at the end.
8787
"""
88-
@spec match(t(), String.t()) :: [reference()]
88+
@spec match(t(), String.t()) :: [Entry.entry_id()]
8989
def match(%__MODULE__{} = fuzzy, pattern) do
9090
fuzzy.subject_to_values
91-
|> Stream.map(fn {subject, references} ->
91+
|> Stream.map(fn {subject, ids} ->
9292
case score(fuzzy, subject, pattern) do
9393
{:ok, score} ->
94-
{score, references}
94+
{score, ids}
9595

9696
:error ->
9797
nil
@@ -117,13 +117,13 @@ defmodule Lexical.RemoteControl.Search.Fuzzy do
117117
subject = fuzzy.subject_converter.(extracted_subject)
118118

119119
updated_grouping_key_to_values =
120-
Map.update(fuzzy.grouping_key_to_values, grouping_key, [value], fn old_refs ->
121-
[value | old_refs]
120+
Map.update(fuzzy.grouping_key_to_values, grouping_key, [value], fn old_ids ->
121+
[value | old_ids]
122122
end)
123123

124124
updated_subject_to_values =
125-
Map.update(fuzzy.subject_to_values, subject, [value], fn old_refs ->
126-
[value | old_refs]
125+
Map.update(fuzzy.subject_to_values, subject, [value], fn old_ids ->
126+
[value | old_ids]
127127
end)
128128

129129
updated_preprocessed_subjects =

apps/remote_control/lib/lexical/remote_control/search/indexer/entry.ex

+9-10
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@ defmodule Lexical.RemoteControl.Search.Indexer.Entry do
33
@type subject :: String.t()
44
@type entry_subtype :: :reference | :definition
55
@type version :: String.t()
6-
@type entry_reference :: reference() | nil
6+
@type entry_id :: pos_integer() | nil
77

88
defstruct [
99
:application,
10+
:id,
1011
:parent,
1112
:path,
1213
:range,
13-
:ref,
1414
:subject,
1515
:subtype,
1616
:type,
@@ -20,10 +20,9 @@ defmodule Lexical.RemoteControl.Search.Indexer.Entry do
2020
@type t :: %__MODULE__{
2121
application: module(),
2222
subject: subject(),
23-
parent: entry_reference(),
23+
parent: entry_id(),
2424
path: Path.t(),
2525
range: Lexical.Document.Range.t(),
26-
ref: entry_reference(),
2726
subtype: entry_subtype(),
2827
type: entry_type(),
2928
updated_at: :calendar.datetime()
@@ -33,22 +32,22 @@ defmodule Lexical.RemoteControl.Search.Indexer.Entry do
3332

3433
use StructAccess
3534

36-
def reference(path, ref, parent, subject, type, range, application) do
37-
new(path, ref, parent, subject, type, :reference, range, application)
35+
def reference(path, id, parent, subject, type, range, application) do
36+
new(path, id, parent, subject, type, :reference, range, application)
3837
end
3938

40-
def definition(path, ref, parent, subject, type, range, application) do
41-
new(path, ref, parent, subject, type, :definition, range, application)
39+
def definition(path, id, parent, subject, type, range, application) do
40+
new(path, id, parent, subject, type, :definition, range, application)
4241
end
4342

44-
defp new(path, ref, parent, subject, type, subtype, range, application) do
43+
defp new(path, id, parent, subject, type, subtype, range, application) do
4544
%__MODULE__{
4645
application: application,
4746
subject: subject,
47+
id: id,
4848
parent: parent,
4949
path: path,
5050
range: range,
51-
ref: ref,
5251
subtype: subtype,
5352
type: type,
5453
updated_at: timestamp()

apps/remote_control/lib/lexical/remote_control/search/indexer/extractors/function_definition.ex

+3-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ defmodule Lexical.RemoteControl.Search.Indexer.Extractors.FunctionDefinition do
33
alias Lexical.Document.Position
44
alias Lexical.Document.Range
55
alias Lexical.Formats
6+
alias Lexical.Identifier
67
alias Lexical.RemoteControl
78
alias Lexical.RemoteControl.Search.Indexer.Entry
89
alias Lexical.RemoteControl.Search.Indexer.Metadata
@@ -39,8 +40,8 @@ defmodule Lexical.RemoteControl.Search.Indexer.Extractors.FunctionDefinition do
3940
entry =
4041
Entry.definition(
4142
reducer.analysis.document.path,
42-
make_ref(),
43-
block.parent_ref,
43+
Identifier.next_global!(),
44+
block.parent_id,
4445
mfa,
4546
type,
4647
range,

apps/remote_control/lib/lexical/remote_control/search/indexer/extractors/function_reference.ex

+3-2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ defmodule Lexical.RemoteControl.Search.Indexer.Extractors.FunctionReference do
22
alias Lexical.Document.Position
33
alias Lexical.Document.Range
44
alias Lexical.Formats
5+
alias Lexical.Identifier
56
alias Lexical.RemoteControl
67
alias Lexical.RemoteControl.Search.Indexer.Entry
78
alias Lexical.RemoteControl.Search.Indexer.Metadata
@@ -140,8 +141,8 @@ defmodule Lexical.RemoteControl.Search.Indexer.Extractors.FunctionReference do
140141

141142
Entry.reference(
142143
reducer.analysis.document.path,
143-
make_ref(),
144-
block.parent_ref,
144+
Identifier.next_global!(),
145+
block.parent_id,
145146
mfa,
146147
:function,
147148
range,

apps/remote_control/lib/lexical/remote_control/search/indexer/extractors/module.ex

+7-6
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ defmodule Lexical.RemoteControl.Search.Indexer.Extractors.Module do
66
alias Lexical.Ast
77
alias Lexical.Document.Position
88
alias Lexical.Document.Range
9+
alias Lexical.Identifier
910
alias Lexical.ProcessCache
1011
alias Lexical.RemoteControl
1112
alias Lexical.RemoteControl.Search.Indexer.Entry
@@ -31,8 +32,8 @@ defmodule Lexical.RemoteControl.Search.Indexer.Extractors.Module do
3132
entry =
3233
Entry.definition(
3334
reducer.analysis.document.path,
34-
block.ref,
35-
block.parent_ref,
35+
block.id,
36+
block.parent_id,
3637
aliased_module,
3738
:module,
3839
range,
@@ -64,8 +65,8 @@ defmodule Lexical.RemoteControl.Search.Indexer.Extractors.Module do
6465
entry =
6566
Entry.reference(
6667
reducer.analysis.document.path,
67-
make_ref(),
68-
current_block.ref,
68+
Identifier.next_global!(),
69+
current_block.id,
6970
module,
7071
:module,
7172
range,
@@ -91,8 +92,8 @@ defmodule Lexical.RemoteControl.Search.Indexer.Extractors.Module do
9192
entry =
9293
Entry.reference(
9394
reducer.analysis.document.path,
94-
make_ref(),
95-
current_block.ref,
95+
Identifier.next_global!(),
96+
current_block.id,
9697
module,
9798
:module,
9899
range,

apps/remote_control/lib/lexical/remote_control/search/indexer/source/block.ex

+6-3
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,16 @@ defmodule Lexical.RemoteControl.Search.Indexer.Source.Block do
22
@moduledoc """
33
A struct that represents a block of source code
44
"""
5-
defstruct [:starts_at, :ends_at, :ref, :parent_ref]
5+
6+
defstruct [:starts_at, :ends_at, :id, :parent_id]
7+
alias Lexical.Identifier
68

79
def root do
8-
%__MODULE__{ref: :root}
10+
%__MODULE__{id: :root}
911
end
1012

1113
def new(starts_at, ends_at) do
12-
%__MODULE__{starts_at: starts_at, ends_at: ends_at, ref: make_ref()}
14+
id = Identifier.next_global!()
15+
%__MODULE__{starts_at: starts_at, ends_at: ends_at, id: id}
1316
end
1417
end

apps/remote_control/lib/lexical/remote_control/search/indexer/source/reducer.ex

+3-3
Original file line numberDiff line numberDiff line change
@@ -109,20 +109,20 @@ defmodule Lexical.RemoteControl.Search.Indexer.Source.Reducer do
109109

110110
defp push_block(%__MODULE__{} = reducer, %Block{} = block) do
111111
parent = List.first(reducer.blocks)
112-
block = %Block{block | parent_ref: parent.ref}
112+
block = %Block{block | parent_id: parent.id}
113113
%__MODULE__{reducer | blocks: [block | reducer.blocks]}
114114
end
115115

116116
# you never pop the root block in a document
117-
defp pop_block(%__MODULE__{blocks: [%Block{ref: :root}]} = reducer), do: reducer
117+
defp pop_block(%__MODULE__{blocks: [%Block{id: :root}]} = reducer), do: reducer
118118

119119
defp pop_block(%__MODULE__{} = reducer) do
120120
[_ | rest] = reducer.blocks
121121
%__MODULE__{reducer | blocks: rest}
122122
end
123123

124124
# The root block in the document goes on forever
125-
defp block_ended?(%__MODULE__{blocks: [%Block{ref: :root}]}), do: false
125+
defp block_ended?(%__MODULE__{blocks: [%Block{id: :root}]}), do: false
126126

127127
defp block_ended?(%__MODULE__{} = reducer) do
128128
%Block{} = block = current_block(reducer)

apps/remote_control/lib/lexical/remote_control/search/store/backend.ex

+2-2
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ defmodule Lexical.RemoteControl.Search.Store.Backend do
6767
@doc """
6868
Deletes all entries whose path is equal to the one passed in.
6969
"""
70-
@callback delete_by_path(Path.t()) :: {:ok, [reference()]} | {:error, any()}
70+
@callback delete_by_path(Path.t()) :: {:ok, [Entry.entry_id()]} | {:error, any()}
7171

7272
@doc """
7373
Finds all entries
@@ -77,7 +77,7 @@ defmodule Lexical.RemoteControl.Search.Store.Backend do
7777
@doc """
7878
Finds entries whose id attribute is in the given list
7979
"""
80-
@callback find_by_refs([reference()], type_query(), subtype_query()) :: [Entry.t()]
80+
@callback find_by_ids([Entry.entry_id()], type_query(), subtype_query()) :: [Entry.t()]
8181

8282
@optional_callbacks sync: 1
8383
end

apps/remote_control/lib/lexical/remote_control/search/store/backends/ets.ex

+2-2
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ defmodule Lexical.RemoteControl.Search.Store.Backends.Ets do
6969
end
7070

7171
@impl Backend
72-
def find_by_refs(references, type, subtype) do
73-
GenServer.call(genserver_name(), {:find_by_references, [references, type, subtype]})
72+
def find_by_ids(ids, type, subtype) do
73+
GenServer.call(genserver_name(), {:find_by_ids, [ids, type, subtype]})
7474
end
7575

7676
def start_link(%Project{} = project) do

0 commit comments

Comments
 (0)