Skip to content

Commit f1be52a

Browse files
author
Alex Evanczuk
authored
Do not glob out files unnecessarily (#54)
* Do not glob out all files unless we have to * pass nil to validate through CLI so we do not need to call tracked files * add a test for many files case * code review
1 parent 5500ab3 commit f1be52a

File tree

11 files changed

+121
-36
lines changed

11 files changed

+121
-36
lines changed

Gemfile.lock

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
PATH
22
remote: .
33
specs:
4-
code_ownership (1.32.8)
4+
code_ownership (1.32.9)
55
code_teams (~> 1.0)
66
packs
77
sorbet-runtime

code_ownership.gemspec

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Gem::Specification.new do |spec|
22
spec.name = "code_ownership"
3-
spec.version = '1.32.8'
3+
spec.version = '1.32.9'
44
spec.authors = ['Gusto Engineers']
55
spec.email = ['[email protected]']
66
spec.summary = 'A gem to help engineering teams declare ownership of code'

lib/code_ownership.rb

+11-5
Original file line numberDiff line numberDiff line change
@@ -82,18 +82,24 @@ def self.remove_file_annotation!(filename)
8282

8383
sig do
8484
params(
85-
files: T::Array[String],
8685
autocorrect: T::Boolean,
87-
stage_changes: T::Boolean
86+
stage_changes: T::Boolean,
87+
files: T.nilable(T::Array[String]),
8888
).void
8989
end
9090
def validate!(
91-
files: Private.tracked_files,
9291
autocorrect: true,
93-
stage_changes: true
92+
stage_changes: true,
93+
files: nil
9494
)
9595
Private.load_configuration!
96-
tracked_file_subset = Private.tracked_files & files
96+
97+
tracked_file_subset = if files
98+
files.select{|f| Private.file_tracked?(f)}
99+
else
100+
Private.tracked_files
101+
end
102+
97103
Private.validate!(files: tracked_file_subset, autocorrect: autocorrect, stage_changes: stage_changes)
98104
end
99105

lib/code_ownership/cli.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def self.validate!(argv)
6161
File.exist?(file)
6262
end
6363
else
64-
Private.tracked_files
64+
nil
6565
end
6666

6767
CodeOwnership.validate!(

lib/code_ownership/private.rb

+18
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,24 @@ def self.tracked_files
8181
@tracked_files ||= Dir.glob(configuration.owned_globs) - Dir.glob(configuration.unowned_globs)
8282
end
8383

84+
sig { params(file: String).returns(T::Boolean) }
85+
def self.file_tracked?(file)
86+
# Another way to accomplish this is
87+
# (Dir.glob(configuration.owned_globs) - Dir.glob(configuration.unowned_globs)).include?(file)
88+
# However, globbing out can take 5 or more seconds on a large repository, dramatically slowing down
89+
# invocations to `bin/codeownership validate --diff`.
90+
# Using `File.fnmatch?` is a lot faster!
91+
in_owned_globs = configuration.owned_globs.all? do |owned_glob|
92+
File.fnmatch?(owned_glob, file, File::FNM_PATHNAME | File::FNM_EXTGLOB)
93+
end
94+
95+
in_unowned_globs = configuration.unowned_globs.all? do |unowned_glob|
96+
File.fnmatch?(unowned_glob, file, File::FNM_PATHNAME | File::FNM_EXTGLOB)
97+
end
98+
99+
in_owned_globs && !in_unowned_globs
100+
end
101+
84102
sig { params(team_name: String, location_of_reference: String).returns(CodeTeams::Team) }
85103
def self.find_team!(team_name, location_of_reference)
86104
found_team = CodeTeams.find(team_name)

lib/code_ownership/private/glob_cache.rb

+47-8
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class GlobCache
1818
FilesByMapper = T.type_alias do
1919
T::Hash[
2020
String,
21-
T::Array[MapperDescription]
21+
T::Set[MapperDescription]
2222
]
2323
end
2424

@@ -32,6 +32,20 @@ def raw_cache_contents
3232
@raw_cache_contents
3333
end
3434

35+
sig { params(files: T::Array[String]).returns(FilesByMapper) }
36+
def mapper_descriptions_that_map_files(files)
37+
if files.count > 100
38+
# When looking at many files, expanding the cache out using Dir.glob and checking for intersections is faster
39+
files_by_mappers = files.map{ |f| [f, Set.new([]) ]}.to_h
40+
files_by_mappers.merge(files_by_mappers_via_expanded_cache)
41+
else
42+
# When looking at few files, using File.fnmatch is faster
43+
files_by_mappers_via_file_fnmatch(files)
44+
end
45+
end
46+
47+
private
48+
3549
sig { returns(CacheShape) }
3650
def expanded_cache
3751
@expanded_cache = T.let(@expanded_cache, T.nilable(CacheShape))
@@ -52,20 +66,45 @@ def expanded_cache
5266
end
5367

5468
sig { returns(FilesByMapper) }
55-
def files_by_mapper
56-
@files_by_mapper ||= T.let(@files_by_mapper, T.nilable(FilesByMapper))
57-
@files_by_mapper ||= begin
58-
files_by_mapper = {}
69+
def files_by_mappers_via_expanded_cache
70+
@files_by_mappers ||= T.let(@files_by_mappers, T.nilable(FilesByMapper))
71+
@files_by_mappers ||= begin
72+
files_by_mappers = T.let({}, FilesByMapper)
5973
expanded_cache.each do |mapper_description, file_by_owner|
6074
file_by_owner.each do |file, owner|
61-
files_by_mapper[file] ||= []
62-
files_by_mapper[file] << mapper_description
75+
files_by_mappers[file] ||= Set.new([])
76+
files_by_mappers.fetch(file) << mapper_description
6377
end
6478
end
6579

66-
files_by_mapper
80+
files_by_mappers
6781
end
6882
end
83+
84+
sig { params(files: T::Array[String]).returns(FilesByMapper) }
85+
def files_by_mappers_via_file_fnmatch(files)
86+
files_by_mappers = T.let({}, FilesByMapper)
87+
88+
files.each do |file|
89+
files_by_mappers[file] ||= Set.new([])
90+
@raw_cache_contents.each do |mapper_description, globs_by_owner|
91+
# As much as I'd like to *not* special case the file annotations mapper, using File.fnmatch? on the thousands of files mapped by the
92+
# file annotations mapper is a lot of unnecessary extra work.
93+
# Therefore we can just check if the file is in the globs directly for file annotations, otherwise use File.fnmatch
94+
if mapper_description == OwnershipMappers::FileAnnotations::DESCRIPTION
95+
files_by_mappers.fetch(file) << mapper_description if globs_by_owner[file]
96+
else
97+
globs_by_owner.each do |glob, owner|
98+
if File.fnmatch?(glob, file, File::FNM_PATHNAME | File::FNM_EXTGLOB)
99+
files_by_mappers.fetch(file) << mapper_description
100+
end
101+
end
102+
end
103+
end
104+
end
105+
106+
files_by_mappers
107+
end
69108
end
70109
end
71110
end

lib/code_ownership/private/ownership_mappers/file_annotations.rb

+3-5
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ class FileAnnotations
2121
@@map_files_to_owners = T.let({}, T.nilable(T::Hash[String, ::CodeTeams::Team])) # rubocop:disable Style/ClassVars
2222

2323
TEAM_PATTERN = T.let(/\A(?:#|\/\/) @team (?<team>.*)\Z/.freeze, Regexp)
24+
DESCRIPTION = 'Annotations at the top of file'
2425

2526
sig do
2627
override.params(file: String).
@@ -51,11 +52,8 @@ def globs_to_owner(files)
5152
end
5253
def update_cache(cache, files)
5354
cache.merge!(globs_to_owner(files))
54-
55-
# TODO: Make `tracked_files` return a set
56-
fileset = Set.new(Private.tracked_files)
5755
invalid_files = cache.keys.select do |file|
58-
!fileset.include?(file)
56+
!Private.file_tracked?(file)
5957
end
6058
invalid_files.each do |invalid_file|
6159
cache.delete(invalid_file)
@@ -116,7 +114,7 @@ def remove_file_annotation!(filename)
116114

117115
sig { override.returns(String) }
118116
def description
119-
'Annotations at the top of file'
117+
DESCRIPTION
120118
end
121119

122120
sig { override.void }

lib/code_ownership/private/validations/files_have_owners.rb

+5-5
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ class FilesHaveOwners
1010

1111
sig { override.params(files: T::Array[String], autocorrect: T::Boolean, stage_changes: T::Boolean).returns(T::Array[String]) }
1212
def validation_errors(files:, autocorrect: true, stage_changes: true)
13-
files_by_mapper = Private.glob_cache.files_by_mapper
14-
15-
files_not_mapped_at_all = files.select do |file|
16-
files_by_mapper.fetch(file, []).count == 0
13+
cache = Private.glob_cache
14+
file_mappings = cache.mapper_descriptions_that_map_files(files)
15+
files_not_mapped_at_all = file_mappings.select do |file, mapper_descriptions|
16+
mapper_descriptions.count == 0
1717
end
1818

1919
errors = T.let([], T::Array[String])
@@ -22,7 +22,7 @@ def validation_errors(files:, autocorrect: true, stage_changes: true)
2222
errors << <<~MSG
2323
Some files are missing ownership:
2424
25-
#{files_not_mapped_at_all.map { |file| "- #{file}" }.join("\n")}
25+
#{files_not_mapped_at_all.map { |file, mappers| "- #{file}" }.join("\n")}
2626
MSG
2727
end
2828

lib/code_ownership/private/validations/files_have_unique_owners.rb

+5-9
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,10 @@ class FilesHaveUniqueOwners
1010

1111
sig { override.params(files: T::Array[String], autocorrect: T::Boolean, stage_changes: T::Boolean).returns(T::Array[String]) }
1212
def validation_errors(files:, autocorrect: true, stage_changes: true)
13-
files_by_mapper = Private.glob_cache.files_by_mapper
14-
15-
files_mapped_by_multiple_mappers = {}
16-
files.each do |file|
17-
mappers = files_by_mapper.fetch(file, [])
18-
if mappers.count > 1
19-
files_mapped_by_multiple_mappers[file] = mappers
20-
end
13+
cache = Private.glob_cache
14+
file_mappings = cache.mapper_descriptions_that_map_files(files)
15+
files_mapped_by_multiple_mappers = file_mappings.select do |file, mapper_descriptions|
16+
mapper_descriptions.count > 1
2117
end
2218

2319
errors = T.let([], T::Array[String])
@@ -26,7 +22,7 @@ def validation_errors(files:, autocorrect: true, stage_changes: true)
2622
errors << <<~MSG
2723
Code ownership should only be defined for each file in one way. The following files have declared ownership in multiple ways.
2824
29-
#{files_mapped_by_multiple_mappers.map { |file, descriptions| "- #{file} (#{descriptions.join(', ')})" }.join("\n")}
25+
#{files_mapped_by_multiple_mappers.map { |file, descriptions| "- #{file} (#{descriptions.to_a.join(', ')})" }.join("\n")}
3026
MSG
3127
end
3228

spec/lib/code_ownership/cli_spec.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
expect(CodeOwnership).to receive(:validate!) do |args| # rubocop:disable RSpec/MessageSpies
1616
expect(args[:autocorrect]).to eq true
1717
expect(args[:stage_changes]).to eq true
18-
expect(args[:files]).to match_array(["app/services/my_file.rb", "frontend/javascripts/my_file.jsx"])
18+
expect(args[:files]).to be_nil
1919
end
2020
subject
2121
end

spec/lib/code_ownership/private/validations/files_have_owners_spec.rb

+28
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@ module CodeOwnership
2020
before do
2121
write_file('app/missing_ownership.rb', <<~CONTENTS)
2222
CONTENTS
23+
24+
write_file('app/some_other_file.rb', <<~CONTENTS)
25+
# @team Bar
26+
CONTENTS
27+
28+
write_file('config/teams/bar.yml', <<~CONTENTS)
29+
name: Bar
30+
CONTENTS
2331
end
2432

2533
context 'the file is not in unowned_globs' do
@@ -58,6 +66,26 @@ module CodeOwnership
5866
end
5967
end
6068

69+
context 'many files in owned_globs do not have an owner' do
70+
before do
71+
write_configuration
72+
73+
500.times do |i|
74+
write_file("app/missing_ownership#{i}.rb", <<~CONTENTS)
75+
CONTENTS
76+
end
77+
end
78+
79+
it 'lets the user know the file must have ownership' do
80+
expect { CodeOwnership.validate! }.to raise_error do |e|
81+
expect(e).to be_a CodeOwnership::InvalidCodeOwnershipConfigurationError
82+
expect(e.message).to include "Some files are missing ownership:"
83+
500.times do |i|
84+
expect(e.message).to include "- app/missing_ownership#{i}.rb"
85+
end
86+
end
87+
end
88+
end
6189
end
6290
end
6391
end

0 commit comments

Comments
 (0)