Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
tonytonyjan committed Sep 6, 2014
0 parents commit 2550338
Show file tree
Hide file tree
Showing 4 changed files with 156 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .rspec
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
--color
--require spec_helper
50 changes: 50 additions & 0 deletions lib/jaro_winkler.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Jaro and Jaro-Winkler string similarity measures.
#
# Both functions return a Float where 1.0 means the strings are identical and
# 0.0 means no characters could be matched (or one of the strings is empty).
module JaroWinkler
  module_function

  # Computes the Jaro similarity of two strings.
  #
  # A character of +s1+ matches a character of +s2+ when they are equal and
  # their positions differ by at most the match window. Each character of
  # +s2+ can be claimed by only one character of +s1+, which keeps the score
  # inside 0.0..1.0 even when the strings contain repeated characters.
  #
  # @param s1 [String] first string
  # @param s2 [String] second string
  # @return [Float] similarity in 0.0..1.0; 0.0 when either string is empty
  def jaro_distance(s1, s2)
    return 0.0 if s1.empty? || s2.empty?

    chars1 = s1.chars
    chars2 = s2.chars
    length1 = chars1.length
    length2 = chars2.length
    # Clamp at zero: for one-character strings `max / 2 - 1` would be -1 and
    # no position, not even the identical one, could ever match.
    window_size = [[length1, length2].max / 2 - 1, 0].max
    matched1 = Array.new(length1, false)
    matched2 = Array.new(length2, false)
    matches = 0

    chars1.each_with_index do |c1, i|
      left = [i - window_size, 0].max
      right = [i + window_size, length2 - 1].min
      # When s1 is much longer than s2, left can exceed right; the range is
      # then empty and the character simply has no candidates.
      (left..right).each do |j|
        next if matched2[j] || chars2[j] != c1

        matched1[i] = matched2[j] = true
        matches += 1
        break
      end
    end
    return 0.0 if matches.zero?

    # Matched characters of each string, in their original order. Every
    # position where the two sequences disagree is half a transposition.
    sequence1 = chars1.each_with_index.select { |_c, i| matched1[i] }.map(&:first)
    sequence2 = chars2.each_with_index.select { |_c, j| matched2[j] }.map(&:first)
    transpositions = sequence1.zip(sequence2).count { |a, b| a != b } / 2

    matches = matches.to_f
    1.0 / 3.0 * (matches / length1 + matches / length2 + (matches - transpositions) / matches)
  end

  # Computes the Jaro-Winkler similarity: the Jaro similarity boosted for
  # strings that share a common prefix of up to four characters.
  #
  # @param s1 [String] first string
  # @param s2 [String] second string
  # @param weight [Numeric] prefix scaling factor; must not exceed 0.25
  # @param threshold [Numeric] the prefix boost is applied only when the Jaro
  #   similarity is not below this value
  # @param case_match [Boolean] when true, both strings are downcased before
  #   comparison, i.e. the comparison ignores case
  # @return [Float] the similarity score
  # @raise [RuntimeError] if +weight+ is greater than 0.25
  def jaro_winkler_distance(s1, s2, weight: 0.1, threshold: 0.7, case_match: false)
    raise 'Scaling factor should not exceed 0.25, otherwise the distance can become larger than 1' if weight > 0.25

    s1, s2 = s1.downcase, s2.downcase if case_match
    distance = jaro_distance(s1, s2)
    return distance if distance < threshold

    # Length of the common prefix, capped at four characters.
    max_length = [4, s1.length, s2.length].min
    prefix = (0...max_length).take_while { |i| s1[i] == s2[i] }.size
    distance + ((prefix * weight) * (1 - distance))
  end
end
15 changes: 15 additions & 0 deletions spec/jaro_winkler_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
require 'jaro_winkler'

# Specs for JaroWinkler.jaro_winkler_distance.
#
# Computed scores are compared with a tolerance rather than exact equality,
# because they are the result of floating-point arithmetic.
RSpec.describe JaroWinkler do
  describe '.jaro_winkler_distance' do
    it 'matches the published reference scores' do
      expect(JaroWinkler.jaro_winkler_distance('MARTHA', 'MARHTA')).to be_within(0.0001).of(0.9611)
      expect(JaroWinkler.jaro_winkler_distance('DIXON', 'DICKSONX')).to be_within(0.0001).of(0.8133)
      expect(JaroWinkler.jaro_winkler_distance('abcvwxyz', 'cabvwxyz')).to be_within(0.0001).of(0.9583)
      expect(JaroWinkler.jaro_winkler_distance('DWAYNE', 'DUANE')).to be_within(0.0001).of(0.84)
    end

    it 'returns 1.0 for identical strings' do
      expect(JaroWinkler.jaro_winkler_distance('tony', 'tony')).to eq 1.0
      expect(JaroWinkler.jaro_winkler_distance('tonytonyjan', 'tonytonyjan')).to eq 1.0
    end

    it 'returns 0.0 when either string is empty' do
      expect(JaroWinkler.jaro_winkler_distance('', '')).to eq 0.0
      expect(JaroWinkler.jaro_winkler_distance('tony', '')).to eq 0.0
      expect(JaroWinkler.jaro_winkler_distance('', 'tony')).to eq 0.0
    end
  end
end
89 changes: 89 additions & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# This file was generated by the `rspec --init` command. Conventionally, all
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
# The generated `.rspec` file contains `--require spec_helper` which will cause this
# file to always be loaded, without a need to explicitly require it in any files.
#
# Given that it is always loaded, you are encouraged to keep this file as
# light-weight as possible. Requiring heavyweight dependencies from this file
# will add to the boot time of your test suite on EVERY test run, even for an
# individual file that may not need all of that loaded. Instead, consider making
# a separate helper file that requires the additional dependencies and performs
# the additional setup, and require it from the spec files that actually need it.
#
# The `.rspec` file also contains a few flags that are not defaults but that
# users commonly want.
#
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
# Global RSpec configuration. Two settings are active: chain clauses are
# included in custom matcher descriptions, and partial doubles are verified.
# The remaining suggested settings sit inside a =begin/=end block comment
# further down and therefore have no effect.
RSpec.configure do |config|
# rspec-expectations config goes here. You can use an alternate
# assertion/expectation library such as wrong or the stdlib/minitest
# assertions if you prefer.
config.expect_with :rspec do |expectations|
# This option will default to `true` in RSpec 4. It makes the `description`
# and `failure_message` of custom matchers include text for helper methods
# defined using `chain`, e.g.:
# be_bigger_than(2).and_smaller_than(4).description
# # => "be bigger than 2 and smaller than 4"
# ...rather than:
# # => "be bigger than 2"
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
end

# rspec-mocks config goes here. You can use an alternate test double
# library (such as bogus or mocha) by changing the `mock_with` option here.
config.mock_with :rspec do |mocks|
# Prevents you from mocking or stubbing a method that does not exist on
# a real object. This is generally recommended, and will default to
# `true` in RSpec 4.
mocks.verify_partial_doubles = true
end

# The settings below are suggested to provide a good initial experience
# with RSpec, but feel free to customize to your heart's content.
# NOTE: everything between the `=begin` and `=end` markers is a Ruby block
# comment, so none of the settings listed there is applied until the two
# marker lines are removed.
=begin
# These two settings work together to allow you to limit a spec run
# to individual examples or groups you care about by tagging them with
# `:focus` metadata. When nothing is tagged with `:focus`, all examples
# get run.
config.filter_run :focus
config.run_all_when_everything_filtered = true
# Limits the available syntax to the non-monkey patched syntax that is recommended.
# For more details, see:
# - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
# - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
# - http://myronmars.to/n/dev-blog/2014/05/notable-changes-in-rspec-3#new__config_option_to_disable_rspeccore_monkey_patching
config.disable_monkey_patching!
# This setting enables warnings. It's recommended, but in some cases may
# be too noisy due to issues in dependencies.
config.warnings = true
# Many RSpec users commonly either run the entire suite or an individual
# file, and it's useful to allow more verbose output when running an
# individual spec file.
if config.files_to_run.one?
# Use the documentation formatter for detailed output,
# unless a formatter has already been configured
# (e.g. via a command-line flag).
config.default_formatter = 'doc'
end
# Print the 10 slowest examples and example groups at the
# end of the spec run, to help surface which specs are running
# particularly slow.
config.profile_examples = 10
# Run specs in random order to surface order dependencies. If you find an
# order dependency and want to debug it, you can fix the order by providing
# the seed, which is printed after each run.
# --seed 1234
config.order = :random
# Seed global randomization in this process using the `--seed` CLI option.
# Setting this allows you to use `--seed` to deterministically reproduce
# test failures related to randomization by passing the same `--seed` value
# as the one that triggered the failure.
Kernel.srand config.seed
=end
end

0 comments on commit 2550338

Please sign in to comment.