From 4f4d2e9e0bef3be548ba87cdb798de35462b47d0 Mon Sep 17 00:00:00 2001 From: Ian Limarta Date: Tue, 16 Jul 2024 11:19:18 -0500 Subject: [PATCH] Update packages. Accommodate for various string types in CSV. Remove unused packages. --- Project.toml | 17 +++++++---------- src/distributions/add_typos.jl | 4 ++-- src/distributions/choose_proportionally.jl | 5 +++++ src/distributions/distributions.jl | 2 +- src/distributions/string_prior.jl | 10 +++++----- src/distributions/time_prior.jl | 2 +- src/utils.jl | 2 +- 7 files changed, 22 insertions(+), 20 deletions(-) diff --git a/Project.toml b/Project.toml index a9941a4..3b9c924 100644 --- a/Project.toml +++ b/Project.toml @@ -4,23 +4,20 @@ authors = ["alex-lew "] version = "0.1.0" [deps] -BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" -DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" -Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d" MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" -Memoize = "c03570c3-d221-55d1-a50c-7939bbd78826" -Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" -Polynomials = "f27b6e38-b328-58d1-80ce-0feddd5e7a45" -PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee" -Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" -StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StringDistances = "88034a9c-02f8-509d-84a9-84ec65e18404" [compat] -CSV = "0.8.5" \ No newline at end of file +CSV = "0.10.14" +DataFrames = "1.6.1" +julia = "1.10" +Distributions = "0.25.109" +LightGraphs = "1.3.5" +MacroTools = "0.5.13" +StringDistances = "0.11.3" diff --git a/src/distributions/add_typos.jl b/src/distributions/add_typos.jl index 8ffcc4e..74b5d3e 100644 --- a/src/distributions/add_typos.jl +++ 
b/src/distributions/add_typos.jl @@ -33,7 +33,7 @@ end const IMPOSSIBLE = -1e5 -random(::AddTypos, word::String, max_typos=nothing) = begin +random(::AddTypos, word::AbstractString, max_typos=nothing) = begin num_typos = rand(NegativeBinomial(ceil(length(word) / 5.0), 0.9)) num_typos = isnothing(max_typos) ? num_typos : min(max_typos, num_typos) for i=1:num_typos @@ -47,7 +47,7 @@ end const add_typos_density_dict = Dict{Tuple{String, String}, Float64}() const LETTERS_PER_TYPO = 5.0 -logdensity(::AddTypos, observed::Union{String,Missing}, word::String, max_typos=nothing) = begin +logdensity(::AddTypos, observed::Union{<:AbstractString,Missing}, word::AbstractString, max_typos=nothing) = begin if ismissing(observed) return 0.0 end diff --git a/src/distributions/choose_proportionally.jl b/src/distributions/choose_proportionally.jl index 7e468f2..fd29f9b 100644 --- a/src/distributions/choose_proportionally.jl +++ b/src/distributions/choose_proportionally.jl @@ -1,3 +1,8 @@ +""" + struct ChooseProportionally <: PCleanDistribution + +A categorical distribution. +""" struct ChooseProportionally <: PCleanDistribution end function random(::ChooseProportionally, options, probs::AbstractArray{T}) where T <: Real diff --git a/src/distributions/distributions.jl b/src/distributions/distributions.jl index f6911d6..52cf519 100644 --- a/src/distributions/distributions.jl +++ b/src/distributions/distributions.jl @@ -5,7 +5,7 @@ function random end function logdensity end struct ProposalDummyValue end -const proposal_dummy_value = ProposalDummyValue() +const PROPOSAL_DUMMY_VALUE = ProposalDummyValue() # Can this distribution enumerate values on which # its posterior support is concentrated? 
If the distribution's diff --git a/src/distributions/string_prior.jl b/src/distributions/string_prior.jl index e8f134a..ab648fe 100644 --- a/src/distributions/string_prior.jl +++ b/src/distributions/string_prior.jl @@ -13,19 +13,19 @@ const alphabet_lookup = Dict([l => i for (i, l) in enumerate(alphabet)]) has_discrete_proposal(::StringPrior) = true # Assume proposal_atoms are unique. -function discrete_proposal(::StringPrior, min_length::Int, max_length::Int, proposal_atoms::Vector{String})::Tuple{Vector{Union{String, ProposalDummyValue}}, Vector{Float64}} - options = Union{String, ProposalDummyValue}[proposal_atoms..., proposal_dummy_value] +function discrete_proposal(::StringPrior, min_length::Int, max_length::Int, proposal_atoms::Vector{T})::Tuple{Vector{Union{T, ProposalDummyValue}}, Vector{Float64}} where T<:AbstractString + options = Union{T, ProposalDummyValue}[proposal_atoms..., PROPOSAL_DUMMY_VALUE] probs = map(s -> logdensity(StringPrior(), s, min_length, max_length, proposal_atoms), proposal_atoms) total = logsumexp(probs) probs = Float64[probs..., log1p(-exp(total))] return (options, probs) end -discrete_proposal_dummy_value(::StringPrior, min_length::Int, max_length::Int, proposal_atoms::Vector{String}) = begin +function discrete_proposal_dummy_value(::StringPrior, min_length::Int, max_length::Int, proposal_atoms::Vector{<:AbstractString}) join(fill("*", Int(floor((min_length + max_length) / 2)))) end -random(::StringPrior, min_length::Int, max_length::Int, proposal_atoms::Vector{String}) = begin +random(::StringPrior, min_length::Int, max_length::Int, proposal_atoms::Vector{<:AbstractString}) = begin len = rand(DiscreteUniform(min_length, max_length)) letters = [] for i=1:len @@ -40,7 +40,7 @@ end const UNUSUAL_LETTER_PENALTY = 1000 const string_prior_density_dict = Dict{Tuple{String, Int, Int}, Float64}() -function logdensity(::StringPrior, observed::String, min_length::Int, max_length::Int, proposal_atoms::Vector{String}) +function 
logdensity(::StringPrior, observed::AbstractString, min_length::Int, max_length::Int, proposal_atoms::Vector{<:AbstractString}) get!(string_prior_density_dict, (observed, min_length, max_length)) do if length(observed) < min_length || length(observed) > max_length return -Inf diff --git a/src/distributions/time_prior.jl b/src/distributions/time_prior.jl index c22f6f5..7184092 100644 --- a/src/distributions/time_prior.jl +++ b/src/distributions/time_prior.jl @@ -6,7 +6,7 @@ has_discrete_proposal(::TimePrior) = true # Assume proposal_atoms are unique. function discrete_proposal(::TimePrior, proposal_atoms::Vector{String}) - options = [proposal_atoms..., proposal_dummy_value] + options = [proposal_atoms..., PROPOSAL_DUMMY_VALUE] probs = map(x -> isnothing(match(r"^\d?\d:\d\d [ap]\.m\.$", x)) ? -Inf : -log(1440), proposal_atoms) total = logsumexp(probs) probs = [probs..., log1p(-exp(total))] diff --git a/src/utils.jl b/src/utils.jl index 2561061..5c8ec20 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -30,7 +30,7 @@ end Like `normalize`, but in log-space and more numerically stable. """ -logprobs(proportions::Vector{Float64}) = begin +function logprobs(proportions::AbstractVector{<:Real}) l = log.(proportions) #l .- logsumexp(l) end