Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
200 changes: 200 additions & 0 deletions src/PDB/Kabsch.jl
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,206 @@ function superimpose(
end
end

# Structure similarity metrics
# ----------------------------

_default_matches(A::AbstractVector{PDBResidue}, B::AbstractVector{PDBResidue}) =
((i, i) for i in 1:min(length(A), length(B)))

@inline _has_ca(res::PDBResidue) = !isempty(findatoms(res, "CA"))

function _valid_matches(
A::AbstractVector{PDBResidue},
B::AbstractVector{PDBResidue},
matches,
)
lenA, lenB = length(A), length(B)
valid = Tuple{Int,Int}[]
for (i, j) in matches
if (1 <= i <= lenA) && (1 <= j <= lenB) && _has_ca(A[i]) && _has_ca(B[j])
push!(valid, (i, j))
end
end
return valid
end

function _ca_distances(
A::AbstractVector{PDBResidue},
B::AbstractVector{PDBResidue},
matches::AbstractVector{<:Tuple{Int,Int}},
)
distances = Vector{Float64}(undef, length(matches))
@inbounds for idx in eachindex(matches)
i, j = matches[idx]
distances[idx] = distance(getCA(A[i]), getCA(B[j]))
end
return distances
end

function _fragment_windows(
matches::AbstractVector{<:Tuple{Int,Int}},
window_sizes::Tuple,
)
fragments = Vector{Vector{Tuple{Int,Int}}}()
isempty(matches) && return fragments
push!(fragments, matches)
for w in window_sizes
step = max(1, w ÷ 2)
start = 1
while start <= length(matches)
stop = min(length(matches), start + w - 1)
if stop - start + 1 >= 3
push!(fragments, matches[start:stop])
end
start += step
end
end
return fragments
end

function _best_counts(
A::AbstractVector{PDBResidue},
B::AbstractVector{PDBResidue},
fragments::AbstractVector{<:AbstractVector{<:Tuple{Int,Int}}},
matches_all::AbstractVector{<:Tuple{Int,Int}},
cutoffs::AbstractVector{<:Real},
)
best_counts = Dict{Float64,Int}(float(c) => 0 for c in cutoffs)
for fragment in fragments
Asuper, Bsuper, _ = superimpose(A, B, fragment)
distances = _ca_distances(Asuper, Bsuper, matches_all)
for cutoff in cutoffs
cutofff = float(cutoff)
cnt = count(d -> d <= cutofff, distances)
if cnt > best_counts[cutofff]
best_counts[cutofff] = cnt
end
end
end
return best_counts
end

"""
gdt_per_cutoff(A, B; matches = nothing, cutoffs = (1.0, 2.0, 4.0, 8.0), local_search = true, window_sizes = (8, 16, 32))

Compute the Global Distance Test (GDT) percentage for each distance cutoff in `cutoffs`
between two structures `A` (reference) and `B` (model). Corresponding residues are
provided via `matches`; if it is `nothing`, a 1:1 mapping up to the shortest structure
length is assumed.

Only Cα atoms are considered. When `local_search` is `true` (default), multiple candidate
superpositions are generated from contiguous alignment fragments of lengths given by
`window_sizes` to approximate the LGA search; otherwise, a single global superposition is
used. The result is a `Dict` mapping each cutoff to the fraction of aligned residues (in
percent) that can be placed within the cutoff distance.
"""
function gdt_per_cutoff(
A::AbstractVector{PDBResidue},
B::AbstractVector{PDBResidue};
matches = nothing,
cutoffs = (1.0, 2.0, 4.0, 8.0),
local_search::Bool = true,
window_sizes = (8, 16, 32),
)
matches_iter = isnothing(matches) ? _default_matches(A, B) : matches
matches_all = _valid_matches(A, B, matches_iter)
if isempty(matches_all)
return Dict(float(c) => 0.0 for c in cutoffs)
end

cutoffs_vec = Float64.(collect(cutoffs))
fragments = local_search ? _fragment_windows(matches_all, window_sizes) : [matches_all]
best_counts = _best_counts(A, B, fragments, matches_all, cutoffs_vec)
N = length(matches_all)
return Dict(c => 100.0 * best_counts[c] / N for c in keys(best_counts))
end

"""
gdt_ts(A, B; kwargs...)

Compute the CASP-style GDT_TS score (average of GDT at 1, 2, 4, and 8 Å) between `A`
and `B`. Keyword arguments are forwarded to [`gdt_per_cutoff`](@ref).
"""
function gdt_ts(
A::AbstractVector{PDBResidue},
B::AbstractVector{PDBResidue};
matches = nothing,
cutoffs = (1.0, 2.0, 4.0, 8.0),
kwargs...,
)
scores = gdt_per_cutoff(A, B; matches = matches, cutoffs = cutoffs, kwargs...)
return mean(values(scores))
end

"""
gdt_ha(A, B; kwargs...)

Compute the high-accuracy GDT_HA score (average of GDT at 0.5, 1, 2, and 4 Å) between
`A` and `B`. Keyword arguments are forwarded to [`gdt_per_cutoff`](@ref).
"""
function gdt_ha(
A::AbstractVector{PDBResidue},
B::AbstractVector{PDBResidue};
matches = nothing,
cutoffs = (0.5, 1.0, 2.0, 4.0),
kwargs...,
)
scores = gdt_per_cutoff(A, B; matches = matches, cutoffs = cutoffs, kwargs...)
return mean(values(scores))
end

tm_d0(L::Integer) = max(0.5, 1.24 * (L - 15)^(1 / 3) - 1.8)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Avoid DomainError in tm_score for short targets

tm_d0 currently computes 1.24 * (L - 15)^(1 / 3) - 1.8; when Ltarget is below 15 (short peptides), raising a negative base to the fractional exponent throws a DomainError before the max(0.5, …) guard can run. That means tm_score crashes instead of returning a score for small structures. Handle the Ltarget < 15 case explicitly (e.g., real cube root or early clamp) so short targets are supported.

Useful? React with 👍 / 👎.


function _tm_score_from_distances(distances::AbstractVector{<:Real}, Ltarget::Integer)
d0 = tm_d0(Ltarget)
invL = 1.0 / Ltarget
s = 0.0
@inbounds for d in distances
s += 1.0 / (1.0 + (d / d0)^2)
end
return invL * s
end

"""
tm_score(A, B; matches = nothing, Ltarget = length(A), local_search = true, window_sizes = (8, 16, 32))

Approximate the TM-score between reference structure `A` and model `B` using only Cα
atoms. `Ltarget` is the length of the reference structure used for normalization (default
`length(A)`). When `local_search` is `true`, multiple fragment-based superpositions are
evaluated and the maximum TM-score is returned; otherwise, a single global superposition
is used.
"""
function tm_score(
A::AbstractVector{PDBResidue},
B::AbstractVector{PDBResidue};
matches = nothing,
Ltarget::Integer = length(A),
local_search::Bool = true,
window_sizes = (8, 16, 32),
)
if Ltarget <= 0
return 0.0
end

matches_iter = isnothing(matches) ? _default_matches(A, B) : matches
matches_all = _valid_matches(A, B, matches_iter)
if isempty(matches_all)
return 0.0
end

fragments = local_search ? _fragment_windows(matches_all, window_sizes) : [matches_all]
best_score = 0.0
for fragment in fragments
Asuper, Bsuper, _ = superimpose(A, B, fragment)
distances = _ca_distances(Asuper, Bsuper, matches_all)
score = _tm_score_from_distances(distances, Ltarget)
if score > best_score
best_score = score
end
end
return best_score
end

# RMSF: Root Mean-Square-average distance (Fluctuation)
# -----------------------------------------------------

Expand Down
4 changes: 4 additions & 0 deletions src/PDB/PDB.jl
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ export # PDBResidues
superimpose,
mean_coordinates,
rmsf,
gdt_per_cutoff,
gdt_ts,
gdt_ha,
tm_score,
# MIToS.Utils
All,
read_file,
Expand Down
49 changes: 49 additions & 0 deletions test/PDB/StructureSimilarity.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
@testset "Structure similarity (GDT/TM-score)" begin
structure = read_file(joinpath(DATA, "1CBN.pdb"), PDBFile, group = "ATOM", model = "1")

@test gdt_ts(structure, structure) ≈ 100.0 atol = 1.0e-8
@test gdt_ha(structure, structure) ≈ 100.0 atol = 1.0e-8
@test tm_score(structure, structure) ≈ 1.0 atol = 1.0e-8

rot_z = [0.0 -1.0 0.0; 1.0 0.0 0.0; 0.0 0.0 1.0]
trans = [3.0, -2.0, 1.5]

function transform_residues(residues, rotation, translation)
coords = coordinatesmatrix(residues)
transformed = coords * rotation .+ transpose(translation)
return change_coordinates(residues, transformed)
end

transformed = transform_residues(structure, rot_z, trans)

@test gdt_ts(structure, transformed) ≈ 100.0 atol = 1.0e-6
@test gdt_ha(structure, transformed) ≈ 100.0 atol = 1.0e-6
@test tm_score(structure, transformed) ≈ 1.0 atol = 1.0e-6
@test gdt_ts(structure, transformed; local_search = false) ≈ 100.0 atol = 1.0e-6

function perturb_last_residues(residues, n::Int, shift)
coords = coordinatesmatrix(residues)
offset = 1
ranges = Vector{UnitRange{Int}}()
for res in residues
len = length(res)
push!(ranges, offset:(offset + len - 1))
offset += len
end
perturbed = copy(coords)
for r in ranges[(end - n + 1):end]
perturbed[r, :] .+= reshape(shift, 1, :)
end
return change_coordinates(residues, perturbed)
end

shifted = perturb_last_residues(structure, 5, [5.0, 0.0, 0.0])

noisy_gdt = gdt_ts(structure, shifted)
noisy_ha = gdt_ha(structure, shifted)
noisy_tm = tm_score(structure, shifted)

@test noisy_gdt < 100.0
@test noisy_ha < noisy_gdt
@test 0.0 < noisy_tm < 1.0
end
1 change: 1 addition & 0 deletions test/tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ end
include("PDB/PDBResidues.jl")
include("PDB/MMCIFParser.jl")
include("PDB/Plots.jl")
include("PDB/StructureSimilarity.jl")
end

# SIFTS
Expand Down
Loading