Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ version = "0.1.0"
[deps]
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
FITSIO = "525bcba6-941b-5504-bd06-fd0dc1a4d2eb"
HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
Expand All @@ -21,6 +22,7 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
[compat]
DataFrames = "1.3"
Distributions = "0.25"
DocStringExtensions = "0.9.4"
FFTW = "1.4"
FITSIO = "0.16"
HDF5 = "0.16"
Expand Down
4 changes: 3 additions & 1 deletion src/Stingray.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
module Stingray

using DocStringExtensions

using ResumableFunctions, StatsBase, Statistics, DataFrames
using FFTW, NaNMath, FITSIO, Intervals
using ProgressBars: tqdm as show_progress
Expand Down Expand Up @@ -33,6 +35,6 @@ export bin_intervals_from_gtis
include("utils.jl")

include("events.jl")
export readevents, EventList, DictMetadata
export readevents, EventList, filter_time!, filter_energy!

end
241 changes: 183 additions & 58 deletions src/events.jl
Original file line number Diff line number Diff line change
@@ -1,85 +1,210 @@
"""
DictMetadata
FITSMetadata{H}
A structure containing metadata from FITS file headers.
Metadata associated with a FITS or events file (`.fits` or `.evt`).
## Fields
$(FIELDS)
"""
struct FITSMetadata{H}
"Path to the FITS file."
filepath::String
"HDU index that the metadata was read from."
hdu::Int
"Units of energy (currently just ENERGY or PI or HPA)"
energy_units::Union{Nothing,String}
"Extra columns that were requested during read."
extra_columns::Dict{String,Vector}
"Fits headers from the selected HDU."
headers::H
end

function Base.show(io::IO, ::MIME"text/plain", @nospecialize(m::FITSMetadata))
println(
io,
"FITSMetadata for $(m.filepath)[$(m.hdu)] with $(length(m.extra_columns)) extra column(s)",
)
end

"""
EventList{TimeType, MetaType <: FITSMetadata}
Container for an events list. Generally should not be directly constructed, but
read from file using [`readevents`](@ref).
- `headers::Vector{Dict{String,Any}}`: A vector of dictionaries containing header information from each HDU.
$(FIELDS)
"""
struct DictMetadata
headers::Vector{Dict{String,Any}}
struct EventList{TimeType<:AbstractVector,MetaType<:FITSMetadata}
"Vector with recorded times."
times::TimeType
"Vector with recorded energies (else `nothing`)"
energies::Union{Nothing,TimeType}
meta::MetaType
end

function Base.show(io::IO, ::MIME"text/plain", @nospecialize(ev::EventList))
print(io, "EventList with $(length(ev.times)) times")
if !isnothing(ev.energies)
print(io, " and energies")
end
println(io)
end

"""
filter_time!(f, ev::EventList)
Filter all columns of the eventlist based on a predicate `f` applied to the
times.
# Example
```julia
# filter only positive times
filter_time!(t -> t > 0, ev)
```
See also [`filter_energy!`](@ref).
"""
EventList{T}
filter_time!(f, ev::EventList) = filter_on!(f, ev.times, ev)

A structure containing event data from a FITS file.
"""
filter_energy!(f, ev::EventList)
## Fields
Filter all columns of the eventlist based on a predicate `f` applied to the
energies.
- `filename::String`: Path to the source FITS file.
- `times::Vector{T}`: Vector of event times.
- `energies::Vector{T}`: Vector of event energies.
- `metadata::DictMetadata`: Metadata information extracted from the FITS file headers.
See also [`filter_time!`](@ref).
"""
struct EventList{T}
filename::String
times::Vector{T}
energies::Vector{T}
metadata::DictMetadata
function filter_energy!(f, ev::EventList)
@assert !isnothing(ev.energies) "No energies present in the EventList."
filter_on!(f, ev.energies, ev)
end

function filter_on!(f, src_col::AbstractVector, ev::EventList)
@assert size(src_col) == size(ev.times)

# modified from the Base.filter! implementation
j = firstindex(ev.times)

for i in eachindex(ev.times)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this do?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

eachindex gives you a lazy iterator that goes over all of the indices of an array. If your array has unusual indexing (e.g. starts at 0 or something else, or maybe uses IndexCartesian), it gives you something that will hit every element.

In this case, it's equivalent to firstindex(ev.times):lastindex(ev.times), or equivalently 1:length(ev.times), but would work with other array types just fine.

Somewhere in the Julia docs it says to prefer eachindex over 1:length so that code is more portable. In this case it really doesn't make a difference. So many ways to skin a cat!

ev.times[j] = ev.times[i]

if !isnothing(ev.energies)
ev.energies[j] = ev.energies[i]
end

for (_, col) in ev.meta.extra_columns
col[j] = col[i]
end

predicate = f(src_col[i])::Bool
j = ifelse(predicate, nextind(ev.times, j), j)
end

if j <= lastindex(ev.times)
resize!(ev.times, j-1)

if !isnothing(ev.energies)
resize!(ev.energies, j-1)
end

for (_, col) in ev.meta.extra_columns
resize!(col, j-1)
end
end

ev
end

"""
readevents(path; T = Float64)
colnames(file::AbstractString; hdu = 2)
Read event data from a FITS file into an EventList structure. The `path` is a
string that points to the location of the FITS file. `T` is used to specify
which numeric type to convert the data to.
Return a vector of all column names of `file`, reading a specific data unit
`hdu` if applicable.
"""
function colnames(file::AbstractString; hdu = 2)
FITS(file) do f
selected_hdu = f[hdu]
FITSIO.colnames(selected_hdu)
end
end

Returns an [`EventList`](@ref) containing the extracted data.
"""
read_energy_column(hdu; energy_alternatives = ["ENERGY", "PI", "PHA"], T = Float64)
## Notes
Attempt to read the energy column of a HDU from a list of alternative names.
Returns `Tuple{String,Vector{T}}` with the column that applied, and the column
cast to `Vector{T}`.
The function extracts `TIME` and `ENERGY` columns from any TableHDU in the FITS
file. All headers from each HDU are collected into the metadata field.
If no column applies, returns `(nothing, nothing)`.
"""
function readevents(path; T = Float64)
headers = Dict{String,Any}[]
times = T[]
energies = T[]

FITS(path, "r") do f
for i = 1:length(f) # Iterate over HDUs
hdu = f[i]
header_dict = Dict{String,Any}()
for key in keys(read_header(hdu))
header_dict[string(key)] = read_header(hdu)[key]
end
push!(headers, header_dict)

# Check if the HDU is a table
if isa(hdu, TableHDU)
# Get column names using the correct FITSIO method
colnames = FITSIO.colnames(hdu)

if "TIME" in colnames
times = convert(Vector{T}, read(hdu, "TIME"))
end
if "ENERGY" in colnames
energies = convert(Vector{T}, read(hdu, "ENERGY"))
end
end
function read_energy_column(hdu; energy_alternatives = ["ENERGY", "PI", "PHA"], T = Float64)
all_cols = uppercase.(FITSIO.colnames(hdu))
for col_name in energy_alternatives
if col_name in all_cols
return col_name, convert.(T, read(hdu, col_name))
end
end
if isempty(times)
@warn "No TIME data found in FITS file $(path). Time series analysis will not be possible."
nothing, nothing
end

"""
readevents(path; kwargs...)
Read an [`EventList`](@ref) from a FITS file. Will attempt to read an energy
column if one exists.
Keyword arguments and defaults:
- `hdu = 2`: which HDU unit to read
- `T = Float64`: the type to cast the time and energy columns to.
- `extra_columns = []`: extra columns to read from the same HDU.
All other `kwargs` are passed to [`Stingray.read_energy_column`](@ref).
"""
function readevents(
path::AbstractString;
hdu = 2,
T = Float64,
sort = false,
extra_columns = [],
kwargs...,
)
time, energy, ecol, header, add = FITS(path, "r") do f
selected_hdu = f[hdu]
header = read_header(selected_hdu)
time = convert.(T, read(selected_hdu, "TIME"))
energy_column, energy = read_energy_column(selected_hdu; T = T, kwargs...)

add = Dict{String,Vector}(
name => read(selected_hdu, name) for name in extra_columns
)

(time, energy, energy_column, header, add)
end

if !isnothing(energy)
@assert size(time) == size(energy) "Time and energy do not match sizes ($(size(time)) != $(size(energy)))."
end

if isempty(energies)
@warn "No ENERGY data found in FITS file $(path). Energy spectrum analysis will not be possible."
if !issorted(time)
if sort
# cheap and cheerful way to sort two arrays
# there are more performant ways to do this if it becomes an issue
I = sortperm(time)
time = time[I]
if !isnothing(energy)
energy = energy[I]
end
for (k, col) in add
add[k] = col[I]
end
else
@assert false "Times are not sorted (pass `sort = true` to force sorting)."
end
end

metadata = DictMetadata(headers)
return EventList{T}(path, times, energies, metadata)
EventList(
time::Vector{T},
energy,
FITSMetadata(path, hdu, ecol, add, header::FITSIO.FITSHeader),
)
end
13 changes: 13 additions & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[deps]
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
FITSIO = "525bcba6-941b-5504-bd06-fd0dc1a4d2eb"
HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Tar = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
38 changes: 35 additions & 3 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,38 @@ using Stingray
using Test
using FFTW, Distributions, Statistics, StatsBase, HDF5, FITSIO
using Logging
include("test_fourier.jl")
include("test_gti.jl")
include("test_events.jl")

TEST_DATA_PATH = joinpath(@__DIR__, "_data")

TEST_DATA_REMOTE = "https://www.star.bristol.ac.uk/fergus/stingray/testdata.tar.gz"
TEST_DATA_SHA256 = "cd5311e5ea1aaf7eef8253a369bd8ba6fbc10c922a22281adc14be61dc6e468c"

# assert the test data is available
if !isdir(TEST_DATA_PATH)
using Downloads, Tar, CodecZlib, SHA
dest = TEST_DATA_PATH * ".tar.gz"
Downloads.download(TEST_DATA_REMOTE, dest)

open(dest) do tar_gz
@assert bytes2hex(sha256(tar_gz)) == TEST_DATA_SHA256
seek(tar_gz, 0)
tar = GzipDecompressorStream(tar_gz)
Tar.extract(tar, TEST_DATA_PATH)
end

rm(dest)
else
@info "Test data is present"
end

@testset "GTI" begin
include("test_gti.jl")
end

@testset "Fourier" begin
include("test_fourier.jl")
end

@testset "Eventlist" begin
include("test_events.jl")
end
Loading