diff --git a/Project.toml b/Project.toml
index a8edb55..09f18fa 100644
--- a/Project.toml
+++ b/Project.toml
@@ -6,6 +6,7 @@ version = "0.1.0"
 [deps]
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 FITSIO = "525bcba6-941b-5504-bd06-fd0dc1a4d2eb"
 HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
@@ -21,6 +22,7 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 [compat]
 DataFrames = "1.3"
 Distributions = "0.25"
+DocStringExtensions = "0.9.4"
 FFTW = "1.4"
 FITSIO = "0.16"
 HDF5 = "0.16"
diff --git a/src/Stingray.jl b/src/Stingray.jl
index 6d8e74f..f7974e2 100644
--- a/src/Stingray.jl
+++ b/src/Stingray.jl
@@ -1,5 +1,7 @@
 module Stingray
 
+using DocStringExtensions
+
 using ResumableFunctions, StatsBase, Statistics, DataFrames
 using FFTW, NaNMath, FITSIO, Intervals
 using ProgressBars: tqdm as show_progress
@@ -33,6 +35,6 @@ export bin_intervals_from_gtis
 include("utils.jl")
 include("events.jl")
 
-export readevents, EventList, DictMetadata
+export readevents, EventList, filter_time!, filter_energy!
 
 end
diff --git a/src/events.jl b/src/events.jl
index 70c54c7..0d7039d 100644
--- a/src/events.jl
+++ b/src/events.jl
@@ -1,85 +1,210 @@
 """
-    DictMetadata
+    FITSMetadata{H}
 
-A structure containing metadata from FITS file headers.
+Metadata associated with a FITS or events file (`.fits` or `.evt`).
 
-## Fields
+$(FIELDS)
+"""
+struct FITSMetadata{H}
+    "Path to the FITS file."
+    filepath::String
+    "HDU index that the metadata was read from."
+    hdu::Int
+    "Units of energy (the column that was used: currently just ENERGY, PI, or PHA)."
+    energy_units::Union{Nothing,String}
+    "Extra columns that were requested during read."
+    extra_columns::Dict{String,Vector}
+    "FITS headers from the selected HDU."
+    headers::H
+end
+
+function Base.show(io::IO, ::MIME"text/plain", @nospecialize(m::FITSMetadata))
+    println(
+        io,
+        "FITSMetadata for $(m.filepath)[$(m.hdu)] with $(length(m.extra_columns)) extra column(s)",
+    )
+end
+
+"""
+    EventList{TimeType, MetaType <: FITSMetadata}
+
+Container for an events list. Generally should not be constructed directly, but
+read from file using [`readevents`](@ref).
 
-- `headers::Vector{Dict{String,Any}}`: A vector of dictionaries containing header information from each HDU.
+$(FIELDS)
 """
-struct DictMetadata
-    headers::Vector{Dict{String,Any}}
+struct EventList{TimeType<:AbstractVector,MetaType<:FITSMetadata}
+    "Vector with recorded times."
+    times::TimeType
+    "Vector with recorded energies (else `nothing`)."
+    energies::Union{Nothing,TimeType}
+    "Metadata read from the FITS file."
+    meta::MetaType
 end
 
+function Base.show(io::IO, ::MIME"text/plain", @nospecialize(ev::EventList))
+    print(io, "EventList with $(length(ev.times)) times")
+    if !isnothing(ev.energies)
+        print(io, " and energies")
+    end
+    println(io)
+end
+
+"""
+    filter_time!(f, ev::EventList)
+
+Filter all columns of the event list based on a predicate `f` applied to the
+times.
+
+# Example
+
+```julia
+# keep only positive times
+filter_time!(t -> t > 0, ev)
+```
+
+See also [`filter_energy!`](@ref).
 """
-    EventList{T}
+filter_time!(f, ev::EventList) = filter_on!(f, ev.times, ev)
 
-A structure containing event data from a FITS file.
+"""
+    filter_energy!(f, ev::EventList)
 
-## Fields
+Filter all columns of the event list based on a predicate `f` applied to the
+energies.
 
-- `filename::String`: Path to the source FITS file.
-- `times::Vector{T}`: Vector of event times.
-- `energies::Vector{T}`: Vector of event energies.
-- `metadata::DictMetadata`: Metadata information extracted from the FITS file headers.
+See also [`filter_time!`](@ref).
 """
-struct EventList{T}
-    filename::String
-    times::Vector{T}
-    energies::Vector{T}
-    metadata::DictMetadata
+function filter_energy!(f, ev::EventList)
+    @assert !isnothing(ev.energies) "No energies present in the EventList."
+    filter_on!(f, ev.energies, ev)
+end
+
+function filter_on!(f, src_col::AbstractVector, ev::EventList)
+    @assert size(src_col) == size(ev.times)
+
+    # modified from the Base.filter! implementation
+    j = firstindex(ev.times)
+
+    for i in eachindex(ev.times)
+        ev.times[j] = ev.times[i]
+
+        if !isnothing(ev.energies)
+            ev.energies[j] = ev.energies[i]
+        end
+
+        for (_, col) in ev.meta.extra_columns
+            col[j] = col[i]
+        end
+
+        predicate = f(src_col[i])::Bool
+        j = ifelse(predicate, nextind(ev.times, j), j)
+    end
+
+    if j <= lastindex(ev.times)
+        resize!(ev.times, j - 1)
+
+        if !isnothing(ev.energies)
+            resize!(ev.energies, j - 1)
+        end
+
+        for (_, col) in ev.meta.extra_columns
+            resize!(col, j - 1)
+        end
+    end
+
+    ev
 end
 
 """
-    readevents(path; T = Float64)
+    colnames(file::AbstractString; hdu = 2)
 
-Read event data from a FITS file into an EventList structure. The `path` is a
-string that points to the location of the FITS file. `T` is used to specify
-which numeric type to convert the data to.
+Return a vector of all column names of `file`, reading a specific data unit
+`hdu` if applicable.
+"""
+function colnames(file::AbstractString; hdu = 2)
+    FITS(file) do f
+        selected_hdu = f[hdu]
+        FITSIO.colnames(selected_hdu)
+    end
+end
 
-Returns an [`EventList`](@ref) containing the extracted data.
+"""
+    read_energy_column(hdu; energy_alternatives = ["ENERGY", "PI", "PHA"], T = Float64)
 
-## Notes
+Attempt to read the energy column of an HDU from a list of alternative names.
+Returns a `Tuple{String,Vector{T}}` with the name of the column that matched, and
+the column data cast to `Vector{T}`.
 
-The function extracts `TIME` and `ENERGY` columns from any TableHDU in the FITS
-file. All headers from each HDU are collected into the metadata field.
+If no column applies, returns `(nothing, nothing)`.
 """
-function readevents(path; T = Float64)
-    headers = Dict{String,Any}[]
-    times = T[]
-    energies = T[]
-
-    FITS(path, "r") do f
-        for i = 1:length(f) # Iterate over HDUs
-            hdu = f[i]
-            header_dict = Dict{String,Any}()
-            for key in keys(read_header(hdu))
-                header_dict[string(key)] = read_header(hdu)[key]
-            end
-            push!(headers, header_dict)
-
-            # Check if the HDU is a table
-            if isa(hdu, TableHDU)
-                # Get column names using the correct FITSIO method
-                colnames = FITSIO.colnames(hdu)
-
-                if "TIME" in colnames
-                    times = convert(Vector{T}, read(hdu, "TIME"))
-                end
-                if "ENERGY" in colnames
-                    energies = convert(Vector{T}, read(hdu, "ENERGY"))
-                end
+function read_energy_column(hdu; energy_alternatives = ["ENERGY", "PI", "PHA"], T = Float64)
+    all_cols = uppercase.(FITSIO.colnames(hdu))
+    for col_name in energy_alternatives
+        if col_name in all_cols
+            return col_name, convert.(T, read(hdu, col_name))
         end
         end
     end
-    if isempty(times)
-        @warn "No TIME data found in FITS file $(path). Time series analysis will not be possible."
+    nothing, nothing
+end
+
+"""
+    readevents(path; kwargs...)
+
+Read an [`EventList`](@ref) from a FITS file. Will attempt to read an energy
+column if one exists.
+
+Keyword arguments and defaults:
+- `hdu = 2`: which HDU to read
+- `T = Float64`: the type to cast the time and energy columns to
+- `sort = false`: whether to sort by time if the times are not already sorted
+- `extra_columns = []`: extra columns to read from the same HDU
+
+All other `kwargs` are passed to [`Stingray.read_energy_column`](@ref).
+"""
+function readevents(
+    path::AbstractString;
+    hdu = 2,
+    T = Float64,
+    sort = false,
+    extra_columns = [],
+    kwargs...,
+)
+    time, energy, ecol, header, add = FITS(path, "r") do f
+        selected_hdu = f[hdu]
+        header = read_header(selected_hdu)
+        time = convert.(T, read(selected_hdu, "TIME"))
+        energy_column, energy = read_energy_column(selected_hdu; T = T, kwargs...)
+
+        add = Dict{String,Vector}(
+            name => read(selected_hdu, name) for name in extra_columns
+        )
+
+        (time, energy, energy_column, header, add)
+    end
+
+    if !isnothing(energy)
+        @assert size(time) == size(energy) "Time and energy do not match sizes ($(size(time)) != $(size(energy)))."
     end
-    if isempty(energies)
-        @warn "No ENERGY data found in FITS file $(path). Energy spectrum analysis will not be possible."
+
+    if !issorted(time)
+        if sort
+            # cheap and cheerful way to sort two arrays
+            # there are more performant ways to do this if it becomes an issue
+            I = sortperm(time)
+            time = time[I]
+            if !isnothing(energy)
+                energy = energy[I]
+            end
+            for (k, col) in add
+                add[k] = col[I]
+            end
+        else
+            @assert false "Times are not sorted (pass `sort = true` to force sorting)."
+        end
     end
-    metadata = DictMetadata(headers)
-    return EventList{T}(path, times, energies, metadata)
+
+    EventList(
+        time::Vector{T},
+        energy,
+        FITSMetadata(path, hdu, ecol, add, header::FITSIO.FITSHeader),
+    )
 end
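As an aside, here is a minimal usage sketch of the API introduced in `src/events.jl` above (`readevents`, `filter_energy!`, `filter_time!`, all exported by this patch). The file path and the numeric cuts are placeholders, not values taken from this change:

```julia
using Stingray

# placeholder path to a local events file; HDU 2 is read by default
ev = readevents("events.fits"; T = Float64)

# keep only events above some energy threshold (requires an energy column)
filter_energy!(e -> e > 2.0, ev)

# keep only events inside a chosen time window; any extra columns are filtered in step
filter_time!(t -> 100.0 <= t <= 200.0, ev)
```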
diff --git a/test/Project.toml b/test/Project.toml
new file mode 100644
index 0000000..797319a
--- /dev/null
+++ b/test/Project.toml
@@ -0,0 +1,13 @@
+[deps]
+CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
+FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
+FITSIO = "525bcba6-941b-5504-bd06-fd0dc1a4d2eb"
+HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
+Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
+SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+Tar = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/test/runtests.jl b/test/runtests.jl
index 4275ebf..a0ff8f5 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -2,6 +2,38 @@ using Stingray
 using Test
 using FFTW, Distributions, Statistics, StatsBase, HDF5, FITSIO
 using Logging
-include("test_fourier.jl")
-include("test_gti.jl")
-include("test_events.jl")
+
+TEST_DATA_PATH = joinpath(@__DIR__, "_data")
+
+TEST_DATA_REMOTE = "https://www.star.bristol.ac.uk/fergus/stingray/testdata.tar.gz"
+TEST_DATA_SHA256 = "cd5311e5ea1aaf7eef8253a369bd8ba6fbc10c922a22281adc14be61dc6e468c"
+
+# ensure the test data is available, downloading and verifying it if missing
+if !isdir(TEST_DATA_PATH)
+    using Downloads, Tar, CodecZlib, SHA
+    dest = TEST_DATA_PATH * ".tar.gz"
+    Downloads.download(TEST_DATA_REMOTE, dest)
+
+    open(dest) do tar_gz
+        @assert bytes2hex(sha256(tar_gz)) == TEST_DATA_SHA256
+        seek(tar_gz, 0)
+        tar = GzipDecompressorStream(tar_gz)
+        Tar.extract(tar, TEST_DATA_PATH)
+    end
+
+    rm(dest)
+else
+    @info "Test data is present"
+end
+
+@testset "GTI" begin
+    include("test_gti.jl")
+end
+
+@testset "Fourier" begin
+    include("test_fourier.jl")
+end
+
+@testset "Eventlist" begin
+    include("test_events.jl")
+end
diff --git a/test/test_events.jl b/test/test_events.jl
index ff1c91b..7a0a804 100644
--- a/test/test_events.jl
+++ b/test/test_events.jl
@@ -1,149 +1,75 @@
-@testset "EventList Tests" begin
-    # Test 1: Create a sample FITS file for testing
-    @testset "Sample FITS file creation" begin
-        test_dir = mktempdir()
-        sample_file = joinpath(test_dir, "sample.fits")
-        f = FITS(sample_file, "w")
-        write(f, Int[]) # Empty primary array
-        # Create a binary table HDU with TIME and ENERGY columns
-        times = Float64[1.0, 2.0, 3.0, 4.0, 5.0]
-        energies = Float64[10.0, 20.0, 15.0, 25.0, 30.0]
-        # Add a binary table extension
-        table = Dict{String,Array}()
-        table["TIME"] = times
-        table["ENERGY"] = energies
-        write(f, table)
-        close(f)
+using Test, Stingray
+using FITSIO
 
-        @test isfile(sample_file)
+TEST_DATA_PATH = joinpath(@__DIR__, "_data", "testdata")
+@assert isdir(TEST_DATA_PATH)
 
-        # Test reading the sample file
-        data = readevents(sample_file)
-        @test data.filename == sample_file
-        @test length(data.times) == 5
-        @test length(data.energies) == 5
-        @test eltype(data.times) == Float64
-        @test eltype(data.energies) == Float64
-    end
+readdir(TEST_DATA_PATH)
 
-    # Test 2: Test with different data types
-    @testset "Different data types" begin
-        test_dir = mktempdir()
-        sample_file = joinpath(test_dir, "sample_float32.fits")
-        f = FITS(sample_file, "w")
-        write(f, Int[])
-        # Create data
-        times = Float64[1.0, 2.0, 3.0]
-        energies = Float64[10.0, 20.0, 30.0]
+# generate mock data
+function mock_data(times, energies; energy_column = "ENERGY")
+    test_dir = mktempdir()
+    sample_file = joinpath(test_dir, "sample.fits")
+    # Create a sample FITS file
+    FITS(sample_file, "w") do f
+        # Create the primary HDU with a single-element array instead of an empty one
+        write(f, [0])
+        # Create event table in HDU 2
         table = Dict{String,Array}()
         table["TIME"] = times
-        table["ENERGY"] = energies
+        table[energy_column] = energies
+        table["INDEX"] = collect(1:length(times))
         write(f, table)
-        close(f)
-        # Test with Float32
-        data_f32 = readevents(sample_file, T = Float32)
-        @test eltype(data_f32.times) == Float32
-        @test eltype(data_f32.energies) == Float32
-        # Test with Int64
-        data_i64 = readevents(sample_file, T = Int64)
-        @test eltype(data_i64.times) == Int64
-        @test eltype(data_i64.energies) == Int64
     end
+    sample_file
+end
 
-    # Test 3: Test with missing columns
-    @testset "Missing columns" begin
-        test_dir = mktempdir()
-        sample_file = joinpath(test_dir, "sample_no_energy.fits")
-        # Create a sample FITS file with only TIME column
-        f = FITS(sample_file, "w")
-        write(f, Int[])
-        times = Float64[1.0, 2.0, 3.0]
-        table = Dict{String,Array}()
-        table["TIME"] = times
-        write(f, table)
-        close(f)
-        local data
-        @test_logs (:warn, "No ENERGY data found in FITS file $(sample_file). Energy spectrum analysis will not be possible.") begin
-            data = readevents(sample_file)
-        end
-        @test length(data.times) == 3
-        @test length(data.energies) == 0
-        # create a file with only ENERGY column
-        sample_file2 = joinpath(test_dir, "sample_no_time.fits")
-        f = FITS(sample_file2, "w")
-        write(f, Int[]) # Empty primary array
-        energies = Float64[10.0, 20.0, 30.0]
-        table = Dict{String,Array}()
-        table["ENERGY"] = energies
-        write(f, table)
-        close(f)
-        local data2
-        @test_logs (:warn, "No TIME data found in FITS file $(sample_file2). Time series analysis will not be possible.") begin
-            data2 = readevents(sample_file2)
-        end
-        @test length(data2.times) == 0 # No TIME column
-        @test length(data2.energies) == 3
-    end
+mock_times = Float64[1.0, 2.0, 3.0, 4.0, 5.0]
+mock_energies = Float64[10.0, 20.0, 15.0, 25.0, 30.0]
+sample = mock_data(mock_times, mock_energies)
 
-    # Test 4: Test with multiple HDUs
-    @testset "Multiple HDUs" begin
-        test_dir = mktempdir()
-        sample_file = joinpath(test_dir, "sample_multi_hdu.fits")
-        # Create a sample FITS file with multiple HDUs
-        f = FITS(sample_file, "w")
-        write(f, Int[])
-        times1 = Float64[1.0, 2.0, 3.0]
-        energies1 = Float64[10.0, 20.0, 30.0]
-        table1 = Dict{String,Array}()
-        table1["TIME"] = times1
-        table1["ENERGY"] = energies1
-        write(f, table1)
-        # Second table HDU (with OTHER column)
-        other_data = Float64[100.0, 200.0, 300.0]
-        table2 = Dict{String,Array}()
-        table2["OTHER"] = other_data
-        write(f, table2)
-        # Third table HDU (with TIME only)
-        times3 = Float64[4.0, 5.0, 6.0]
-        table3 = Dict{String,Array}()
-        table3["TIME"] = times3
-        write(f, table3)
-        close(f)
-        data = readevents(sample_file)
-        @test length(data.metadata.headers) == 4 # Primary + 3 table HDUs
-        # Should read the first HDU with both TIME and ENERGY
-        @test length(data.times) == 3
-        @test length(data.energies) == 3
-    end
+# try reading mock data
+data = readevents(sample)
+@test data.times == mock_times
+@test data.energies == mock_energies
+@test length(data.meta.headers) == 17
+@test length(data.meta.extra_columns) == 0
 
-    # Test 5: Test with monol_testA.evt
-    @testset "test monol_testA.evt" begin
-        test_filepath = joinpath("data", "monol_testA.evt")
-        if isfile(test_filepath)
-            @testset "monol_testA.evt" begin
-                old_logger = global_logger(ConsoleLogger(stderr, Logging.Error))
-                try
-                    data = readevents(test_filepath)
-                    @test data.filename == test_filepath
-                    @test length(data.metadata.headers) > 0
-                finally
-                    global_logger(old_logger)
-                end
-            end
-        else
-            @info "Test file '$(test_filepath)' not found. Skipping this test."
-        end
-    end
+data = readevents(sample; extra_columns = ["INDEX"])
+@test length(data.meta.extra_columns) == 1
 
-    @testset "Error handling" begin
-        # Test with non-existent file - using a more generic approach
-        @test_throws Exception readevents("non_existent_file.fits")
+# try reading sample datasets
+data = readevents(joinpath(TEST_DATA_PATH, "monol_testA.evt"))
 
-        # Test with invalid FITS file
-        invalid_file = tempname()
-        open(invalid_file, "w") do io
-            write(io, "This is not a FITS file")
-        end
-        @test_throws Exception readevents(invalid_file)
-    end
-end
+@test length(data.energies) == 1000
+@test length(data.times) == 1000
+@test first(data.energies) == 174.0
+
+data = readevents(joinpath(TEST_DATA_PATH, "chandra_test.fits"))
+@test length(data.times) == 4612
+@test length(data.energies) == 4612
+@test data.meta.energy_units == "ENERGY"
+
+# should just read the PI column
+data = readevents(joinpath(TEST_DATA_PATH, "chandra_noener_test.fits"))
+@test length(data.times) == 4612
+@test length(data.energies) == 4612
+@test data.meta.energy_units == "PI"
+
+data = readevents(joinpath(TEST_DATA_PATH, "xmm_test.fits"))
+@test length(data.times) == 1708244
+@test length(data.energies) == 1708244
+@test data.meta.energy_units == "PI"
+
+@test_throws AssertionError("Times are not sorted (pass `sort = true` to force sorting).") readevents(
+    joinpath(TEST_DATA_PATH, "monol_testA_calib_unsrt.evt"),
+)
+data = readevents(joinpath(TEST_DATA_PATH, "monol_testA_calib_unsrt.evt"), sort = true)
+@test length(data.times) == 1000
+@test length(data.energies) == 1000
+@test issorted(data.times)
+@test data.meta.energy_units == "ENERGY"
+# make sure the energy column got sorted correctly
+@test data.energies[1:100:1000] ≈
+      [9.56, 42.40, 41.36, 3.55, 6.40, 5.55, 40.24, 27.15, 20.31, 35.52] rtol=1e-2
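Finally, a small sketch of how the unexported helpers added in `src/events.jl` (`Stingray.colnames` and `Stingray.read_energy_column`) could be used to inspect a file before reading it. The path is a placeholder, and HDU 2 mirrors the `readevents` default:

```julia
using Stingray, FITSIO

path = "events.fits"  # placeholder: any local events file

# list the columns available in the events HDU
println(Stingray.colnames(path; hdu = 2))

# check which energy-like column `readevents` would pick up
col, values = FITS(path, "r") do f
    Stingray.read_energy_column(f[2]; energy_alternatives = ["ENERGY", "PI", "PHA"])
end
isnothing(col) || println("Would use `$col` with $(length(values)) rows")
```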