diff --git a/src/gribjump/CMakeLists.txt b/src/gribjump/CMakeLists.txt index 7086f13..22714d0 100644 --- a/src/gribjump/CMakeLists.txt +++ b/src/gribjump/CMakeLists.txt @@ -122,9 +122,8 @@ if( HAVE_GRIBJUMP_LOCAL_EXTRACT ) compression/compressors/Ccsds.cc compression/compressors/Ccsds.h - compression/compressors/algorithms/SimplePacking.h - compression/compressors/algorithms/LibEccodes.h - compression/compressors/algorithms/LibEccodes.cc + compression/compressors/SimplePacking.h + compression/compressors/SimplePacking.cc FDBPlugin.cc FDBPlugin.h diff --git a/src/gribjump/FDBPlugin.cc b/src/gribjump/FDBPlugin.cc index b1d4405..28153f8 100644 --- a/src/gribjump/FDBPlugin.cc +++ b/src/gribjump/FDBPlugin.cc @@ -39,8 +39,12 @@ FDBPlugin::FDBPlugin() { static bool enableGribjump = eckit::Resource("fdbEnableGribjump;$FDB_ENABLE_GRIBJUMP", false); static bool disableGribjump = eckit::Resource("fdbDisableGribjump;$FDB_DISABLE_GRIBJUMP", false); // Emergency off-switch if (enableGribjump && !disableGribjump) { + LOG_DEBUG_LIB(LibGribJump) << "FDBPlugin has been enabled" << std::endl; FDBPlugin::instance().addFDB(fdb); } + else { + LOG_DEBUG_LIB(LibGribJump) << "FDBPlugin has been disabled" << std::endl; + } }); } diff --git a/src/gribjump/GribJumpDataAccessor.h b/src/gribjump/GribJumpDataAccessor.h index ea5480f..4c050ac 100644 --- a/src/gribjump/GribJumpDataAccessor.h +++ b/src/gribjump/GribJumpDataAccessor.h @@ -21,10 +21,6 @@ class GribJumpDataAccessor : public mc::DataAccessor { public: GribJumpDataAccessor(eckit::DataHandle& dh, const mc::Block range) : dh_{dh}, data_section_range_{range} {} - void write(const eckit::Buffer& buffer, const size_t offset) const override { - NOTIMP; - } - eckit::Buffer read(const mc::Block& range) const override { eckit::Offset offset = range.first; eckit::Length size = range.second; diff --git a/src/gribjump/compression/DataAccessor.h b/src/gribjump/compression/DataAccessor.h index cfa6e1c..df9f0c3 100644 --- a/src/gribjump/compression/DataAccessor.h +++ b/src/gribjump/compression/DataAccessor.h @@ -20,75 +20,21 @@ #include "eckit/exception/Exceptions.h" #include "eckit/io/Buffer.h" -namespace mc { +namespace gribjump::mc { class DataAccessor { public: virtual ~DataAccessor() = default; - virtual void write(const eckit::Buffer& buffer, const size_t offset) const = 0; virtual eckit::Buffer read(const Block& range) const = 0; virtual eckit::Buffer read() const = 0; virtual size_t eof() const = 0; }; -class PosixAccessor : public DataAccessor { -public: - explicit PosixAccessor(const std::string& path) : ifs_{path, std::ifstream::binary} {} - ~PosixAccessor() { - ifs_.close(); - } - - void write(const eckit::Buffer& buffer, const size_t offset) const override { - NOTIMP; - } - - eckit::Buffer read(const Block& range) const override { - const auto [offset, size] = range; - eckit::Buffer buf(size); - ifs_.seekg(offset, std::ios::beg); - ifs_.read(reinterpret_cast(buf.data()), size); - if (!ifs_) { - std::stringstream ss; - ss << "Error: only " << ifs_.gcount() << " could be read"; - throw eckit::ReadError(ss.str(), Here()); - } - assert(size != 0); - return eckit::Buffer{buf.data(), size}; - } - - eckit::Buffer read() const override { - ifs_.seekg(0, std::ios::end); - size_t size = ifs_.tellg(); - ifs_.seekg(0, std::ios::beg); - eckit::Buffer buf(size); - ifs_.read(reinterpret_cast(buf.data()), size); - if (!ifs_) { - std::stringstream ss; - ss << "Error: only " << ifs_.gcount() << " could be read"; - throw eckit::ReadError(ss.str(), Here()); - } - assert(size != 0); - return eckit::Buffer{buf.data(), size}; - } - - size_t eof() const override { - ifs_.seekg(0, std::ios::end); - return ifs_.tellg(); - } -private: - mutable std::ifstream ifs_; -}; - - class MemoryAccessor : public DataAccessor { public: explicit MemoryAccessor(const eckit::Buffer& buffer) : buf_{buffer.data(), buffer.size()} {} - void write(const eckit::Buffer& buffer, const size_t offset) const override { - NOTIMP; - } - eckit::Buffer read(const Block& range) const override { const auto [offset, size] = range; if (offset + size > buf_.size()) { @@ -110,4 +56,4 @@ class MemoryAccessor : public DataAccessor { eckit::Buffer buf_; }; -} // namespace mc +} // namespace gribjump::mc diff --git a/src/gribjump/compression/NumericCompressor.h b/src/gribjump/compression/NumericCompressor.h index c8274bb..8ebbf29 100644 --- a/src/gribjump/compression/NumericCompressor.h +++ b/src/gribjump/compression/NumericCompressor.h @@ -18,7 +18,7 @@ #include #include -namespace mc { +namespace gribjump::mc { template class NumericDecompressor; @@ -26,58 +26,79 @@ class NumericDecompressor; template class NumericCompressor { public: - using CompressedData = eckit::Buffer; - using Values = std::vector; - virtual std::pair>, CompressedData> encode(const Values&) = 0; + using CompressedData = eckit::Buffer; + using Values = std::vector; + virtual std::pair>, CompressedData> encode(const Values&) = 0; }; template class NumericDecompressor { public: - using CompressedData = eckit::Buffer; - using Values = std::vector; - virtual Values decode(const CompressedData&) = 0; - virtual Values decode(const std::shared_ptr, const Block&) = 0; - - - virtual std::vector decode(const std::shared_ptr& accessor, const std::vector& ranges) { - using Values = typename NumericDecompressor::Values; - std::vector result; - decode(accessor, ranges, result); - return result; - } - - virtual void decode(const std::shared_ptr& accessor, const std::vector& ranges, std::vector& result) { - using Values = typename NumericDecompressor::Values; - - std::unordered_map>> ranges_map; - - // find which sub_ranges are in which buckets - BlockBuckets buckets; - for (const auto& range : ranges) { - buckets << range; + using CompressedData = eckit::Buffer; + using Values = std::vector; + using Offsets = std::vector; + + virtual Values decode(const CompressedData&) = 0; + virtual Values decode(const std::shared_ptr, const Block&) = 0; + virtual Offsets decode_offsets(const CompressedData&) {NOTIMP;} + + virtual std::vector decode(const std::shared_ptr& accessor, const std::vector& ranges) { + using Values = typename NumericDecompressor::Values; + std::vector result; + decode(accessor, ranges, result); + return result; } - // inverse buckets and associate data with ranges - for (const auto& [bucket_range, bucket_sub_ranges] : buckets) { - auto decoded = std::make_shared(decode(accessor, bucket_range)); - for (const auto& bucket_sub_range : bucket_sub_ranges) { - ranges_map[bucket_sub_range] = std::make_pair(bucket_range, decoded); - } + virtual void decode(const std::shared_ptr& accessor, const std::vector& ranges, std::vector& result) { + using Values = typename NumericDecompressor::Values; + + std::unordered_map>> ranges_map; + + // find which sub_ranges are in which buckets + BlockBuckets buckets; + for (const auto& range : ranges) { + buckets << range; + } + + // inverse buckets and associate data with ranges + for (const auto& [bucket_range, bucket_sub_ranges] : buckets) { + auto decoded = std::make_shared(decode(accessor, bucket_range)); + for (const auto& bucket_sub_range : bucket_sub_ranges) { + ranges_map[bucket_sub_range] = std::make_pair(bucket_range, decoded); + } + } + + // assign data to ranges + for (const auto& user_range : ranges) { + const auto& [bucket_range, decoded] = ranges_map[user_range]; + const auto& [bucket_range_offset, bucket_range_size] = bucket_range; + const auto& [user_range_offset, user_range_size] = user_range; + + ValueType* data_start = reinterpret_cast(decoded->data()) + (user_range_offset - bucket_range_offset); + Values values(data_start, data_start + user_range_size); + result.push_back(std::move(values)); + } } +}; - // assign data to ranges - for (const auto& user_range : ranges) { - const auto& [bucket_range, decoded] = ranges_map[user_range]; - const auto& [bucket_range_offset, bucket_range_size] = bucket_range; - const auto& [user_range_offset, user_range_size] = user_range; - - ValueType* data_start = reinterpret_cast(decoded->data()) + (user_range_offset - bucket_range_offset); - Values values(data_start, data_start + user_range_size); - result.push_back(std::move(values)); +/// @todo: this is copy pasted from eccodes - do we *really* need this? +/* Return n to the power of s */ +template +constexpr T codes_power(long s, long n) { + T divisor = 1.0; + if (s == 0) + return 1.0; + if (s == 1) + return n; + while (s < 0) { + divisor /= n; + s++; } + while (s > 0) { + divisor *= n; + s--; + } + return divisor; +} - } -}; - -} // namespace mc +} // namespace gribjump::mc diff --git a/src/gribjump/compression/Range.cc b/src/gribjump/compression/Range.cc index 0e41db3..3ca80c8 100644 --- a/src/gribjump/compression/Range.cc +++ b/src/gribjump/compression/Range.cc @@ -12,23 +12,25 @@ #include #include -std::pair begin_end(const mc::Block& range) +namespace gribjump::mc{ + +std::pair begin_end(const Block& range) { const auto [offset, size] = range; return {offset, offset + size}; } -mc::Block operator+(const mc::Block& r1, const mc::Block& r2) +Block operator+(const Block& r1, const Block& r2) { auto [b1, e1] = begin_end(r1); auto [b2, e2] = begin_end(r2); assert(!((b1 > e2) && (b2 > e1))); - return mc::Block{std::min(b1, b2), std::max(e1, e2) - std::min(b1, b2)}; + return Block{std::min(b1, b2), std::max(e1, e2) - std::min(b1, b2)}; } -std::ostream& operator<<(std::ostream& os, const mc::Block& range) +std::ostream& operator<<(std::ostream& os, const Block& range) { auto [rb, re] = begin_end(range); os << "[" << rb << ", " << re << "]"; @@ -36,7 +38,7 @@ std::ostream& operator<<(std::ostream& os, const mc::Block& range) } -std::ostream& operator<<(std::ostream& os, const mc::BlockBucket& range) +std::ostream& operator<<(std::ostream& os, const BlockBucket& range) { os << range.first << std::endl; for (const auto& r : range.second) { @@ -47,20 +49,20 @@ std::ostream& operator<<(std::ostream& os, const mc::BlockBucket& range) } -mc::BlockBuckets& operator<<(mc::BlockBuckets& buckets, const mc::Block& r) { +BlockBuckets& operator<<(BlockBuckets& buckets, const Block& r) { - const mc::Block sub_range{r}; + const Block sub_range{r}; const auto [sub_start, sub_end] = begin_end(sub_range); // Find the position where the range might be inserted auto it = std::lower_bound(buckets.begin(), buckets.end(), sub_range, - [](const mc::BlockBucket& bucket, const mc::Block& range) { + [](const BlockBucket& bucket, const Block& range) { const auto [bucket_start, bucket_end] = begin_end(bucket.first); return bucket_end < range.first; }); - mc::Block merged_range = sub_range; - mc::SubBlocks merged_subranges = {sub_range}; + Block merged_range = sub_range; + SubBlocks merged_subranges = {sub_range}; // Merge with any overlapping buckets before the insertion point while (it != buckets.begin()) { @@ -98,11 +100,12 @@ mc::BlockBuckets& operator<<(mc::BlockBuckets& buckets, const mc::Block& r) { return buckets; } -std::size_t std::hash::operator() (const mc::Block& range) const +} // namespace gribjump::mc + +std::size_t std::hash::operator() (const gribjump::mc::Block& range) const { static_assert(sizeof(std::size_t) == sizeof(std::uint64_t), "std::size_t must be 64 bits"); const auto [offset, size] = range; return offset ^ (size << 32); } - diff --git a/src/gribjump/compression/Range.h b/src/gribjump/compression/Range.h index 58779e9..4dd5645 100644 --- a/src/gribjump/compression/Range.h +++ b/src/gribjump/compression/Range.h @@ -17,17 +17,17 @@ #include #include #include -namespace mc { +namespace gribjump::mc { using Block = std::pair; } template<> -struct std::hash { - std::size_t operator()(const mc::Block& range) const; +struct std::hash { + std::size_t operator()(const gribjump::mc::Block& range) const; }; -namespace mc { +namespace gribjump::mc { // A bucket is a continous range of data that can be decoded in one go std::pair get_begin_end(const Block& range); @@ -35,11 +35,12 @@ using SubBlock = Block; using SubBlocks = std::vector; using BlockBucket = std::pair; using BlockBuckets = std::vector; // Sorted to allow binary search (std::lower_bound) -} // namespace mc -std::ostream& operator<<(std::ostream& os, const mc::Block& range); -std::ostream& operator<<(std::ostream& os, const mc::BlockBucket& bucket); -mc::BlockBuckets& operator<<(mc::BlockBuckets& buckets, const mc::Block& r); -mc::Block operator+(const mc::Block& r1, const mc::Block& r2); +std::ostream& operator<<(std::ostream& os, const Block& range); +std::ostream& operator<<(std::ostream& os, const BlockBucket& bucket); +BlockBuckets& operator<<(BlockBuckets& buckets, const Block& r); +Block operator+(const Block& r1, const Block& r2); -std::pair begin_end(const mc::Block& range); +std::pair begin_end(const Block& range); + +} // namespace gribjump::mc diff --git a/src/gribjump/compression/compressors/Aec.cc b/src/gribjump/compression/compressors/Aec.cc index 12482dc..797805a 100644 --- a/src/gribjump/compression/compressors/Aec.cc +++ b/src/gribjump/compression/compressors/Aec.cc @@ -8,31 +8,175 @@ * does it submit to any jurisdiction. */ -#include "Aec.h" - -namespace mc { - -void print_aec_stream_info(struct aec_stream* strm, const char* func) -{ - fprintf(stderr, "ECCODES DEBUG CCSDS %s aec_stream.flags=%u\n", func, strm->flags); - fprintf(stderr, "ECCODES DEBUG CCSDS %s aec_stream.bits_per_sample=%u\n", func, strm->bits_per_sample); - fprintf(stderr, "ECCODES DEBUG CCSDS %s aec_stream.block_size=%u\n", func, strm->block_size); - fprintf(stderr, "ECCODES DEBUG CCSDS %s aec_stream.rsi=%u\n", func, strm->rsi); - fprintf(stderr, "ECCODES DEBUG CCSDS %s aec_stream.avail_out=%lu\n", func, strm->avail_out); - fprintf(stderr, "ECCODES DEBUG CCSDS %s aec_stream.avail_in=%lu\n", func, strm->avail_in); - fprintf(stderr, "ECCODES DEBUG CCSDS %s aec_stream.next_out=%p\n", func, strm->next_out); - fprintf(stderr, "ECCODES DEBUG CCSDS %s aec_stream.next_in=%p\n", func, strm->next_in); - fprintf(stderr, "ECCODES DEBUG CCSDS %s aec_stream.total_in=%lu\n", func, strm->total_in); - fprintf(stderr, "ECCODES DEBUG CCSDS %s aec_stream.total_out=%lu\n", func, strm->total_out); - fprintf(stderr, "\n"); +#include "gribjump/compression/compressors/Aec.h" + +#include +#include + +#include + +#define AEC_CALL(a) aec_call(a, #a) + +namespace { + +void aec_call(int err, std::string func) { + if (err != AEC_OK) { + std::string msg = "Error code: " + std::to_string(err); + throw eckit::FailedLibraryCall("libaec", func, msg, Here()); + } } +// debugging +[[maybe_unused]] +void print_aec_stream_info(struct aec_stream* strm, const char* func) { + eckit::Log::info() << "DEBUG CCSDS " << func << " aec_stream.flags=" << strm->flags << std::endl; + eckit::Log::info() << "DEBUG CCSDS " << func << " aec_stream.bits_per_sample=" << strm->bits_per_sample << std::endl; + eckit::Log::info() << "DEBUG CCSDS " << func << " aec_stream.block_size=" << strm->block_size << std::endl; + eckit::Log::info() << "DEBUG CCSDS " << func << " aec_stream.rsi=" << strm->rsi << std::endl; + eckit::Log::info() << "DEBUG CCSDS " << func << " aec_stream.avail_out=" << strm->avail_out << std::endl; + eckit::Log::info() << "DEBUG CCSDS " << func << " aec_stream.avail_in=" << strm->avail_in << std::endl; + eckit::Log::info() << "DEBUG CCSDS " << func << " aec_stream.next_out=" << strm->next_out << std::endl; + eckit::Log::info() << "DEBUG CCSDS " << func << " aec_stream.next_in=" << strm->next_in << std::endl; + eckit::Log::info() << "DEBUG CCSDS " << func << " aec_stream.total_in=" << strm->total_in << std::endl; + eckit::Log::info() << "DEBUG CCSDS " << func << " aec_stream.total_out=" << strm->total_out << std::endl; + eckit::Log::info() << std::endl; +} + +[[maybe_unused]] void print_binary(const char* name, const void* data, size_t n_bytes) { - std::cout << name << ": "; - for (size_t i = 0; i < n_bytes; ++i) { - std::cout << std::bitset<8>(static_cast(data)[i]) << " "; - } - std::cout << std::endl; + eckit::Log::info() << name << ": "; + for (size_t i = 0; i < n_bytes; ++i) { + eckit::Log::info() << std::bitset<8>(static_cast(data)[i]) << " "; + } + eckit::Log::info() << std::endl; +} + +} // namespace + +namespace gribjump::mc { + +// --------------------------------------------------------------------------------------------------------------------- +template +typename NumericCompressor::Values AecDecompressor::decode(const typename NumericCompressor::CompressedData& encoded) { + + validateBitsPerSample(); + + Values decoded(n_elems_); + + struct aec_stream strm; + strm.rsi = rsi_; + strm.block_size = block_size_; + strm.bits_per_sample = bits_per_sample_; + strm.flags = flags_; + strm.avail_in = encoded.size(); + strm.next_in = reinterpret_cast(encoded.data()); + strm.avail_out = decoded.size() * sizeof(ValueType); + strm.next_out = reinterpret_cast(decoded.data()); + + AEC_CALL(aec_decode_init(&strm)); + AEC_CALL(aec_decode_enable_offsets(&strm)); + AEC_CALL(aec_decode(&strm, AEC_FLUSH)); + + size_t offsets_count = 0; + AEC_CALL(aec_decode_count_offsets(&strm, &offsets_count)); + + std::vector offsets(offsets_count); + AEC_CALL(aec_decode_get_offsets(&strm, offsets.data(), offsets.size())); + + AEC_CALL(aec_decode_end(&strm)); + + offsets_ = std::move(offsets); + return decoded; +} + +// --------------------------------------------------------------------------------------------------------------------- +template +typename NumericCompressor::Values AecDecompressor::decode(const std::shared_ptr accessor, const Block& range) { + + validateBitsPerSample(); + + ASSERT(!offsets_.empty()); + + auto [range_offset, range_size] = range; + auto range_offset_bytes = range_offset * sizeof(ValueType); + auto range_size_bytes = range_size * sizeof(ValueType); + + Values decoded(range_size); + + size_t nbytes = (bits_per_sample_ + 7) / 8; + if (nbytes == 3) { + nbytes = 4; + } + size_t rsi_size_bytes = rsi_ * block_size_ * nbytes; + + auto start_idx = range_offset_bytes / rsi_size_bytes; + auto end_idx = (range_offset_bytes + range_size_bytes) / rsi_size_bytes + 1; + + ASSERT(start_idx < end_idx); + ASSERT(end_idx <= offsets_.size()); + + size_t start_offset_bits = offsets_[start_idx]; + size_t start_offset_bytes = start_offset_bits / 8; + // size_t end_offset_bits = end_idx == offsets_.size() ? accessor->eof() * 8 : offsets_[end_idx]; + size_t end_offset_bytes = end_idx == offsets_.size() ? accessor->eof() : (offsets_[end_idx] + 7) / 8; + + // The new offset to the RSI may not be aligned to the start of the range, so we need to shift the offsets by the difference + /// @todo It seems unlikely that two copies are required here. + std::vector new_offsets_tmp; + std::vector new_offsets; + std::copy(offsets_.begin() + start_idx, offsets_.end(), std::back_inserter(new_offsets_tmp)); + size_t shift = start_offset_bits >> 3 << 3; // align to byte boundary + std::transform(new_offsets_tmp.begin(), new_offsets_tmp.end(), std::back_inserter(new_offsets), [shift] (size_t offset) { + return offset - shift; + }); + + ASSERT(!new_offsets.empty()); + + // Read RSIs + CompressedData encoded = accessor->read({start_offset_bytes, end_offset_bytes - start_offset_bytes}); + + struct aec_stream strm; + strm.rsi = rsi_; + strm.block_size = block_size_; + strm.bits_per_sample = bits_per_sample_; + strm.flags = flags_; + strm.avail_in = encoded.size(); + strm.next_in = reinterpret_cast(encoded.data()); + strm.avail_out = decoded.size() * sizeof(ValueType); + strm.next_out = reinterpret_cast(decoded.data()); + + size_t new_offset_bytes = range_offset_bytes - rsi_size_bytes * start_idx; + size_t new_size_bytes = range_size_bytes; + + AEC_CALL(aec_decode_init(&strm)); + AEC_CALL(aec_decode_range(&strm, new_offsets.data(), new_offsets.size(), new_offset_bytes, new_size_bytes)); + AEC_CALL(aec_decode_end(&strm)); + + return decoded; +} + +// --------------------------------------------------------------------------------------------------------------------- +template +AecParams::Offsets AecDecompressor::decode_offsets(const typename NumericCompressor::CompressedData& encoded) { + decode(encoded); // sets offsets_ + return std::move(offsets_); +} + +// --------------------------------------------------------------------------------------------------------------------- +template +void AecDecompressor::validateBitsPerSample() { + if (sizeof(ValueType) == 1 && !(bits_per_sample_ > 0 && bits_per_sample_ <= 8)) + throw eckit::Exception("bits_per_sample must be between 1 and 8 for 1-byte types", Here()); + if (sizeof(ValueType) == 2 && !(bits_per_sample_ > 8 && bits_per_sample_ <= 16)) + throw eckit::Exception("bits_per_sample must be between 9 and 16 for 2-byte types", Here()); + if (sizeof(ValueType) == 4 && !(bits_per_sample_ > 16 && bits_per_sample_ <= 32)) + throw eckit::Exception("bits_per_sample must be between 17 and 32 for 4-byte types", Here()); } -} // namespace mc +// --------------------------------------------------------------------------------------------------------------------- +// Explicit instantiations of the template class +template class AecDecompressor; +template class AecDecompressor; +template class AecDecompressor; + +} // namespace gribjump::mc diff --git a/src/gribjump/compression/compressors/Aec.h b/src/gribjump/compression/compressors/Aec.h index 14b303d..eb83e4b 100644 --- a/src/gribjump/compression/compressors/Aec.h +++ b/src/gribjump/compression/compressors/Aec.h @@ -11,279 +11,70 @@ #pragma once -#include "../NumericCompressor.h" - -#include -#include -#include "eckit/exception/Exceptions.h" - -#include #include -#include -#include - -#include -#include -#include -#include -#include - - -namespace mc { - -void print_binary(const char* name, const void* data, size_t n_bytes); -void print_aec_stream_info(struct aec_stream* strm, const char* func); +#include "gribjump/compression/NumericCompressor.h" +namespace gribjump::mc { class AecParams { public: - using Offsets = std::vector; - AecParams() : flags_(8ul), rsi_(128ul), block_size_(32ul), bits_per_sample_(16ul) {} - - size_t flags() const { return flags_; } - size_t rsi() const { return rsi_; } - size_t block_size() const { return block_size_; } - size_t bits_per_sample() const { return bits_per_sample_; } - std::optional offsets() const { - return offsets_.size() > 0 ? std::optional(offsets_) : std::nullopt; - } + using Offsets = std::vector; + AecParams() : flags_(8ul), rsi_(128ul), block_size_(32ul), bits_per_sample_(16ul) {} + + size_t flags() const { return flags_; } + size_t rsi() const { return rsi_; } + size_t block_size() const { return block_size_; } + size_t bits_per_sample() const { return bits_per_sample_; } + std::optional offsets() const { + return offsets_.size() > 0 ? std::optional(offsets_) : std::nullopt; + } - AecParams& flags(size_t flags) { flags_ = flags; return *this; } - AecParams& rsi(size_t rsi) { rsi_ = rsi; return *this; } - AecParams& block_size(size_t block_size) { block_size_ = block_size; return *this; } - AecParams& bits_per_sample(size_t bits_per_sample) { bits_per_sample_ = bits_per_sample; return *this; } - AecParams& offsets(const Offsets& offsets) { - assert(offsets.size() > 0); - offsets_ = offsets; - return *this; - } + AecParams& flags(size_t flags) { flags_ = flags; return *this; } + AecParams& rsi(size_t rsi) { rsi_ = rsi; return *this; } + AecParams& block_size(size_t block_size) { block_size_ = block_size; return *this; } + AecParams& bits_per_sample(size_t bits_per_sample) { bits_per_sample_ = bits_per_sample; return *this; } + AecParams& offsets(const Offsets& offsets) { + assert(offsets.size() > 0); + offsets_ = offsets; + return *this; + } protected: - size_t flags_; - size_t rsi_; - size_t block_size_; - size_t bits_per_sample_; - std::vector offsets_; + size_t flags_; + size_t rsi_; + size_t block_size_; + size_t bits_per_sample_; + std::vector offsets_; }; -template class AecCompressor; template class AecDecompressor; -template -class AecCompressor : public NumericCompressor, public AecParams { -public: - static_assert(std::is_integral::value, "AecDecompressor only supports integral types"); - static_assert(sizeof(ValueType) == 1 || sizeof(ValueType) == 2 || sizeof(ValueType) == 4, "AecDecompressor only supports 1, 2 or 4 byte integral types"); - using CompressedData = typename NumericCompressor::CompressedData; - using Values = typename NumericCompressor::Values; - - std::pair>, CompressedData> encode(const Values& in_buf) override - { - if (sizeof(ValueType) == 1 && !(bits_per_sample_ > 0 && bits_per_sample_ <= 8)) - throw eckit::BadValue("bits_per_sample must be between 1 and 8 for 1-byte types", Here()); - if (sizeof(ValueType) == 2 && !(bits_per_sample_ > 8 && bits_per_sample_ <= 16)) - throw eckit::BadValue("bits_per_sample must be between 9 and 16 for 2-byte types", Here()); - if (sizeof(ValueType) == 4 && !(bits_per_sample_ > 16 && bits_per_sample_ <= 32)) - throw eckit::BadValue("bits_per_sample must be between 17 and 32 for 4-byte types", Here()); - - const size_t n_elems{in_buf.size()}; - CompressedData out_buf{n_elems * sizeof(ValueType) * 2 + 1000}; - - struct aec_stream strm; - strm.flags = flags_; - strm.bits_per_sample = bits_per_sample_; - strm.block_size = block_size_; - strm.rsi = rsi_; - strm.avail_in = n_elems * sizeof(ValueType); - strm.next_in = reinterpret_cast(in_buf.data()); - strm.avail_out = out_buf.size(); - strm.next_out = reinterpret_cast(out_buf.data()); - - //print_aec_stream_info(&strm, "Compressing"); - - if (aec_encode_init(&strm) != AEC_OK) - throw eckit::FailedLibraryCall("libaec", "aec_encode_init", "Error initializing encoder", Here()); - - if (aec_encode_enable_offsets(&strm) != AEC_OK) - throw eckit::FailedLibraryCall("libaec", "aec_encode_enable_offsets", "Error enabling offsets", Here()); - - if (aec_encode(&strm, AEC_FLUSH) != AEC_OK) - throw eckit::FailedLibraryCall("libaec", "aec_encode", "Error encoding buffer", Here()); - - size_t offsets_count{0}; - if (aec_encode_count_offsets(&strm, &offsets_count) != AEC_OK) - throw eckit::FailedLibraryCall("libaec", "aec_encode_count_offsets", "Error counting offsets", Here()); - - std::vector offsets(offsets_count); - if (aec_encode_get_offsets(&strm, offsets.data(), offsets.size()) != AEC_OK) - throw eckit::FailedLibraryCall("libaec", "aec_encode_get_offsets", "Error getting offsets", Here()); - - if (aec_encode_end(&strm) != AEC_OK) - throw eckit::FailedLibraryCall("libaec", "aec_encode_end", "Error ending encoding", Here()); - - auto decompressor = std::unique_ptr>(new AecDecompressor{}); - decompressor->flags(flags_); - decompressor->bits_per_sample(bits_per_sample_); - decompressor->block_size(block_size_); - decompressor->rsi(rsi_); - decompressor->offsets(offsets); - decompressor->n_elems(in_buf.size()); - - return std::make_pair(std::move(decompressor), CompressedData{out_buf.data(), strm.total_out}); - } -}; - - template class AecDecompressor : public NumericDecompressor, public AecParams { -public: - static_assert(std::is_integral::value, "AecDecompressor only supports integral types"); - - - using CompressedData = typename NumericCompressor::CompressedData; - using Values = typename NumericCompressor::Values; - - Values decode(const CompressedData& encoded) override { - if (sizeof(ValueType) == 1 && !(bits_per_sample_ > 0 && bits_per_sample_ <= 8)) - throw eckit::BadValue("bits_per_sample must be between 1 and 8 for 1-byte types", Here()); - if (sizeof(ValueType) == 2 && !(bits_per_sample_ > 8 && bits_per_sample_ <= 16)) - throw eckit::BadValue("bits_per_sample must be between 9 and 16 for 2-byte types", Here()); - if (sizeof(ValueType) == 4 && !(bits_per_sample_ > 16 && bits_per_sample_ <= 32)) - throw eckit::BadValue("bits_per_sample must be between 17 and 32 for 4-byte types", Here()); - - Values decoded(n_elems_); - - //print_binary("decode_e ", encoded.data(), encoded.size()); - - struct aec_stream strm; - strm.rsi = rsi_; - strm.block_size = block_size_; - strm.bits_per_sample = bits_per_sample_; - strm.flags = flags_; - strm.avail_in = encoded.size(); - strm.next_in = reinterpret_cast(encoded.data()); - strm.avail_out = decoded.size() * sizeof(ValueType); - strm.next_out = reinterpret_cast(decoded.data()); - - //print_aec_stream_info(&strm, "Decompressing"); - - if (aec_decode_init(&strm) != AEC_OK) - throw eckit::FailedLibraryCall("libaec", "aec_decode_init", "Error initializing decoder", Here()); - - if (aec_decode_enable_offsets(&strm) != AEC_OK) - throw eckit::FailedLibraryCall("libaec", "aec_decode_enable_offsets", "Error enabling offsets", Here()); - - if (aec_decode(&strm, AEC_FLUSH) != AEC_OK) - throw eckit::FailedLibraryCall("libaec", "aec_decode", "Error decoding buffer", Here()); - - size_t offsets_count{0}; - if (aec_decode_count_offsets(&strm, &offsets_count) != AEC_OK) - throw eckit::FailedLibraryCall("libaec", "aec_decode_count_offsets", "Error counting offsets", Here()); - - std::vector offsets(offsets_count); - if (aec_decode_get_offsets(&strm, offsets.data(), offsets.size()) != AEC_OK) - throw eckit::FailedLibraryCall("libaec", "aec_decode_get_offsets", "Error getting offsets", Here()); - - //print_aec_stream_info(&strm, "Decompressing"); - - if (aec_decode_end(&strm) != AEC_OK) - throw eckit::FailedLibraryCall("libaec", "aec_decode_end", "Error ending decoder", Here()); - - offsets_ = std::move(offsets); - return decoded; - } - - - Values decode(const std::shared_ptr accessor, const Block& range) override - { - if (sizeof(ValueType) == 1 && !(bits_per_sample_ > 0 && bits_per_sample_ <= 8)) - throw eckit::Exception("bits_per_sample must be between 1 and 8 for 1-byte types", Here()); - if (sizeof(ValueType) == 2 && !(bits_per_sample_ > 8 && bits_per_sample_ <= 16)) - throw eckit::Exception("bits_per_sample must be between 9 and 16 for 2-byte types", Here()); - if (sizeof(ValueType) == 4 && !(bits_per_sample_ > 16 && bits_per_sample_ <= 32)) - throw eckit::Exception("bits_per_sample must be between 17 and 32 for 4-byte types", Here()); - - if (offsets_.empty()) - throw eckit::Exception("No offsets found", Here()); - - - auto [range_offset, range_size] = range; - auto range_offset_bytes = range_offset * sizeof(ValueType); - auto range_size_bytes = range_size * sizeof(ValueType); - - Values decoded(range_size); - - struct aec_stream strm; - strm.rsi = rsi_; - strm.block_size = block_size_; - strm.bits_per_sample = bits_per_sample_; - strm.flags = flags_; - - size_t nbytes = (strm.bits_per_sample + 7) / 8; - if (nbytes == 3) { - nbytes = 4; - } - size_t rsi_size_bytes = strm.rsi * strm.block_size * nbytes; - - auto start_idx = range_offset_bytes / rsi_size_bytes; - auto end_idx = (range_offset_bytes + range_size_bytes) / rsi_size_bytes + 1; - - assert(start_idx < end_idx); - assert(end_idx <= offsets_.size()); - - size_t start_offset_bits = offsets_[start_idx]; - size_t start_offset_bytes = start_offset_bits / 8; - - size_t end_offset_bits = end_idx == offsets_.size() ? accessor->eof() * 8 : offsets_[end_idx]; - size_t end_offset_bytes = end_idx == offsets_.size() ? accessor->eof() : (offsets_[end_idx] + 7) / 8; - - - // The new offset to the RSI may not be aligned to the start of the range - // so we need to shift the offsets by the difference - std::vector new_offsets_tmp; - std::vector new_offsets; - std::copy(offsets_.begin() + start_idx, offsets_.end(), std::back_inserter(new_offsets_tmp)); - size_t shift = start_offset_bits >> 3 << 3; - std::transform(new_offsets_tmp.begin(), new_offsets_tmp.end(), std::back_inserter(new_offsets), [shift] (size_t offset) { - return offset - shift; - }); - - // Read RSIs - CompressedData encoded = accessor->read({start_offset_bytes, end_offset_bytes - start_offset_bytes}); - - //print_binary("decode_range_e", encoded.data(), encoded.size()); - strm.avail_in = encoded.size(); - strm.next_in = reinterpret_cast(encoded.data()); - strm.avail_out = decoded.size() * sizeof(ValueType); - strm.next_out = reinterpret_cast(decoded.data()); +public: // types + static_assert(std::is_integral::value, "AecDecompressor only supports integral types"); + using CompressedData = typename NumericCompressor::CompressedData; + using Values = typename NumericCompressor::Values; - //print_aec_stream_info(&strm, "Decompressing"); +public: // methods - aec_decode_init(&strm); - int err; - size_t new_offset_bytes = range_offset_bytes - rsi_size_bytes * start_idx; - size_t new_size_bytes = range_size_bytes; + Values decode(const CompressedData& encoded) override; - if (new_offsets.empty()) - throw eckit::Exception("AEC offsets list is empty", Here()); + Values decode(const std::shared_ptr accessor, const Block& range) override; - if ((err = aec_decode_range(&strm, new_offsets.data(), new_offsets.size(), new_offset_bytes, new_size_bytes)) != AEC_OK) - throw eckit::FailedLibraryCall("libaec", "aec_decode_range", "Error decoding buffer", Here()); + Offsets decode_offsets(const CompressedData& encoded) override; - //print_aec_stream_info(&strm, "Decompressing"); - //print_binary("decode_range_d", decoded.data(), decoded.size()); + size_t n_elems() const { return n_elems_; } + AecDecompressor& n_elems(size_t n_elems) { n_elems_ = n_elems; return *this; } - aec_decode_end(&strm); +private: // methods - return decoded; - } + void validateBitsPerSample(); - size_t n_elems() const { return n_elems; } - AecDecompressor& n_elems(size_t n_elems) { n_elems_ = n_elems; return *this; } -private: - size_t n_elems_; +private: // members + size_t n_elems_; }; -} // namespace mc +} // namespace gribjump::mc diff --git a/src/gribjump/compression/compressors/Ccsds.cc b/src/gribjump/compression/compressors/Ccsds.cc index dddc641..827478d 100644 --- a/src/gribjump/compression/compressors/Ccsds.cc +++ b/src/gribjump/compression/compressors/Ccsds.cc @@ -7,35 +7,192 @@ * granted to it by virtue of its status as an intergovernmental organisation nor * does it submit to any jurisdiction. */ + +#include "gribjump/compression/compressors/Ccsds.h" -#include "Ccsds.h" -#include "Aec.h" -//#include "serializer.h" -//#include "libaec.h" - -#include -#include -#include #include +#include +#include + +#include "gribjump/compression/compressors/Aec.h" -namespace mc { +namespace { -bool is_big_endian() -{ +bool is_big_endian() { unsigned char is_big_endian = 0; unsigned short endianess_test = 1; return reinterpret_cast(&endianess_test)[0] == is_big_endian; } -size_t modify_aec_flags(size_t flags) -{ - // ECC-1602: Performance improvement: enabled the use of native data types - flags &= ~AEC_DATA_3BYTE; // disable support for 3-bytes per value - if (is_big_endian()) - flags |= AEC_DATA_MSB; // enable big-endian - else - flags &= ~AEC_DATA_MSB; // enable little-endian - return flags; +size_t modify_aec_flags(size_t flags) { + // ECC-1602: Performance improvement: enabled the use of native data types + flags &= ~AEC_DATA_3BYTE; // disable support for 3-bytes per value + if (is_big_endian()) + flags |= AEC_DATA_MSB; // enable big-endian + else + flags &= ~AEC_DATA_MSB; // enable little-endian + return flags; +} + +} // namespace + +namespace gribjump::mc { + +// --------------------------------------------------------------------------------------------------------------------- +template +CcsdsParams::Offsets CcsdsDecompressor::decode_offsets(const eckit::Buffer& in_buf){ + + Offsets offsets; + switch (auto nbytes = sample_nbytes()) { + case 1: + offsets = decode_offsets_(in_buf); + break; + case 2: + offsets = decode_offsets_(in_buf); + break; + case 4: + offsets = decode_offsets_(in_buf); + break; + default: + std::stringstream ss; + ss << nbytes; + throw eckit::SeriousBug("Invalid number of bytes per sample: " + ss.str(), Here()); + } + + return offsets; +} + +// --------------------------------------------------------------------------------------------------------------------- +template +typename CcsdsDecompressor::Values CcsdsDecompressor::decode(const eckit::Buffer& in_buf) { + auto bscale = codes_power(binary_scale_factor_, 2); + auto dscale = codes_power(decimal_scale_factor_, -10); + auto flags = modify_aec_flags(flags_); + + Values values; + + switch (auto nbytes = sample_nbytes()) { + case 1: + values = decode_all_(in_buf, bscale, dscale); + break; + case 2: + values = decode_all_(in_buf, bscale, dscale); + break; + case 4: + values = decode_all_(in_buf, bscale, dscale); + break; + default: + std::stringstream ss; + ss << nbytes; + throw eckit::SeriousBug("Invalid number of bytes per sample: " + ss.str(), Here()); + } + + return values; +} + +// --------------------------------------------------------------------------------------------------------------------- +template +typename CcsdsDecompressor::Values CcsdsDecompressor::decode(const std::shared_ptr accessor, const Block& range){ + if (range.second == 0) { + return Values{}; + } + + auto bscale = codes_power(binary_scale_factor_, 2); + auto dscale = codes_power(decimal_scale_factor_, -10); + Values values; + + switch (auto nbytes = sample_nbytes()) { + case 1: + values = decode_range_(accessor, range, bscale, dscale); + break; + case 2: + values = decode_range_(accessor, range, bscale, dscale); + break; + case 4: + values = decode_range_(accessor, range, bscale, dscale); + break; + default: + std::stringstream ss; + ss << nbytes; + throw eckit::SeriousBug("Invalid number of bytes per sample: " + ss.str(), Here()); + } + + return values; +} + +// --------------------------------------------------------------------------------------------------------------------- +template +size_t CcsdsDecompressor::sample_nbytes() const { + size_t nbytes = (bits_per_sample_ + 7) / 8; + if (nbytes == 3) { + nbytes = 4; + } + return nbytes; +} +// --------------------------------------------------------------------------------------------------------------------- +template +template +typename CcsdsDecompressor::Values CcsdsDecompressor::decode_range_ (const std::shared_ptr accessor, const Block& simple_range, double bscale, double dscale) { + AecDecompressor aec{}; + auto flags = modify_aec_flags(flags_); + aec.flags(flags); + aec.bits_per_sample(bits_per_sample_); + aec.block_size(block_size_); + aec.rsi(rsi_); + aec.offsets(offsets_); + + typename AecDecompressor::Values simple_values = aec.decode(accessor, simple_range); + + Values values(simple_values.size()); + std::transform(simple_values.begin(), simple_values.end(), values.begin(), [&](const auto& simple_value) { + return (simple_value * bscale + reference_value_) * dscale; + }); + + return values; +} + +// --------------------------------------------------------------------------------------------------------------------- +template +template +CcsdsParams::Offsets CcsdsDecompressor::decode_offsets_ (const typename AecDecompressor::CompressedData& in_buf) { + AecDecompressor aec{}; + auto flags = modify_aec_flags(flags_); + aec.flags(flags); + aec.bits_per_sample(bits_per_sample_); + aec.block_size(block_size_); + aec.rsi(rsi_); + aec.n_elems(n_elems_); + + return aec.decode_offsets(in_buf); +} + +// --------------------------------------------------------------------------------------------------------------------- +template +template +typename CcsdsDecompressor::Values CcsdsDecompressor::decode_all_ (const typename AecDecompressor::CompressedData& in_buf, double bscale, double dscale) { + AecDecompressor aec{}; + auto flags = modify_aec_flags(flags_); + aec.flags(flags); + aec.bits_per_sample(bits_per_sample_); + aec.block_size(block_size_); + aec.rsi(rsi_); + aec.n_elems(n_elems_); + + typename AecDecompressor::Values simple_values = aec.decode(in_buf); + + if (aec.offsets()) { + offsets_ = aec.offsets().value(); + } + + Values values(simple_values.size()); + std::transform(simple_values.begin(), simple_values.end(), values.begin(), [&](const auto& simple_value) { + return (simple_value * bscale + reference_value_) * dscale; + }); + + return values; } +// --------------------------------------------------------------------------------------------------------------------- +// Explicit instantiations of the template class +template class CcsdsDecompressor; -} // namespace mc +} // namespace gribjump::mc diff --git a/src/gribjump/compression/compressors/Ccsds.h b/src/gribjump/compression/compressors/Ccsds.h index 1cd5a3d..36b4382 100644 --- a/src/gribjump/compression/compressors/Ccsds.h +++ b/src/gribjump/compression/compressors/Ccsds.h @@ -10,282 +10,93 @@ #pragma once -#include "../NumericCompressor.h" -#include "Aec.h" - -#include -#include -#include "eckit/exception/Exceptions.h" - -#include #include -#include - -#pragma once - -namespace mc { - -/* Return n to the power of s */ -template -constexpr T codes_power(long s, long n) -{ - T divisor = 1.0; - if (s == 0) - return 1.0; - if (s == 1) - return n; - while (s < 0) { - divisor /= n; - s++; - } - while (s > 0) { - divisor *= n; - s--; - } - return divisor; -} - - -bool is_big_endian(); -size_t modify_aec_flags(size_t flags); +#include "gribjump/compression/compressors/Aec.h" +#include "gribjump/compression/NumericCompressor.h" +namespace gribjump::mc { +/// @todo can we unify this with AecParams? There is a lot of overlap. class CcsdsParams { public: - using Offsets = std::vector; - CcsdsParams() : flags_(8ul), rsi_(128ul), block_size_(32ul), bits_per_sample_(16ul) {} - - size_t flags() const { return flags_; } - size_t rsi() const { return rsi_; } - size_t block_size() const { return block_size_; } - size_t bits_per_sample() const { return bits_per_sample_; } - double reference_value() const { return reference_value_; } - size_t decimal_scale_factor() const { return decimal_scale_factor_; } - size_t binary_scale_factor() const { return binary_scale_factor_; } - std::optional offsets() const { - return offsets_.size() > 0 ? std::optional(offsets_) : std::nullopt; - } - - CcsdsParams& flags(size_t flags) { flags_ = flags; return *this; } - CcsdsParams& rsi(size_t rsi) { rsi_ = rsi; return *this; } - CcsdsParams& block_size(size_t block_size) { block_size_ = block_size; return *this; } - CcsdsParams& bits_per_sample(size_t bits_per_sample) { bits_per_sample_ = bits_per_sample; return *this; } - CcsdsParams& reference_value(double reference_value) { reference_value_ = reference_value; return *this; } - CcsdsParams& decimal_scale_factor(size_t decimal_scale_factor) { decimal_scale_factor_ = decimal_scale_factor; return *this; } - CcsdsParams& binary_scale_factor(size_t binary_scale_factor) { binary_scale_factor_ = binary_scale_factor; return *this; } - CcsdsParams& offsets(const Offsets& offsets) { - assert(offsets.size() > 0); - offsets_ = offsets; - return *this; - } - -protected: - size_t flags_; - size_t rsi_; - size_t block_size_; - size_t bits_per_sample_; - double reference_value_; - size_t decimal_scale_factor_; - size_t binary_scale_factor_; - std::vector offsets_; -}; - -template class CcsdsCompressor; -template class CcsdsDecompressor; - -template -class CcsdsCompressor : public NumericCompressor, public CcsdsParams { -public: - using CompressedData = typename NumericCompressor::CompressedData; - using Values = typename NumericCompressor::Values; - std::pair>, CompressedData> encode(const Values& values) override { - size_t nbytes = (bits_per_sample_ + 7) / 8; - if (nbytes == 3) - nbytes = 4; - - CcsdsCompressor ccsds_decompressor{}; - CcsdsCompressor::CompressedData data{}; - switch (nbytes) { - case 1: - std::tie(ccsds_decompressor, data) = encode_all_(values, nbytes); - break; - case 2: - std::tie(ccsds_decompressor, data) = encode_all_(values, nbytes); - break; - case 4: - std::tie(ccsds_decompressor, data) = encode_all_(values, nbytes); - break; - default: - std::stringstream ss; - ss << nbytes; - throw eckit::SeriousBug("Unsupported number of bytes per value: " + ss.str(), Here()); + using Offsets = std::vector; + CcsdsParams() : flags_(8ul), rsi_(128ul), block_size_(32ul), bits_per_sample_(16ul) {} + + size_t flags() const { return flags_; } + size_t rsi() const { return rsi_; } + size_t block_size() const { return block_size_; } + size_t bits_per_sample() const { return bits_per_sample_; } + double reference_value() const { return reference_value_; } + size_t decimal_scale_factor() const { return decimal_scale_factor_; } + size_t binary_scale_factor() const { return binary_scale_factor_; } + std::optional offsets() const { + return offsets_.size() > 0 ? std::optional(offsets_) : std::nullopt; } - //return std::make_pair(std::move(ccsds_decompressor), std::move(data)); - return std::make_pair(ccsds_decompressor, data); - } - -private: - template - std::pair, CompressedData> encode_all_(const Values& values, size_t nbytes) { - ValueType divisor = codes_power(binary_scale_factor_, 2); - ValueType d = codes_power(decimal_scale_factor_, 10); - std::vector simple_values(values.size()); - - std::transform(values.begin(), values.end(), simple_values.begin(), [&](const auto& value) { - return ((value * d - reference_value_) * divisor) + 0.5; - }); - - AecCompressor aec{}; - aec.flags(flags_); - aec.bits_per_sample(bits_per_sample_); - aec.block_size(block_size_); - aec.rsi(rsi_); - - auto [aec_decompressor_base, aec_data] = aec.encode(simple_values); - AecDecompressor aec_decompressor = dynamic_cast&>(*aec_decompressor_base); - - std::unique_ptr> ccsds_decompressor = std::make_unique>(); - ccsds_decompressor->flags(aec_decompressor.flags()); - ccsds_decompressor->rsi(aec_decompressor.rsi()); - ccsds_decompressor->block_size(aec_decompressor.block_size()); - ccsds_decompressor->bits_per_sample(aec_decompressor.bits_per_sample()); - ccsds_decompressor->reference_value(reference_value_); - ccsds_decompressor->decimal_scale_factor(decimal_scale_factor_); - ccsds_decompressor->binary_scale_factor(binary_scale_factor_); - if (aec_decompressor.offsets()) { - ccsds_decompressor->offsets(aec_decompressor.offsets().value()); + CcsdsParams& flags(size_t flags) { flags_ = flags; return *this; } + CcsdsParams& rsi(size_t rsi) { rsi_ = rsi; return *this; } + CcsdsParams& block_size(size_t block_size) { block_size_ = block_size; return *this; } + CcsdsParams& bits_per_sample(size_t bits_per_sample) { bits_per_sample_ = bits_per_sample; return *this; } + CcsdsParams& reference_value(double reference_value) { reference_value_ = reference_value; return *this; } + CcsdsParams& decimal_scale_factor(size_t decimal_scale_factor) { decimal_scale_factor_ = decimal_scale_factor; return *this; } + CcsdsParams& binary_scale_factor(size_t binary_scale_factor) { binary_scale_factor_ = binary_scale_factor; return *this; } + CcsdsParams& offsets(const Offsets& offsets) { + ASSERT(offsets.size() > 0); + offsets_ = offsets; + return *this; } - ccsds_decompressor->n_elems(aec_decompressor.n_elems()); - //return std::make_pair(std::move(ccsds_decompressor), std::move(aec_data)); - return std::make_pair(ccsds_decompressor, aec_data); - } +protected: + size_t flags_; + size_t rsi_; + size_t block_size_; + size_t bits_per_sample_; + double reference_value_; + size_t decimal_scale_factor_; + size_t binary_scale_factor_; + std::vector offsets_; }; - template class CcsdsDecompressor : public mc::NumericDecompressor, public CcsdsParams { -public: - using CompressedData = typename mc::NumericDecompressor::CompressedData; - using Values = typename mc::NumericDecompressor::Values; - - Values decode(const CompressedData& in_buf) override - { - auto bscale = codes_power(binary_scale_factor_, 2); - auto dscale = codes_power(decimal_scale_factor_, -10); - auto flags = modify_aec_flags(flags_); - size_t nbytes = (bits_per_sample_ + 7) / 8; - if (nbytes == 3) - nbytes = 4; +public: // types + using CompressedData = typename mc::NumericDecompressor::CompressedData; + using Values = typename mc::NumericDecompressor::Values; - Values values; +public: // methods + using NumericDecompressor::decode; - size_t i; - switch (nbytes) { - case 1: - values = decode_all_(in_buf, bscale, dscale); - break; - case 2: - values = decode_all_(in_buf, bscale, dscale); - break; - case 4: - values = decode_all_(in_buf, bscale, dscale); - break; - default: - std::stringstream ss; - ss << nbytes; - throw eckit::SeriousBug("Invalid number of bytes per sample: " + ss.str(), Here()); - } + /// Decode a compressed buffer + Values decode(const eckit::Buffer& in_buf) override; - return values; - } + /// Decode a range of values + Values decode(const std::shared_ptr accessor, const Block& range) override; + /// Avoid fully decoding the data, only decode the offsets + /// @note: libaec will still do its part of the decoding + Offsets decode_offsets(const eckit::Buffer& in_buf) override; - Values decode(const std::shared_ptr accessor, const Block& range) override { - if (range.second == 0) - return Values{}; + size_t n_elems() const { return n_elems_; } + CcsdsDecompressor& n_elems(size_t n_elems) { n_elems_ = n_elems; return *this; } - size_t simple_nbytes = (bits_per_sample_ + 7) / 8; - if (simple_nbytes == 3) - simple_nbytes = 4; +private: // methods + size_t sample_nbytes() const; - size_t value_nbytes = sizeof(double); - auto bscale = codes_power(binary_scale_factor_, 2); - auto dscale = codes_power(decimal_scale_factor_, -10); - Values values; - - switch (simple_nbytes) { - case 1: - values = decode_range_(accessor, range, bscale, dscale); - break; - case 2: - values = decode_range_(accessor, range, bscale, dscale); - break; - case 4: - values = decode_range_(accessor, range, bscale, dscale); - break; - default: - std::stringstream ss; - ss << simple_nbytes; - throw eckit::SeriousBug("Invalid number of bytes per sample: " + ss.str(), Here()); - } + // Wrappers around aec.decode - return values; - } + template + Values decode_range_(const std::shared_ptr accessor, const Block& simple_range, double bscale, double dscale); - using NumericDecompressor::decode; - - size_t n_elems() const { return n_elems_; } - CcsdsDecompressor& n_elems(size_t n_elems) { n_elems_ = n_elems; return *this; } - -private: - size_t n_elems_; - - template - Values decode_range_ (const std::shared_ptr accessor, const Block& simple_range, double bscale, double dscale) { - AecDecompressor aec{}; - auto flags = modify_aec_flags(flags_); - aec.flags(flags); - aec.bits_per_sample(bits_per_sample_); - aec.block_size(block_size_); - aec.rsi(rsi_); - aec.offsets(offsets_); - - typename AecDecompressor::Values simple_values = aec.decode(accessor, simple_range); - Values values(simple_values.size()); - std::transform(simple_values.begin(), simple_values.end(), values.begin(), [&](const auto& simple_value) { - return (simple_value * bscale + reference_value_) * dscale; - }); - return values; - } - - - template - Values decode_all_ (const typename AecDecompressor::CompressedData& in_buf, double bscale, double dscale) { - AecDecompressor aec{}; - auto flags = modify_aec_flags(flags_); - aec.flags(flags); - aec.bits_per_sample(bits_per_sample_); - aec.block_size(block_size_); - aec.rsi(rsi_); - aec.n_elems(n_elems_); - - typename AecDecompressor::Values simple_values = aec.decode(in_buf); - - if (aec.offsets()) { - offsets_ = aec.offsets().value(); - } + template + Offsets decode_offsets_(const typename AecDecompressor::CompressedData& in_buf); - Values values(simple_values.size()); - std::transform(simple_values.begin(), simple_values.end(), values.begin(), [&](const auto& simple_value) { - return (simple_value * bscale + reference_value_) * dscale; - }); - return values; - } + template + Values decode_all_(const typename AecDecompressor::CompressedData& in_buf, double bscale, double dscale); +private: // members + size_t n_elems_; }; -} // namespace mc +} // namespace gribjump::mc diff --git a/src/gribjump/compression/compressors/Simple.cc b/src/gribjump/compression/compressors/Simple.cc index e69de29..4493b9a 100644 --- a/src/gribjump/compression/compressors/Simple.cc +++ b/src/gribjump/compression/compressors/Simple.cc @@ -0,0 +1,95 @@ +/* + * (C) Copyright 2023- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation nor + * does it submit to any jurisdiction. + */ + + +#include "gribjump/compression/compressors/Simple.h" +#include "gribjump/compression/compressors/SimplePacking.h" + +namespace { + +size_t bin_pos(size_t value_idx, size_t bits_per_value) { + constexpr size_t chunk_nvals = 8; + size_t chunk_size = bits_per_value; + size_t chunk_idx = value_idx / chunk_nvals; + size_t value_idx_first_in_chunk = chunk_idx * chunk_nvals; + size_t chunk_offset_bytes = value_idx_first_in_chunk * bits_per_value / chunk_nvals; + + return chunk_offset_bytes; +} + +} // namespace + +namespace gribjump::mc { + +template +typename SimpleDecompressor::Values SimpleDecompressor::decode(const std::shared_ptr accessor, const Block& range){ + using SP = SimplePacking; + SimplePacking sp{}; + DecodeParameters params{}; + constexpr int VSIZE = sizeof(ValueType); + + auto [offset, size] = range; + params.bits_per_value = bits_per_value_; + params.reference_value = reference_value_; + params.binary_scale_factor = binary_scale_factor_; + params.decimal_scale_factor = decimal_scale_factor_; + + constexpr size_t chunk_nvals = 8; + size_t new_offset = offset / chunk_nvals * chunk_nvals; + size_t end = offset + size; + size_t new_end = (end + (chunk_nvals - 1)) / chunk_nvals * chunk_nvals; + size_t new_size = new_end - new_offset; + Block inclusive_range{new_offset, new_size}; + + params.n_vals = new_size; + + size_t last_pos = std::min(bin_pos(new_size, bits_per_value_), accessor->eof() - bin_pos(new_offset, bits_per_value_)); + Block data_range{bin_pos(new_offset, bits_per_value_), last_pos}; + eckit::Buffer compressed = accessor->read(data_range); + + typename SP::Buffer data{(unsigned char*) compressed.data(), (unsigned char*) compressed.data() + data_range.second}; + typename SP::Values decoded = sp.unpack(params, data); + + size_t shift_offset = offset - new_offset; + Values out_values{decoded.data() + shift_offset, decoded.data() + shift_offset + size}; + + return out_values; +} + +// --------------------------------------------------------------------------------------------------------------------- +template +typename SimpleDecompressor::Values SimpleDecompressor::decode(const typename NumericDecompressor::CompressedData& in_buf){ + + using SP = SimplePacking; + SP sp{}; + DecodeParameters params{}; + params.bits_per_value = bits_per_value_; + params.reference_value = reference_value_; + params.binary_scale_factor = binary_scale_factor_; + params.decimal_scale_factor = decimal_scale_factor_; + params.n_vals = in_buf.size() * 8 / bits_per_value_; + + typename SP::Buffer data{(unsigned char*) in_buf.data(), (unsigned char*) in_buf.data() + params.n_vals}; + typename SP::Values decoded = sp.unpack(params, data); + + auto decoded_data = reinterpret_cast(decoded.data()); + + /// @todo: this line looks very strange to me. + Values out_buf{decoded_data, decoded_data + decoded.size()}; + //Values out_buf{decoded.data(), decoded.size() * sizeof(ValueType)}; + + return out_buf; +} + +// --------------------------------------------------------------------------------------------------------------------- +// Explicit instantiations of the template class +template class SimpleDecompressor; + +} // namespace gribjump::mc \ No newline at end of file diff --git a/src/gribjump/compression/compressors/Simple.h b/src/gribjump/compression/compressors/Simple.h index 1b0695b..d1f36b8 100644 --- a/src/gribjump/compression/compressors/Simple.h +++ b/src/gribjump/compression/compressors/Simple.h @@ -10,159 +10,53 @@ #pragma once -#include "../NumericCompressor.h" -#include "algorithms/SimplePacking.h" - -#include -#include - -#include -#include -#include - -namespace mc { - - -template -size_t bin_pos(size_t value_idx, size_t bits_per_value) -{ - constexpr size_t chunk_nvals = 8; - size_t chunk_size = bits_per_value; - size_t chunk_idx = value_idx / chunk_nvals; - size_t value_idx_first_in_chunk = chunk_idx * chunk_nvals; - size_t chunk_offset_bytes = value_idx_first_in_chunk * bits_per_value / chunk_nvals; - - return chunk_offset_bytes; -} +#include "gribjump/compression/NumericCompressor.h" +namespace gribjump::mc { class SimpleParams { public: - using Offsets = std::vector; - SimpleParams() : bits_per_value_(16ul), reference_value_(0.0), decimal_scale_factor_(0ul), binary_scale_factor_(0ul) {} + using Offsets = std::vector; + SimpleParams() : bits_per_value_(16ul), reference_value_(0.0), decimal_scale_factor_(0ul), binary_scale_factor_(0ul) {} - size_t bits_per_value() const { return bits_per_value_; } - double reference_value() const { return reference_value_; } - long decimal_scale_factor() const { return decimal_scale_factor_; } - long binary_scale_factor() const { return binary_scale_factor_; } + size_t bits_per_value() const { return bits_per_value_; } + double reference_value() const { return reference_value_; } + long decimal_scale_factor() const { return decimal_scale_factor_; } + long binary_scale_factor() const { return binary_scale_factor_; } - SimpleParams& bits_per_value(size_t bits_per_value) { bits_per_value_ = bits_per_value; return *this; } - SimpleParams& reference_value(double reference_value) { reference_value_ = reference_value; return *this; } - SimpleParams& decimal_scale_factor(long decimal_scale_factor) { decimal_scale_factor_ = decimal_scale_factor; return *this; } - SimpleParams& binary_scale_factor(long binary_scale_factor) { binary_scale_factor_ = binary_scale_factor; return *this; } + SimpleParams& bits_per_value(size_t bits_per_value) { bits_per_value_ = bits_per_value; return *this; } + SimpleParams& reference_value(double reference_value) { reference_value_ = reference_value; return *this; } + SimpleParams& decimal_scale_factor(long decimal_scale_factor) { decimal_scale_factor_ = decimal_scale_factor; return *this; } + SimpleParams& binary_scale_factor(long binary_scale_factor) { binary_scale_factor_ = binary_scale_factor; return *this; } protected: - size_t bits_per_value_; - double reference_value_; - long decimal_scale_factor_; - long binary_scale_factor_; + size_t bits_per_value_; + double reference_value_; + long decimal_scale_factor_; + long binary_scale_factor_; }; -template class SimpleCompressor; template class SimpleDecompressor; -template -class SimpleCompressor : public NumericCompressor, public SimpleParams { -public: - using CompressedData = typename NumericDecompressor::CompressedData; - using Values = typename NumericDecompressor::Values; - std::pair>, CompressedData> encode(const Values& values) override { - using SP = SimplePacking; - SP sp{}; - DecodeParameters params{}; - params.bits_per_value = bits_per_value_; - params.reference_value = reference_value_; - params.binary_scale_factor = binary_scale_factor_; - params.decimal_scale_factor = decimal_scale_factor_; - params.n_vals = values.size() * 8 / bits_per_value_; - - typename SP::Data data{(ValueType*) values.data(), (ValueType*) values.data() + values.size() / sizeof(ValueType)}; - auto [decode_params, encoded] = sp.pack(params, data); - - std::unique_ptr> decompressor{}; - decompressor->bits_per_value(decode_params.bits_per_value); - decompressor->reference_value(decode_params.reference_value); - decompressor->binary_scale_factor(decode_params.binary_scale_factor); - decompressor->decimal_scale_factor(decode_params.decimal_scale_factor); - - CompressedData out_buf{encoded.data(), encoded.size()}; - - return std::make_pair(std::move(decompressor), std::move(out_buf)); - } -}; - - - -// Decode template class SimpleDecompressor : public NumericDecompressor, public SimpleParams { public: - using CompressedData = typename NumericDecompressor::CompressedData; - using Values = typename NumericDecompressor::Values; - - size_t buffer_size() const { return buffer_size_; } - SimpleDecompressor& buffer_size(size_t buffer_size) { buffer_size_ = buffer_size; return *this; } - - Values decode(const std::shared_ptr accessor, const Block& range) override { - //SP sp{}; - using SP = SimplePacking; - SimplePacking sp{}; - DecodeParameters params{}; - constexpr int VSIZE = sizeof(ValueType); - - auto [offset, size] = range; - params.bits_per_value = bits_per_value_; - params.reference_value = reference_value_; - params.binary_scale_factor = binary_scale_factor_; - params.decimal_scale_factor = decimal_scale_factor_; - - constexpr size_t chunk_nvals = 8; - size_t new_offset = offset / chunk_nvals * chunk_nvals; - size_t end = offset + size; - size_t new_end = (end + (chunk_nvals - 1)) / chunk_nvals * chunk_nvals; - size_t new_size = new_end - new_offset; - Block inclusive_range{new_offset, new_size}; - - params.n_vals = new_size; - - size_t last_pos = std::min(bin_pos(new_size, bits_per_value_), accessor->eof() - bin_pos(new_offset, bits_per_value_)); - Block data_range{bin_pos(new_offset, bits_per_value_), last_pos}; - eckit::Buffer compressed = accessor->read(data_range); - - typename SP::Buffer data{(unsigned char*) compressed.data(), (unsigned char*) compressed.data() + data_range.second}; - typename SP::Values decoded = sp.unpack(params, data); - - size_t shift_offset = offset - new_offset; - Values out_values{decoded.data() + shift_offset, decoded.data() + shift_offset + size}; - - return out_values; - } - - - Values decode(const CompressedData& in_buf) override { - using SP = SimplePacking; - SP sp{}; - DecodeParameters params{}; - params.bits_per_value = bits_per_value_; - params.reference_value = reference_value_; - params.binary_scale_factor = binary_scale_factor_; - params.decimal_scale_factor = decimal_scale_factor_; - params.n_vals = in_buf.size() * 8 / bits_per_value_; + using CompressedData = typename NumericDecompressor::CompressedData; + using Values = typename NumericDecompressor::Values; + using NumericDecompressor::decode; - typename SP::Buffer data{(unsigned char*) in_buf.data(), (unsigned char*) in_buf.data() + params.n_vals}; - typename SP::Values decoded = sp.unpack(params, data); + Values decode(const CompressedData& in_buf) override; - auto decoded_data = reinterpret_cast(decoded.data()); - Values out_buf{decoded_data, decoded_data + decoded.size()}; - //Values out_buf{decoded.data(), decoded.size() * sizeof(ValueType)}; + Values decode(const std::shared_ptr accessor, const Block& range) override; - return out_buf; - } + // getters and setters + + size_t buffer_size() const { return buffer_size_; } - using NumericDecompressor::decode; + SimpleDecompressor& buffer_size(size_t buffer_size) { buffer_size_ = buffer_size; return *this; } private: - size_t buffer_size_; + size_t buffer_size_; }; -} // namespace mc +} // namespace gribjump::mc diff --git a/src/gribjump/compression/compressors/SimplePacking.cc b/src/gribjump/compression/compressors/SimplePacking.cc new file mode 100644 index 0000000..3dd24ac --- /dev/null +++ b/src/gribjump/compression/compressors/SimplePacking.cc @@ -0,0 +1,130 @@ +/* + * (C) Copyright 2023- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation nor + * does it submit to any jurisdiction. + */ + +#include "gribjump/compression/compressors/SimplePacking.h" +#include "gribjump/compression/NumericCompressor.h" +#include "eckit/exception/Exceptions.h" + +namespace { + +inline unsigned long bitmask(size_t x) { + constexpr size_t maxNBits = sizeof(unsigned long) * 8; + return (x == maxNBits) ? (unsigned long)-1UL : (1UL << x) - 1; +} + +/// @note Appears to have been copied from eccodes... +template +int decode_array(const unsigned char* p, long bitsPerValue, + double reference_value, double s, double d, + size_t n_vals, T* val) { + unsigned long lvalue = 0; + T x; + + if (bitsPerValue % 8 == 0) { + /* See ECC-386 */ + int bc; + int l = bitsPerValue / 8; + size_t o = 0; + + for (size_t i = 0; i < n_vals; i++) { + lvalue = 0; + lvalue <<= 8; + lvalue |= p[o++]; + + for (bc = 1; bc < l; bc++) { + lvalue <<= 8; + lvalue |= p[o++]; + } + x = ((lvalue * s) + reference_value) * d; + val[i] = x; + } + } + else { + unsigned long mask = bitmask(bitsPerValue); + + /* pi: position of bitp in p[]. >>3 == /8 */ + long bitp = 0; + long pi = 0; + /* some bits might of the current byte at pi might be used */ + /* by the previous number usefulBitsInByte gives remaining unused bits */ + /* number of useful bits in current byte */ + int usefulBitsInByte = 8 - (bitp & 7); + for (size_t i = 0; i < n_vals; i++) { + /* value read as long */ + long bitsToRead = 0; + lvalue = 0; + bitsToRead = bitsPerValue; + /* read one byte after the other to lvalue until >= bitsPerValue are read */ + while (bitsToRead > 0) { + lvalue <<= 8; + lvalue += p[pi]; + pi++; + bitsToRead -= usefulBitsInByte; + usefulBitsInByte = 8; + } + bitp += bitsPerValue; + /* bitsToRead is now <= 0, remove the last bits */ + lvalue >>= -1 * bitsToRead; + /* set leading bits to 0 - removing bits used for previous number */ + lvalue &= mask; + + usefulBitsInByte = -1 * bitsToRead; /* prepare for next round */ + if (usefulBitsInByte > 0) { + pi--; /* reread the current byte */ + } + else { + usefulBitsInByte = 8; /* start with next full byte */ + } + /* scaling and move value to output */ + x = ((lvalue * s) + reference_value) * d; + val[i] = x; + } + } + return 0; +} + +} // namespace + +namespace gribjump::mc { + +template +typename SimplePacking::Values SimplePacking::unpack( + const DecodeParameters& params, + const Buffer& buf) { + + if (params.bits_per_value > (sizeof(long) * 8)) { + throw eckit::BadValue("Invalid BPV", Here()); + } + + // Empty field + if (params.n_vals == 0) { + return Values{}; + } + + /// @todo: I think there is already logic for checking for constant fields upstream of this code. + // Constant field + if (params.bits_per_value == 0) { + return Values(params.n_vals, params.reference_value); + } + + double s = mc::codes_power(params.binary_scale_factor, 2); + double d = mc::codes_power(-params.decimal_scale_factor, 10); + + Values values(params.n_vals); + decode_array(buf.data(), params.bits_per_value, params.reference_value, s, d, params.n_vals, values.data()); + + return values; +} + +// --------------------------------------------------------------------------------------------------------------------- +// Explicit instantiations of the template class +template class SimplePacking; + +} // namespace gribjump::mc diff --git a/src/gribjump/compression/compressors/SimplePacking.h b/src/gribjump/compression/compressors/SimplePacking.h new file mode 100644 index 0000000..4ed5813 --- /dev/null +++ b/src/gribjump/compression/compressors/SimplePacking.h @@ -0,0 +1,44 @@ +/* + * (C) Copyright 2023- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation nor + * does it submit to any jurisdiction. + */ + +#pragma once + +#include +#include + +namespace gribjump::mc { + +template +struct DecodeParameters { + using ValueType = T; + + ValueType reference_value; + ValueType binary_scale_factor; + ValueType decimal_scale_factor; + long bits_per_value; + size_t n_vals; + + DecodeParameters() : reference_value(0), binary_scale_factor(0), decimal_scale_factor(0), bits_per_value(0), n_vals(0) {} +}; + +template +class SimplePacking { +public: + using ValueType = T; + using Buffer = std::vector; + using Values = std::vector; + + SimplePacking() = default; + ~SimplePacking() = default; + + Values unpack(const DecodeParameters& params, const Buffer& buffer); +}; + +} // namespace gribjump::mc \ No newline at end of file diff --git a/src/gribjump/compression/compressors/algorithms/LibEccodes.cc b/src/gribjump/compression/compressors/algorithms/LibEccodes.cc deleted file mode 100644 index b65d9db..0000000 --- a/src/gribjump/compression/compressors/algorithms/LibEccodes.cc +++ /dev/null @@ -1,828 +0,0 @@ -/* - * (C) Copyright 2023- ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation nor - * does it submit to any jurisdiction. - */ - -#include -#include -#include -#include -#include -#include -#include -//#include -// -#include "LibEccodes.h" - -//#define LIB_ECCODES_SUCCESS 0 -//#define LIB_ECCODES_NO_VALUES 1 -//#define LIB_ECCODES_INVALID_BPV 2 -//#define LIB_ECCODES_INTERNAL_ERROR 3 -//#define LIB_ECCODES_LOG_DEBUG 4 -//#define LIB_ECCODES_LOG_ERROR 5 -//#define LIB_ECCODES_ENCODING_ERROR 6 -//#define LIB_ECCODES_OUT_OF_RANGE 7 -//#define LIB_ECCODES_UNDERFLOW 8 -//#define LIB_ECCODES_ARRAY_TOO_SMALL 9 - -#define DebugAssert(a) assert(a) -#define mask1(i) (1UL << i) -#define test(n, i) !!((n)&mask1(i)) - -constexpr double dbl_min = std::numeric_limits::min(); -constexpr double dbl_max = std::numeric_limits::max(); -int grib_get_nearest_smaller_value(double val, double* nearest); -unsigned long grib_ieee_nearest_smaller_to_long(double x); -unsigned long grib_ieee_to_long(double x); -double grib_long_to_ieee(unsigned long x); - -static const unsigned long nbits[32] = { - 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, - 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, - 0x1000, 0x2000, 0x4000, 0x8000, 0x10000, 0x20000, - 0x40000, 0x80000, 0x100000, 0x200000, 0x400000, 0x800000, - 0x1000000, 0x2000000, 0x4000000, 0x8000000, 0x10000000, 0x20000000, - 0x40000000, 0x80000000 -}; - -static const unsigned long dmasks[] = { - 0xFF, - 0xFE, - 0xFC, - 0xF8, - 0xF0, - 0xE0, - 0xC0, - 0x80, - 0x00, -}; - -static ieee_table_t ieee_table = { 0, {0, }, {0, }, 0, 0 }; - -void grib_set_bit_off(unsigned char* p, long* bitp) -{ - p += *bitp / 8; - *p &= ~(1u << (7 - ((*bitp) % 8))); - (*bitp)++; -} - -void grib_set_bit_on(unsigned char* p, long* bitp) -{ - unsigned char o = 1; - p += (*bitp >> 3); - o <<= 7 - ((*bitp) % 8); - *p |= o; - (*bitp) += 1; -} - -int grib_encode_unsigned_longb(unsigned char* p, unsigned long val, long* bitp, long nb) -{ - auto test = [](unsigned long n, int i) { - return !!(n & (1UL << i)); - }; - long i = 0; - - if (nb > maxNBits) { - fprintf(stderr, "Number of bits (%ld) exceeds maximum number of bits (%d)\n", nb, maxNBits); - assert(0); - return LIB_ECCODES_INTERNAL_ERROR; - } -#ifdef DEBUG - { - unsigned long maxV = grib_power(nb, 2); - if (val > maxV) { - fprintf(stderr, "grib_encode_unsigned_longb: Value=%lu, but number of bits=%ld!\n", val, nb); - Assert(0); - } - } -#endif - for (i = nb - 1; i >= 0; i--) { - if (test(val, i)) - grib_set_bit_on(p, bitp); - else - grib_set_bit_off(p, bitp); - } - return LIB_ECCODES_SUCCESS; -} - -int grib_encode_double_array(size_t n_vals, const double* val, long bits_per_value, double reference_value, double d, double divisor, unsigned char* p, long* off) -{ - size_t i = 0; - unsigned long unsigned_val = 0; - unsigned char* encoded = p; - double x; - if (bits_per_value % 8) { - for (i = 0; i < n_vals; i++) { - x = (((val[i] * d) - reference_value) * divisor) + 0.5; - unsigned_val = (unsigned long)x; - grib_encode_unsigned_longb(encoded, unsigned_val, off, bits_per_value); - } - } - else { - for (i = 0; i < n_vals; i++) { - int blen = 0; - blen = bits_per_value; - x = ((((val[i] * d) - reference_value) * divisor) + 0.5); - unsigned_val = (unsigned long)x; - while (blen >= 8) { - blen -= 8; - *encoded = (unsigned_val >> blen); - encoded++; - *off += 8; - } - } - } - return LIB_ECCODES_SUCCESS; -} - - - - -long grib_get_binary_scale_fact(double max, double min, long bpval, int* ret) -{ - double range = max - min; - double zs = 1; - long scale = 0; - const long last = 127; /* Depends on edition, should be parameter */ - unsigned long maxint = 0; - const size_t ulong_size = sizeof(maxint) * 8; - - /* See ECC-246 - unsigned long maxint = grib_power(bpval,2) - 1; - double dmaxint=(double)maxint; - */ - if (bpval >= ulong_size) { - *ret = LIB_ECCODES_OUT_OF_RANGE; /*overflow*/ - return 0; - } - const double dmaxint = grib_power(bpval, 2) - 1; - maxint = (unsigned long)dmaxint; /* Now it's safe to cast */ - - *ret = 0; - if (bpval < 1) { - *ret = LIB_ECCODES_ENCODING_ERROR; /* constant field */ - return 0; - } - - assert(bpval >= 1); - /* printf("---- Maxint %ld range=%g\n",maxint,range); */ - if (range == 0) - return 0; - - /* range -= 1e-10; */ - while ((range * zs) <= dmaxint) { - scale--; - zs *= 2; - } - - while ((range * zs) > dmaxint) { - scale++; - zs /= 2; - } - - while ((unsigned long)(range * zs + 0.5) <= maxint) { - scale--; - zs *= 2; - } - - while ((unsigned long)(range * zs + 0.5) > maxint) { - scale++; - zs /= 2; - } - - if (scale < -last) { - *ret = LIB_ECCODES_UNDERFLOW; - /*printf("grib_get_binary_scale_fact: max=%g min=%g\n",max,min);*/ - scale = -last; - } - assert(scale <= last); - return scale; -} - - -void factec(int* krep, const double pa, const int knbit, const long kdec, const int range, long* ke, int* knutil) -{ - *krep = 0; - - *ke = 0; - *knutil = 0; - - if (pa < dbl_min) { - *knutil = 1; - goto end; - } - - if ((fabs(log10(fabs(pa)) + (double)kdec) >= range)) { - *krep = 1; - goto end; - } - - /* Binary scale factor associated to kdec */ - *ke = floor(log2((pa * grib_power(kdec, 10)) / (grib_power(knbit, 2) - 0.5))) + 1; - /* Encoded value for pa = max - min */ - *knutil = floor(0.5 + pa * grib_power(kdec, 10) * grib_power(-*ke, 2)); - -end: - return; -} - -double epsilon() -{ - double e = 1.; - while (1. != (1. + e)) { - e /= 2; - } - return e; -} - -int vrange() -{ - return (int)(log(dbl_max) / log(10)) - 1; -} - - -int grib_optimize_decimal_factor( - const double pmax, const double pmin, const int knbit, - const int compat_gribex, const int compat_32bit, - long* kdec, long* kbin, double* ref) -{ - constexpr double flt_min = std::numeric_limits::min(); - constexpr double flt_max = std::numeric_limits::max(); - - //grib_handle* gh = grib_handle_of_accessor(a); - int idecmin = -15; - int idecmax = 5; - long inbint; - double xtinyr4, xhuger4, xnbint; - int inumax, inutil; - long jdec, ie; - int irep; - int RANGE = vrange(); - double EPSILON = epsilon(); - double pa = pmax - pmin; - - if (pa == 0) { - *kdec = 0; - *kbin = 0; - *ref = 0.; - return LIB_ECCODES_SUCCESS; - } - - inumax = 0; - - if (fabs(pa) <= EPSILON) { - *kdec = 0; - idecmin = 1; - idecmax = 0; - } - else if (pmin != 0. && fabs(pmin) < EPSILON) { - *kdec = 0; - idecmin = 1; - idecmax = 0; - } - - xtinyr4 = flt_min; - xhuger4 = flt_max; - - inbint = grib_power(knbit, 2) - 1; - xnbint = (double)inbint; - - /* Test decimal scale factors; keep the most suitable */ - for (jdec = idecmin; jdec <= idecmax; jdec++) { - /* Fix a problem in GRIBEX */ - if (compat_gribex) - if (pa * grib_power(jdec, 10) <= 1.E-12) - continue; - - /* Check it will be possible to decode reference value with 32bit floats */ - if (compat_32bit) - if (fabs(pmin) > dbl_min) - if (log10(fabs(pmin)) + (double)jdec <= log10(xtinyr4)) - continue; - - /* Check if encoding will not cause an overflow */ - if (fabs(log10(fabs(pa)) + (double)jdec) >= (double)RANGE) - continue; - - factec(&irep, pa, knbit, jdec, RANGE, &ie, &inutil); - - if (irep != 0) - continue; - - /* Check it will be possible to decode the maximum value of the fields using 32bit floats */ - if (compat_32bit) - if (pmin * grib_power(jdec, 10) + xnbint * grib_power(ie, 2) >= xhuger4) - continue; - - /* GRIB1 demands that the binary scale factor be encoded in a single byte */ - if (compat_gribex) - if ((ie < -126) || (ie > 127)) - continue; - - if (inutil > inumax) { - inumax = inutil; - *kdec = jdec; - *kbin = ie; - } - } - - if (inumax > 0) { - double decimal = grib_power(+*kdec, 10); - double divisor = grib_power(-*kbin, 2); - double min = pmin * decimal; - long vmin, vmax; - if (grib_get_nearest_smaller_value(min, ref) != LIB_ECCODES_SUCCESS) { - //grib_context_log(gh->context, LIB_ECCODES_LOG_ERROR, - // "unable to find nearest_smaller_value of %g for %s", min, reference_value); - return LIB_ECCODES_INTERNAL_ERROR; - } - - vmax = (((pmax * decimal) - *ref) * divisor) + 0.5; - vmin = (((pmin * decimal) - *ref) * divisor) + 0.5; - - /* This may happen if pmin*decimal-*ref is too large */ - if ((vmin != 0) || (vmax > inbint)) - inumax = 0; - } - - /* If seeking for an optimal decimal scale factor fails, fall back to a basic method */ - if (inumax == 0) { - int last = compat_gribex ? 99 : 127; - double min = pmin, max = pmax; - double range = max - min; - double f = grib_power(knbit, 2) - 1; - double minrange = grib_power(-last, 2) * f; - double maxrange = grib_power(+last, 2) * f; - double decimal = 1; - int err; - - *kdec = 0; - - while (range < minrange) { - *kdec += 1; - decimal *= 10; - min = pmin * decimal; - max = pmax * decimal; - range = max - min; - } - - while (range > maxrange) { - *kdec -= 1; - decimal /= 10; - min = pmin * decimal; - max = pmax * decimal; - range = max - min; - } - - if (grib_get_nearest_smaller_value(min, ref) != LIB_ECCODES_SUCCESS) { - //grib_context_log(gh->context, LIB_ECCODES_LOG_ERROR, - //"unable to find nearest_smaller_value of %g for %s", min, reference_value); - return LIB_ECCODES_INTERNAL_ERROR; - } - - *kbin = grib_get_binary_scale_fact(max, *ref, knbit, &err); - - if (err == LIB_ECCODES_UNDERFLOW) { - *kbin = 0; - *kdec = 0; - *ref = 0; - } - } - - return LIB_ECCODES_SUCCESS; -} - - -int number_of_bits(unsigned long x, long* result) -{ - const int count = sizeof(nbits) / sizeof(nbits[0]); - const unsigned long* n = nbits; - *result = 0; - while (x >= *n) { - n++; - (*result)++; - if (*result >= count) { - return LIB_ECCODES_ENCODING_ERROR; - } - } - return LIB_ECCODES_SUCCESS; -} - - -void binary_search(const double xx[], const unsigned long n, double x, unsigned long* j) -{ - /*These routine works only on ascending ordered arrays*/ - unsigned long ju, jm, jl; - jl = 0; - ju = n; - while (ju - jl > 1) { - jm = (ju + jl) >> 1; - /* printf("jl=%lu jm=%lu ju=%lu\n",jl,jm,ju); */ - /* printf("xx[jl]=%.10e xx[jm]=%.10e xx[ju]=%.10e\n",xx[jl],xx[jm],xx[ju]); */ - if (x >= xx[jm]) - jl = jm; - else - ju = jm; - } - *j = jl; -} - - - - -void init_ieee_table() -{ - if (!ieee_table.inited) { - unsigned long i; - unsigned long mmin = 0x800000; - unsigned long mmax = 0xffffff; - double e = 1; - for (i = 1; i <= 104; i++) { - e *= 2; - ieee_table.e[i + 150] = e; - ieee_table.v[i + 150] = e * mmin; - } - ieee_table.e[150] = 1; - ieee_table.v[150] = mmin; - e = 1; - for (i = 1; i < 150; i++) { - e /= 2; - ieee_table.e[150 - i] = e; - ieee_table.v[150 - i] = e * mmin; - } - ieee_table.vmin = ieee_table.v[1]; - ieee_table.vmax = ieee_table.e[254] * mmax; - ieee_table.inited = 1; - /*for (i=0;i<128;i++) printf("++++ ieee_table.v[%d]=%g\n",i,ieee_table.v[i]);*/ - } -} - - - - -/** - * decode a value consisting of nbits from an octet-bitstream to long-representation - * - * @param p input bitstream, for technical reasons put into octets - * @param bitp current start position in the bitstream - * @param nbits number of bits needed to build a number (e.g. 8=byte, 16=short, 32=int, but also other sizes allowed) - * @return value encoded as 32/64bit numbers - */ -unsigned long grib_decode_unsigned_long(const unsigned char* p, long* bitp, long nbits) -{ - unsigned long ret = 0; - long oc = *bitp / 8; - unsigned long mask = 0; - long pi = 0; - int usefulBitsInByte = 0; - long bitsToRead = 0; - - if (nbits == 0) - return 0; - - if (nbits > maxNBits) { - int bits = nbits; - int mod = bits % maxNBits; - - if (mod != 0) { - int e = grib_decode_unsigned_long(p, bitp, mod); - assert(e == 0); - bits -= mod; - } - - while (bits > maxNBits) { - int e = grib_decode_unsigned_long(p, bitp, maxNBits); - assert(e == 0); - bits -= maxNBits; - } - - return grib_decode_unsigned_long(p, bitp, bits); - } - mask = LIB_ECCODES_BIT_MASK(nbits); - /* pi: position of bitp in p[]. >>3 == /8 */ - pi = oc; - /* number of useful bits in current byte */ - usefulBitsInByte = 8 - (*bitp & 7); - /* read at least enough bits (byte by byte) from input */ - bitsToRead = nbits; - while (bitsToRead > 0) { - ret <<= 8; - /* ret += p[pi]; */ - DebugAssert((ret & p[pi]) == 0); - ret = ret | p[pi]; - pi++; - bitsToRead -= usefulBitsInByte; - usefulBitsInByte = 8; - } - *bitp += nbits; - /* printf("%d %d %d\n", pi, ret, offset); */ - /* bitsToRead might now be negative (too many bits read) */ - /* remove those which are too much */ - ret >>= -1 * bitsToRead; - /* remove leading bits (from previous value) */ - ret &= mask; - /* printf("%d %d\n", ret2, ret);*/ - return ret; -} - -int grib_encode_unsigned_long(unsigned char* p, unsigned long val, long* bitp, long nbits) -{ - long len = nbits; - int s = *bitp % 8; - int n = 8 - s; - unsigned char tmp = 0; /*for temporary results*/ - - if (nbits > maxNBits) { - /* TODO: Do some real code here, to support long longs */ - int bits = nbits; - int mod = bits % maxNBits; - long zero = 0; - - if (mod != 0) { - int e = grib_encode_unsigned_long(p, zero, bitp, mod); - /* printf(" -> : encoding %ld bits=%ld %ld\n",zero,(long)mod,*bitp); */ - assert(e == 0); - bits -= mod; - } - - while (bits > maxNBits) { - int e = grib_encode_unsigned_long(p, zero, bitp, maxNBits); - /* printf(" -> : encoding %ld bits=%ld %ld\n",zero,(long)maxNBits,*bitp); */ - assert(e == 0); - bits -= maxNBits; - } - - /* printf(" -> : encoding %ld bits=%ld %ld\n",val,(long)bits,*bitp); */ - return grib_encode_unsigned_long(p, val, bitp, bits); - } - - p += (*bitp >> 3); /* skip the bytes */ - - /* head */ - if (s) { - len -= n; - if (len < 0) { - tmp = ((val << -len) | ((*p) & dmasks[n])); - } - else { - tmp = ((val >> len) | ((*p) & dmasks[n])); - } - *p++ = tmp; - } - - /* write the middle words */ - while (len >= 8) { - len -= 8; - *p++ = (val >> len); - } - - /* write the end bits */ - if (len) - *p = (val << (8 - len)); - - *bitp += nbits; - return LIB_ECCODES_SUCCESS; -} - -void init_table_if_needed() -{ - //LIB_ECCODES_MUTEX_INIT_ONCE(&once, &init) - //LIB_ECCODES_MUTEX_LOCK(&mutex) - - if (!ieee_table.inited) - init_ieee_table(); - - //LIB_ECCODES_MUTEX_UNLOCK(&mutex) -} - -int grib_nearest_smaller_ieee_float(double a, double* ret) -{ - unsigned long l = 0; - - init_table_if_needed(); - - if (a > ieee_table.vmax) { - std::cout << "Number is too large: x=" << a << " > xmax=" << ieee_table.vmax << " (IEEE float)"; - return LIB_ECCODES_INTERNAL_ERROR; - } - - l = grib_ieee_nearest_smaller_to_long(a); - *ret = grib_long_to_ieee(l); - return LIB_ECCODES_SUCCESS; -} - -int nearest_smaller_value(double val, double* nearest) -{ - return grib_nearest_smaller_ieee_float(val, nearest); -} - -int grib_nearest_smaller_value(double val, double* nearest) -{ - return nearest_smaller_value(val, nearest); -} - -int grib_get_nearest_smaller_value(double val, double* nearest) -{ - return grib_nearest_smaller_value(val, nearest); -} - - -int grib_check_data_values_range(const double min_val, const double max_val) -{ - int result = LIB_ECCODES_SUCCESS; - - if (!(min_val < dbl_max && min_val > -dbl_max)) { - std::cout << "Minimum value out of range: " << min_val << std::endl; - return LIB_ECCODES_ENCODING_ERROR; - } - if (!(max_val < dbl_max && max_val > -dbl_max)) { - std::cout << "Maximum value out of range: " << max_val << std::endl; - return LIB_ECCODES_ENCODING_ERROR; - } - - //// Data Quality checks - //if (ctx->grib_data_quality_checks) { - // result = grib_util_grib_data_quality_check(h, min_val, max_val); - //} - - return result; -} - -unsigned long grib_ieee_to_long(double x) -{ - unsigned long s = 0; - unsigned long mmax = 0xffffff; - unsigned long mmin = 0x800000; - unsigned long m = 0; - unsigned long e = 0; - double rmmax = mmax + 0.5; - - init_table_if_needed(); - - /* printf("\ngrib_ieee_to_long: x=%.20e\n",x); */ - if (x < 0) { - s = 1; - x = -x; - } - - /* Underflow */ - if (x < ieee_table.vmin) { - /*printf("grib_ieee_to_long: (x < ieee_table.vmin) x=%.20e vmin=%.20e v=0x%lX\n",x,ieee_table.vmin,(s<<31));*/ - return (s << 31); - } - - /* Overflow */ - if (x > ieee_table.vmax) { - fprintf(stderr, "grib_ieee_to_long: Number is too large: x=%.20e > xmax=%.20e\n", x, ieee_table.vmax); - assert(0); - return 0; - } - - binary_search(ieee_table.v, 254, x, &e); - - /* printf("grib_ieee_to_long: e=%ld\n",e); */ - - x /= ieee_table.e[e]; - - /* printf("grib_ieee_to_long: x=%.20e\n",x); */ - - while (x < mmin) { - x *= 2; - e--; - /* printf("grib_ieee_to_long (e--): x=%.20e e=%ld \n",x,e); */ - } - - while (x > rmmax) { - x /= 2; - e++; - /* printf("grib_ieee_to_long (e++): x=%.20e e=%ld \n",x,e); */ - } - - m = x + 0.5; - /* printf("grib_ieee_to_long: m=0x%lX (%lu) x=%.10e \n",m,m,x ); */ - if (m > mmax) { - e++; - m = 0x800000; - /* printf("grib_ieee_to_long: ( m > mmax ) m=0x%lX (%lu) x=%.10e \n",m,m,x ); */ - } - - /* printf("grib_ieee_to_long: s=%lu c=%lu (0x%lX) m=%lu (0x%lX)\n",s,e,e,m,m ); */ - - return (s << 31) | (e << 23) | (m & 0x7fffff); -} - - -double grib_long_to_ieee(unsigned long x) -{ - unsigned long s = x & 0x80000000; - unsigned long c = (x & 0x7f800000) >> 23; - unsigned long m = (x & 0x007fffff); - - double val; - -#ifdef DEBUG - if (x > 0 && x < 0x800000) { - fprintf(stderr, "grib_long_to_ieee: Invalid input %lu\n", x); - Assert(0); - } -#endif - init_table_if_needed(); - - if (c == 0 && m == 0) - return 0; - - if (c == 0) { - m |= 0x800000; - c = 1; - } - else - m |= 0x800000; - - val = m * ieee_table.e[c]; - if (s) - val = -val; - - return val; -} - - -unsigned long grib_ieee_nearest_smaller_to_long(double x) -{ - unsigned long l; - unsigned long e; - unsigned long m; - unsigned long s; - unsigned long mmin = 0x800000; - double y, eps; - - if (x == 0) - return 0; - - init_table_if_needed(); - - l = grib_ieee_to_long(x); - y = grib_long_to_ieee(l); - - if (x < y) { - if (x < 0 && -x < ieee_table.vmin) { - l = 0x80800000; - } - else { - e = (l & 0x7f800000) >> 23; - m = (l & 0x007fffff) | 0x800000; - s = l & 0x80000000; - - if (m == mmin) { - /* printf("grib_ieee_nearest_smaller_to_long: m == mmin (0x%lX) e=%lu\n",m,e); */ - e = s ? e : e - 1; - if (e < 1) - e = 1; - if (e > 254) - e = 254; - /* printf("grib_ieee_nearest_smaller_to_long: e=%lu \n",e); */ - } - - eps = ieee_table.e[e]; - - /* printf("grib_ieee_nearest_smaller_to_long: x= grib_long_to_ieee(l)); - } - - return l; -} - - -//// Return true(1) if large constant fields are to be created, otherwise false(0) -//int grib_producing_large_constant_fields(grib_handle* h, int edition) -//{ -// // First check if the transient key is set -// grib_context* c = h->context; -// long produceLargeConstantFields = 0; -// if (grib_get_long(h, "produceLargeConstantFields", &produceLargeConstantFields) == LIB_ECCODES_SUCCESS && -// produceLargeConstantFields != 0) { -// return 1; -// } - -// if (c->gribex_mode_on == 1 && edition == 1) { -// return 1; -// } - -// // Finally check the environment variable via the context -// return c->large_constant_fields; -//} - - diff --git a/src/gribjump/compression/compressors/algorithms/LibEccodes.h b/src/gribjump/compression/compressors/algorithms/LibEccodes.h deleted file mode 100644 index d58698c..0000000 --- a/src/gribjump/compression/compressors/algorithms/LibEccodes.h +++ /dev/null @@ -1,174 +0,0 @@ -/* - * (C) Copyright 2023- ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation nor - * does it submit to any jurisdiction. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#define LIB_ECCODES_SUCCESS 0 -#define LIB_ECCODES_NO_VALUES 1 -#define LIB_ECCODES_INVALID_BPV 2 -#define LIB_ECCODES_INTERNAL_ERROR 3 -#define LIB_ECCODES_LOG_DEBUG 4 -#define LIB_ECCODES_LOG_ERROR 5 -#define LIB_ECCODES_ENCODING_ERROR 6 -#define LIB_ECCODES_OUT_OF_RANGE 7 -#define LIB_ECCODES_UNDERFLOW 8 -#define LIB_ECCODES_ARRAY_TOO_SMALL 9 - -constexpr size_t maxNBits = sizeof(unsigned long) * 8; -#define LIB_ECCODES_BIT_MASK(x) (((x) == maxNBits) ? (unsigned long)-1UL : (1UL << (x)) - 1) - -typedef std::numeric_limits dbl; -typedef std::numeric_limits flt; -typedef struct ieee_table_t ieee_table_t; -//static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - -struct ieee_table_t -{ - int inited; - double e[255]; - double v[255]; - double vmin; - double vmax; -}; - - -int grib_get_nearest_smaller_value(double val, double* nearest); -void grib_set_bit_off(unsigned char* p, long* bitp); -void grib_set_bit_on(unsigned char* p, long* bitp); -int grib_encode_unsigned_longb(unsigned char* p, unsigned long val, long* bitp, long nb); -int grib_encode_double_array(size_t n_vals, const double* val, long bits_per_value, double reference_value, double d, double divisor, unsigned char* p, long* off); -long grib_get_binary_scale_fact(double max, double min, long bpval, int* ret); -void factec(int* krep, const double pa, const int knbit, const long kdec, const int range, long* ke, int* knutil); -double epsilon(); -int vrange(); -int grib_optimize_decimal_factor( - const double pmax, const double pmin, const int knbit, - const int compat_gribex, const int compat_32bit, - long* kdec, long* kbin, double* ref); - -int number_of_bits(unsigned long x, long* result); -void binary_search(const double xx[], const unsigned long n, double x, unsigned long* j); - - - -void init_ieee_table(); -unsigned long grib_decode_unsigned_long(const unsigned char* p, long* bitp, long nbits); -int grib_encode_unsigned_long(unsigned char* p, unsigned long val, long* bitp, long nbits); -void init_table_if_needed(); -int grib_nearest_smaller_ieee_float(double a, double* ret); -int nearest_smaller_value(double val, double* nearest); -int grib_nearest_smaller_value(double val, double* nearest); -int grib_check_data_values_range(const double min_val, const double max_val); -unsigned long grib_ieee_to_long(double x); -double grib_long_to_ieee(unsigned long x); -unsigned long grib_ieee_nearest_smaller_to_long(double x); - - -template -int grib_decode_array(const unsigned char* p, long* bitp, long bitsPerValue, - double reference_value, double s, double d, - size_t n_vals, T* val) -{ - size_t i = 0; - unsigned long lvalue = 0; - T x; - - if (bitsPerValue % 8 == 0) { - /* See ECC-386 */ - int bc; - int l = bitsPerValue / 8; - size_t o = 0; - - for (i = 0; i < n_vals; i++) { - lvalue = 0; - lvalue <<= 8; - lvalue |= p[o++]; - - for (bc = 1; bc < l; bc++) { - lvalue <<= 8; - lvalue |= p[o++]; - } - x = ((lvalue * s) + reference_value) * d; - val[i] = x; - /* *bitp += bitsPerValue * n_vals; */ - } - } - else { - unsigned long mask = LIB_ECCODES_BIT_MASK(bitsPerValue); - - /* pi: position of bitp in p[]. >>3 == /8 */ - long pi = *bitp / 8; - /* some bits might of the current byte at pi might be used */ - /* by the previous number usefulBitsInByte gives remaining unused bits */ - /* number of useful bits in current byte */ - int usefulBitsInByte = 8 - (*bitp & 7); - for (i = 0; i < n_vals; i++) { - /* value read as long */ - long bitsToRead = 0; - lvalue = 0; - bitsToRead = bitsPerValue; - /* read one byte after the other to lvalue until >= bitsPerValue are read */ - while (bitsToRead > 0) { - lvalue <<= 8; - lvalue += p[pi]; - pi++; - bitsToRead -= usefulBitsInByte; - usefulBitsInByte = 8; - } - *bitp += bitsPerValue; - /* bitsToRead is now <= 0, remove the last bits */ - lvalue >>= -1 * bitsToRead; - /* set leading bits to 0 - removing bits used for previous number */ - lvalue &= mask; - - usefulBitsInByte = -1 * bitsToRead; /* prepare for next round */ - if (usefulBitsInByte > 0) { - pi--; /* reread the current byte */ - } - else { - usefulBitsInByte = 8; /* start with next full byte */ - } - /* scaling and move value to output */ - x = ((lvalue * s) + reference_value) * d; - val[i] = x; - } - } - return 0; -} - - -/* Return n to the power of s */ -template -T grib_power(long s, long n) -{ - T divisor = 1.0; - if (s == 0) - return 1.0; - if (s == 1) - return n; - while (s < 0) { - divisor /= n; - s++; - } - while (s > 0) { - divisor *= n; - s--; - } - return divisor; -} - diff --git a/src/gribjump/compression/compressors/algorithms/SimplePacking.h b/src/gribjump/compression/compressors/algorithms/SimplePacking.h deleted file mode 100644 index 096ea60..0000000 --- a/src/gribjump/compression/compressors/algorithms/SimplePacking.h +++ /dev/null @@ -1,499 +0,0 @@ -/* - * (C) Copyright 2023- ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation nor - * does it submit to any jurisdiction. - */ - -#pragma once - -#include -#include -#include -#include -#include "LibEccodes.h" - -#include "eckit/exception/Exceptions.h" - - -enum class FloatType { - IEEE32, - IBM32, - UNKNOWN -}; - - - -template -struct DecodeParameters { - using ValueType = T; - - ValueType reference_value; - ValueType binary_scale_factor; - ValueType decimal_scale_factor; - long bits_per_value; - size_t n_vals; - bool is_constant_field; - - DecodeParameters() : reference_value(0), binary_scale_factor(0), decimal_scale_factor(0), bits_per_value(0) {} - void print() { - std::cout << "reference_value: " << reference_value << std::endl; - std::cout << "binary_scale_factor: " << binary_scale_factor << std::endl; - std::cout << "decimal_scale_factor: " << decimal_scale_factor << std::endl; - std::cout << "bits_per_value: " << bits_per_value << std::endl; - std::cout << "n_vals: " << n_vals << std::endl; - std::cout << "is_constant_field: " << is_constant_field << std::endl; - } -}; - - - -template -class SimplePacking { -public: - using ValueType = T; - using Buffer = std::vector; - using Values = std::vector; - - SimplePacking() = default; - ~SimplePacking() = default; - - std::pair, Buffer> pack( - struct DecodeParameters params, - const Values& values, - double units_factor = 1.0, - double units_bias = 0.0); - - Values unpack( - const DecodeParameters& params, - const Buffer& buffer, - double units_factor = 1.0, - double units_bias = 0.0, - long offsetBeforeData = 0, - long offsetAfterData = 0); - - // Setter - void decimal_scale_factor(size_t decimal_scale_factor_get) { decimal_scale_factor_get_ = decimal_scale_factor_get; } - void changing_precision(size_t changing_precision) { changing_precision_ = changing_precision; } - void optimize_scaling_factor(bool optimize_scaling_factor) { optimize_scaling_factor_ = optimize_scaling_factor; } - void large_constant_fields(long large_constant_fields) { large_constant_fields_ = large_constant_fields; } - void dirty(bool dirty) { dirty_ = dirty; } - void compat_gribex_set(bool compat_gribex) { compat_gribex_ = compat_gribex; } - - // Getter - size_t decimal_scale_factor() const { return decimal_scale_factor_get_; } - size_t changing_precision() const { return changing_precision_; } - bool optimize_scaling_factor() const { return optimize_scaling_factor_; } - long large_constant_fields() const { return large_constant_fields_; } - bool dirty() const { return dirty_; } - bool compat_gribex() const { return compat_gribex_; } - -private: - bool optimize_scaling_factor_ = false; - size_t decimal_scale_factor_get_ = 0; - size_t changing_precision_ = 0; - long large_constant_fields_ = 0; - bool dirty_ = false; - bool compat_gribex_ = false; - FloatType float_type_ = FloatType::IEEE32; -}; - - - -template -std::pair, typename SimplePacking::Buffer> SimplePacking::pack( - struct DecodeParameters params, - const Values& values, - double units_factor, - double units_bias -) -{ - - size_t i = 0; - int err = 0; - //long decimal_scale_factor_get = 0; - //long optimize_scaling_factor = 0; - ValueType decimal = 1; - ValueType unscaled_max = 0; - ValueType unscaled_min = 0; - ValueType f = 0; - ValueType range = 0; - ValueType minrange = 0, maxrange = 0; - //long changing_precision = 0; - - - size_t n_vals = values.size(); - - if (n_vals == 0) - throw eckit::Exception("No grib values", Here()); - - - /* check we don't encode bpv > max(ulong)-1 as it is not currently supported by the algorithm */ - if (params.bits_per_value > (sizeof(long) * 8 - 1)){ - std::stringstream ss; - ss << "Invalid bits_per_value: " << params.bits_per_value; - throw eckit::BadValue(ss.str(), Here()); - } - - dirty_ = 1; - - const ValueType* val = values.data(); - auto [min_ptr, max_ptr] = std::minmax_element(values.begin(), values.end()); - auto min = *min_ptr; - auto max = *max_ptr; - - if ((err = grib_check_data_values_range(min, max)) != LIB_ECCODES_SUCCESS) { - throw eckit::BadValue("Invalid data values range", Here()); - } - - if (max == min) { - if (grib_get_nearest_smaller_value(val[0], ¶ms.reference_value) != LIB_ECCODES_SUCCESS) { - throw eckit::Exception("unable to find nearest_smaller_value", Here()); - } - - if (large_constant_fields_) { - params.binary_scale_factor = 0; - params.decimal_scale_factor = 0; - if (params.bits_per_value == 0) { - params.bits_per_value = 16; - } - - params.is_constant_field = true; - params.reference_value = min; - return std::make_pair(params, Buffer()); - } - else { - params.bits_per_value = 0; - params.is_constant_field = true; - params.reference_value = min; - - return std::make_pair(params, Buffer()); - } - } - - /* the packing parameters are not properly defined, this is a safe way of fixing the problem */ - if (changing_precision_ == 0 && params.bits_per_value == 0 && decimal_scale_factor_get_ == 0) { - params.bits_per_value = 24; - } - - long binary_scale_factor = 0; - long decimal_scale_factor = 0; - ValueType reference_value = 0; - - if (params.bits_per_value == 0 || (binary_scale_factor == 0 && decimal_scale_factor_get_ != 0)) { - /* decimal_scale_factor is given, binary_scale_factor=0 and params.bits_per_value is computed */ - binary_scale_factor = 0; - decimal_scale_factor = decimal_scale_factor_get_; - decimal = grib_power(decimal_scale_factor, 10); - min *= decimal; - max *= decimal; - - /* params.bits_per_value=(long)ceil(log((double)(imax-imin+1))/log(2.0)); */ - /* See GRIB-540 for why we use ceil */ - if (number_of_bits((unsigned long)ceil(fabs(max - min)), ¶ms.bits_per_value) != LIB_ECCODES_SUCCESS) { - throw eckit::Exception("Range of values too large. Try a smaller value for decimal precision", Here()); - } - - if (grib_get_nearest_smaller_value(min, &reference_value) != LIB_ECCODES_SUCCESS){ - throw eckit::Exception("unable to find nearest_smaller_value", Here()); - } - /* divisor=1; */ - } - else { - int last = 127; /* 'last' should be a parameter coming from a definitions file */ - //if (c->gribex_mode_on && self->edition == 1) - if (compat_gribex_) - last = 99; - /* params.bits_per_value is given and decimal_scale_factor and binary_scale_factor are calcualated */ - if (max == min) { - binary_scale_factor = 0; - /* divisor=1; */ - if (grib_get_nearest_smaller_value(min, &reference_value) != LIB_ECCODES_SUCCESS) { - throw eckit::Exception("unable to find nearest_smaller_value", Here()); - } - } - else if (optimize_scaling_factor_) { - if ((err = grib_optimize_decimal_factor( - max, min, params.bits_per_value, - compat_gribex_, 1, - &decimal_scale_factor, &binary_scale_factor, &reference_value)) != LIB_ECCODES_SUCCESS) - throw eckit::Exception("grib_optimize_decimal_factor failed", Here()); - } - else { - /* printf("max=%g reference_value=%g grib_power(-last,2)=%g decimal_scale_factor=%ld params.bits_per_value=%ld\n", - max,reference_value,grib_power(-last,2),decimal_scale_factor,params.bits_per_value);*/ - range = (max - min); - unscaled_min = min; - unscaled_max = max; - f = (grib_power(params.bits_per_value, 2) - 1); - minrange = grib_power(-last, 2) * f; - maxrange = grib_power(last, 2) * f; - - while (range < minrange) { - decimal_scale_factor += 1; - decimal *= 10; - min = unscaled_min * decimal; - max = unscaled_max * decimal; - range = (max - min); - } - while (range > maxrange) { - decimal_scale_factor -= 1; - decimal /= 10; - min = unscaled_min * decimal; - max = unscaled_max * decimal; - range = (max - min); - } - - if (grib_get_nearest_smaller_value(min, &reference_value) != LIB_ECCODES_SUCCESS) { - std::stringstream ss; - ss << "unable to find nearest_smaller_value of " << min << " for " << reference_value; - throw eckit::Exception(ss.str(), Here()); - } - - binary_scale_factor = grib_get_binary_scale_fact(max, reference_value, params.bits_per_value, &err); - if (err) { - throw eckit::Exception("unable to compute binary_scale_factor", Here()); - } - } - } - - changing_precision_ = 0; - - //return LIB_ECCODES_SUCCESS; - - -//static int pack_double(grib_accessor* a, const double* cval, size_t* len) - - //double decimal = 1; - size_t buflen = 0; - //unsigned char* buf = NULL; - unsigned char* encoded = NULL; - double divisor = 1; - long off = 0; - int ret = 0; - //double* val = (double*)cval; - //int i; - - //if (*len == 0) { - if (n_vals == 0) { - //grib_buffer_replace(a, NULL, 0, 1, 1); - return {params, Buffer(0)}; - //return LIB_ECCODES_SUCCESS; - } - - //if (ret == LIB_ECCODES_SUCCESS) - // ret = grib_set_long_internal(grib_handle_of_accessor(a), self->number_of_values, *len); - - //if (ret != LIB_ECCODES_SUCCESS) - // return ret; - - //if (self->units_factor && - // (grib_get_double_internal(grib_handle_of_accessor(a), self->units_factor, &units_factor) == LIB_ECCODES_SUCCESS)) { - // grib_set_double_internal(grib_handle_of_accessor(a), self->units_factor, 1.0); - //} - - //if (self->units_bias && - // (grib_get_double_internal(grib_handle_of_accessor(a), self->units_bias, &units_bias) == LIB_ECCODES_SUCCESS)) { - // grib_set_double_internal(grib_handle_of_accessor(a), self->units_bias, 0.0); - //} - - //if (units_factor != 1.0) { - // if (units_bias != 0.0) - // for (i = 0; i < n_vals; i++) - // val[i] = val[i] * units_factor + units_bias; - // else - // for (i = 0; i < n_vals; i++) - // val[i] *= units_factor; - //} - //else if (units_bias != 0.0) - // for (i = 0; i < n_vals; i++) - // val[i] += units_bias; - - /* IEEE packing */ - //if (c->ieee_packing) { - // grib_handle* h = grib_handle_of_accessor(a); - // long precision = 0; [> Either 1(=32 bits) or 2(=64 bits) <] - // size_t lenstr = 10; - // if ((ret = codes_check_grib_ieee_packing_value(c->ieee_packing)) != LIB_ECCODES_SUCCESS) - // return ret; - // precision = c->ieee_packing == 32 ? 1 : 2; - // if ((ret = grib_set_string(h, "packingType", "grid_ieee", &lenstr)) != LIB_ECCODES_SUCCESS) - // return ret; - // if ((ret = grib_set_long(h, "precision", precision)) != LIB_ECCODES_SUCCESS) - // return ret; - - // return grib_set_double_array(h, "values", val, *len); - //} - - //if (super != grib_accessor_class_data_g2simple_packing) { - // [> Normal case: parent not same as me! <] - // ret = super->pack_double(a, val, len); - //} - //else { - // [> GRIB-364: simple packing with logarithm pre-processing <] - // grib_accessor_class* super2 = NULL; - // Assert(super->super); - // super2 = *(super->super); - // ret = super2->pack_double(a, val, len); - //} - //switch (ret) { - // case LIB_ECCODES_CONSTANT_FIELD: - // grib_buffer_replace(a, NULL, 0, 1, 1); - // return LIB_ECCODES_SUCCESS; - // case LIB_ECCODES_SUCCESS: - // break; - // default: - // grib_context_log(a->context, LIB_ECCODES_LOG_ERROR, "GRIB2 simple packing: unable to set values (%s)", grib_get_error_message(ret)); - // return ret; - //} - - - decimal = grib_power(decimal_scale_factor, 10); - divisor = grib_power(-binary_scale_factor, 2); - - buflen = (((params.bits_per_value * n_vals) + 7) / 8) * sizeof(unsigned char); - unsigned char *buf = (unsigned char*)malloc(buflen); - encoded = buf; - - grib_encode_double_array(n_vals, val, params.bits_per_value, reference_value, decimal, divisor, encoded, &off); - - Buffer buffer = Buffer(encoded, encoded + buflen); - - DecodeParameters decode_params; - decode_params.reference_value = reference_value; - decode_params.binary_scale_factor = binary_scale_factor; - decode_params.decimal_scale_factor = decimal_scale_factor; - decode_params.bits_per_value = params.bits_per_value; - decode_params.n_vals = n_vals; - - //grib_context_log(a->context, LIB_ECCODES_LOG_DEBUG, - //"grib_accessor_data_g2simple_packing : pack_double : packing %s, %d values", a->name, n_vals); - - //grib_buffer_replace(a, buf, buflen, 1, 1); - - //grib_context_buffer_free(a->context, buf); - - //return decode_params; -//} - - - return {decode_params, buffer}; -} - - -template -typename SimplePacking::Values SimplePacking::unpack( - const DecodeParameters& params, - const Buffer& buf, - double units_factor, - double units_bias, - long offsetBeforeData, - long offsetAfterData -) -{ - - ////unsigned char* buf, size_t buf_len, T* val, size_t val_len, - ////size_t n_vals, - ////const DecodeParameters& decode_params) - //static_assert(std::is_floating_point::value, "Requires floating point numbers"); - - - //double *val = (unsigned char*)buf.data(); - Values values(params.n_vals); - - double* val = values.data(); - size_t val_len = values.size() * sizeof(ValueType); - size_t n_vals = params.n_vals; - - //unsigned char* buf = (unsigned char*)grib_handle_of_accessor(a)->buffer->data; - - size_t i = 0; - int err = 0; - long pos = 0; - //long count = 0; - - double s = 0; - double d = 0; - size_t count = 0; - - //err = grib_value_count(a, &count); - //if (err) - // return err; - //n_vals = count; - size_t len = val_len / sizeof(ValueType); - - if (len < n_vals) { - len = (long)n_vals; - //return LIB_ECCODES_ARRAY_TOO_SMALL; - throw eckit::Exception("Array too small", Here()); - } - - //if ((err = grib_get_long_internal(gh, self->params.bits_per_value, ¶ms.bits_per_value)) != LIB_ECCODES_SUCCESS) - // return err; - - /* - * check we don't decode bpv > max(ulong) as it is - * not currently supported by the algorithm - */ - if (params.bits_per_value > (sizeof(long) * 8)) { - throw eckit::BadValue("Invalid BPV", Here()); - } - - if (n_vals == 0) { - len = 0; - return Values{}; - } - - dirty_ = 0; - - /* Special case */ - - if (params.bits_per_value == 0) { - for (i = 0; i < n_vals; i++) - val[i] = params.reference_value; - //len = n_vals; - //return LIB_ECCODES_SUCCESS; - return Values(n_vals, params.reference_value); - } - - s = grib_power(params.binary_scale_factor, 2); - d = grib_power(-params.decimal_scale_factor, 10); - - //offsetBeforeData = grib_byte_offset(a); - //buf += offsetBeforeData; - - /*Assert(((params.bits_per_value*n_vals)/8) < (1<<29));*/ /* See GRIB-787 */ - - /* ECC-941 */ - if (float_type_ == FloatType::IEEE32) { - /* Must turn off this check when the environment variable ECCODES_LIB_ECCODES_IEEE_PACKING is on */ - if (offsetAfterData > offsetBeforeData) { - const long valuesSize = (params.bits_per_value * n_vals) / 8; /*in bytes*/ - if (offsetBeforeData + valuesSize > offsetAfterData) { - throw eckit::Exception("Values section size mismatch", Here()); - } - } - } - - grib_decode_array(buf.data(), &pos, params.bits_per_value, params.reference_value, s, d, n_vals, val); - - //len = (long)n_vals; - - if (units_factor != 1.0) { - if (units_bias != 0.0) - for (i = 0; i < n_vals; i++) - val[i] = val[i] * units_factor + units_bias; - else - for (i = 0; i < n_vals; i++) - val[i] *= units_factor; - } - else if (units_bias != 0.0) - for (i = 0; i < n_vals; i++) - val[i] += units_bias; - - return values; -} - diff --git a/src/gribjump/info/CcsdsInfo.cc b/src/gribjump/info/CcsdsInfo.cc index da084d0..b4be994 100644 --- a/src/gribjump/info/CcsdsInfo.cc +++ b/src/gribjump/info/CcsdsInfo.cc @@ -32,7 +32,7 @@ static GribAccessor ccsdsRsi("ccsdsRsi", true); //---------------------------------------------------------------------------------------------------------------------- CcsdsInfo::CcsdsInfo(eckit::DataHandle& handle, const metkit::grib::GribHandle& h, const eckit::Offset startOffset) : JumpInfo(h, startOffset) { - + ccsdsFlags_ = grib::ccsdsFlags(h); ccsdsBlockSize_ = grib::ccsdsBlockSize(h); ccsdsRsi_ = grib::ccsdsRsi(h); @@ -53,17 +53,16 @@ CcsdsInfo::CcsdsInfo(eckit::DataHandle& handle, const metkit::grib::GribHandle& mc::CcsdsDecompressor ccsds{}; ccsds - .flags(ccsdsFlags_) - .bits_per_sample(bitsPerValue_) - .block_size(ccsdsBlockSize_) - .rsi(ccsdsRsi_) - .reference_value(referenceValue_) - .binary_scale_factor(binaryScaleFactor_) - .decimal_scale_factor(decimalScaleFactor_); + .flags(ccsdsFlags_) + .bits_per_sample(bitsPerValue_) + .block_size(ccsdsBlockSize_) + .rsi(ccsdsRsi_) + .reference_value(referenceValue_) + .binary_scale_factor(binaryScaleFactor_) + .decimal_scale_factor(decimalScaleFactor_); ccsds.n_elems(numberOfValues_); - ccsds.decode(buffer); - - ccsdsOffsets_ = ccsds.offsets().value(); + + ccsdsOffsets_ = ccsds.decode_offsets(buffer); } CcsdsInfo::CcsdsInfo(const eckit::message::Message& msg) : JumpInfo(msg) { diff --git a/src/gribjump/info/JumpInfo.cc b/src/gribjump/info/JumpInfo.cc index 3f3949f..e2f779f 100644 --- a/src/gribjump/info/JumpInfo.cc +++ b/src/gribjump/info/JumpInfo.cc @@ -23,10 +23,6 @@ #include "gribjump/info/SimpleInfo.h" #include "gribjump/GribJumpException.h" -extern "C" { - double grib_power(long s, long n); // Todo: Do we really need this? -} - namespace gribjump { // -------------------------------------------------------------------------------------------- @@ -88,9 +84,6 @@ JumpInfo::JumpInfo(const metkit::grib::GribHandle& h, const eckit::Offset startO } -// XXX Need to do something about message start offset ... -// Stop using it. - JumpInfo::JumpInfo(const eckit::message::Message& msg): version_(currentVersion_) { @@ -139,7 +132,6 @@ JumpInfo::JumpInfo(const eckit::message::Message& msg): } } - JumpInfo::JumpInfo(eckit::Stream& s) : Streamable(s) { s >> version_; s >> referenceValue_; diff --git a/tests/test_misc_units.cc b/tests/test_misc_units.cc index 79bb582..9beb5b6 100644 --- a/tests/test_misc_units.cc +++ b/tests/test_misc_units.cc @@ -60,7 +60,7 @@ CASE( "test_lru" ){ //----------------------------------------------------------------------------- CASE( "test buckets" ){ - using namespace mc; + using namespace gribjump::mc; BlockBuckets buckets; for (size_t i = 100; i < 1000; i += 100) { mc::Block r{i, 10};