Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ set(LIBRARY_NAME
file(GLOB kProcessorLibSrc
${CMAKE_CURRENT_SOURCE_DIR}/src/algorithms.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/extend_algorithms.cpp

${CMAKE_CURRENT_SOURCE_DIR}/src/kDataFrames/defaultColumn.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/kDataFrames/kDataFrameBlight.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/kDataFrames/kDataFrameBMQF.cpp
Expand All @@ -118,8 +119,8 @@ file(GLOB kProcessorLibSrc
${CMAKE_CURRENT_SOURCE_DIR}/src/kDataFrames/kDataFramePHMAP.cpp

${CMAKE_CURRENT_SOURCE_DIR}/src/Utils/utils.cpp

${CMAKE_CURRENT_SOURCE_DIR}/ThirdParty/KMC/kmc_api/*cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/restriction_tags.cpp

)

Expand Down
7 changes: 6 additions & 1 deletion include/kProcessor/kDataFrame.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
#include "defaultColumn.hpp"
#include <cstdint>

#include "restriction_tags.hpp"

using phmap::flat_hash_map;
using namespace std;

Expand Down Expand Up @@ -295,7 +297,7 @@ class kDataFrame{
uint32_t lastCheckpoint;
Column* defaultColumn;
virtual void preprocessKmerOrder();

tag * kDataFrame_tags;
kDataFrameIterator* endIterator;
public:
bool isKmersOrderComputed;
Expand Down Expand Up @@ -553,6 +555,7 @@ class kDataFrameMQF: public kDataFrame{
kDataFrameMQF();
explicit kDataFrameMQF(std::uint64_t kSize);
kDataFrameMQF(std::uint64_t kSize, hashingModes hash_mode);
kDataFrameMQF(QF *mqf, readingModes RM, hashingModes HM, map<string, int> params);
kDataFrameMQF(std::uint64_t ksize,uint8_t q,uint8_t fixedCounterSize,uint8_t tagSize
,double falsePositiveRate);

Expand Down Expand Up @@ -660,6 +663,7 @@ class kDataFrameBMQF: public kDataFrame{
kDataFrameBMQF(std::uint64_t kSize,uint64_t nKmers,string path);
kDataFrameBMQF(std::uint64_t ksize,uint8_t q,uint8_t fixedCounterSize,uint8_t tagSize,double falsePositiveRate,string path);
kDataFrameBMQF(bufferedMQF* bufferedmqf,std::uint64_t ksize,double falsePositiveRate);
kDataFrameBMQF(bufferedMQF* bufferedmqf, readingModes RM, hashingModes HM, map<string, int> params);
//count histogram is array where count of kmers repeated n times is found at index n. index 0 holds number of distinct kmers.
kDataFrameBMQF(std::uint64_t ksize,vector<std::uint64_t> countHistogram,uint8_t tagSize
,double falsePositiveRate);
Expand Down Expand Up @@ -736,6 +740,7 @@ class kDataFrameMAP : public kDataFrame
kDataFrameMAP(std::uint64_t ksize);
kDataFrameMAP(std::uint64_t kSize,vector<std::uint64_t> kmersHistogram);
kDataFrameMAP(std::uint64_t kSize,uint64_t nKmers);
kDataFrameMAP(readingModes RM, hashingModes HM, map<string, int> params);
kDataFrame* getTwin();
void reserve (std::uint64_t n);
void reserve (vector<std::uint64_t> countHistogram);
Expand Down
47 changes: 47 additions & 0 deletions include/kProcessor/restriction_tags.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#ifndef _RESTRICTION_TAGS_H_
#define _RESTRICTION_TAGS_H_

#include <map>
#include <string>
#include <vector>
#include <stdexcept>
#include <iostream>

using std::map;
using std::string;
using std::vector;
using std::to_string;
using std::stoi;
using std::cout;
using std::cerr;
using std::endl;


/// Container for the named integer restrictions attached to a kDataFrame.
///
/// The member functions other than the default constructor and the destructor
/// are only declared here; their definitions are not part of this header.
class tag {
public:
    /// List of tag names. NOTE(review): presumably the restrictions that are
    /// currently enforced; confirm against the out-of-line definitions.
    std::vector<std::string> active_tags;

    /// Restriction name -> integer value, pre-populated with the defaults below.
    std::map<std::string, int> restrictions{
        {"min_kSize", 7},
        {"max_kSize", 31},
        {"sorted", 0},  // the map stores ints; 0 is what `false` converts to
    };

    /// Builds a tag holding only the default restrictions listed above.
    tag() = default;

    /// Builds a tag from a name -> value map (defined out of line).
    tag(std::map<std::string, int> tags);

    /// Registers one restriction by name (defined out of line).
    void add_restriction(std::string tag_name, int value);

    /// Defined out of line. NOTE(review): presumably validates the stored
    /// restrictions; confirm against the implementation.
    void check_restrictions();

    /// Per-restriction handlers taking the restriction's integer value
    /// (defined out of line).
    void tag_min_kSize(int value);
    void tag_max_kSize(int value);

    ~tag() = default;
};


typedef void (tag::*intFunc)(int);
typedef map<string, intFunc> intFuncMap;


#endif
38 changes: 29 additions & 9 deletions src/kDataFrames/kDataFrameBMQF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,21 @@ kDataFrameBMQF::kDataFrameBMQF(uint64_t ksize,uint64_t nKmers,string path):
fileName=path;
reserve(nKmers);
}

/// Wraps an existing bufferedMQF and builds the k-mer decoder from the given
/// reading mode, hashing mode and decoder parameters.
///
/// @param bufferedmqf  filter to wrap; the pointer is stored as-is.
/// @param RM           reading (slicing) mode forwarded to kmerDecoder::getInstance.
/// @param HM           hashing mode forwarded to kmerDecoder::getInstance.
/// @param params       decoder parameters forwarded to kmerDecoder::getInstance.
kDataFrameBMQF::kDataFrameBMQF(bufferedMQF* bufferedmqf, readingModes RM, hashingModes HM, map<string, int> params){
    this->bufferedmqf = bufferedmqf;

    // This overload has no falsePositiveRate argument, so the former
    // `this->falsePositiveRate = falsePositiveRate` assigned the member to
    // itself and left it unset. Derive it from the hashing mode instead:
    // mode 1 -> 0, any other mode -> 0.5 (the mapping used when a frame was
    // loaded through the (bufferedMQF*, ksize, falsePositiveRate) overload).
    this->falsePositiveRate = (static_cast<int>(HM) == 1) ? 0 : 0.5;

    KD = kmerDecoder::getInstance(RM, HM, params);
    this->kSize = KD->get_kSize();

    // The value read from the filter metadata (key_bits) used to be
    // overwritten on the very next line; only the effective assignment is kept.
    // NOTE(review): confirm 2*kSize is the intended width for every hashing mode.
    hashbits = 2 * kSize;
    range = (1ULL << hashbits);

    // Pre-build the iterator returned for end().
    kDataFrameBMQFIterator* it = new kDataFrameBMQFIterator(bufferedmqf, kSize, KD);
    it->endIterator();
    endIterator = new kDataFrameIterator(it, (kDataFrame*)this);
}

kDataFrameBMQF::kDataFrameBMQF(bufferedMQF* bufferedmqf,uint64_t ksize,double falsePositiveRate):
kDataFrame(ksize)
{
Expand Down Expand Up @@ -414,24 +429,29 @@ void kDataFrameBMQF::serialize(string filePath){
ofstream file(filePath+".extra");
file<<kSize<<endl;
file << this->KD->hash_mode << endl;
file.close();

file << this->KD->slicing_mode << endl;
file << this->KD->params_to_string() << endl;
bufferedMQF_serialize(bufferedmqf);

}
/// Rebuilds a kDataFrameBMQF from `<filePath>.extra` (metadata) and
/// `<filePath>.bmqf` (the filter itself).
///
/// The metadata is read as four whitespace-separated fields, in the order
/// serialize() writes them: k-mer size, hashing mode, reading mode, and the
/// decoder-parameter string.
///
/// @param filePath  path prefix shared by the two files.
/// @return a newly allocated kDataFrameBMQF.
kDataFrame* kDataFrameBMQF::load(string filePath){
    // Each local is declared exactly once; the earlier text declared `file`
    // and `hashing_mode` twice and carried two return statements.
    uint64_t filekSize = 0;
    int hashing_mode = 0, reading_mode = 0;
    string KD_params_string;

    ifstream file(filePath+".extra");
    file >> filekSize;
    file >> hashing_mode;
    file >> reading_mode;
    file >> KD_params_string;
    // The k-mer size is read only to advance past the first field; the
    // constructor below takes kSize from the decoder it builds.
    (void)filekSize;

    hashingModes hash_mode = static_cast<hashingModes>(hashing_mode);
    readingModes slicing_mode = static_cast<readingModes>(reading_mode);
    map<string, int> kmerDecoder_params = kmerDecoder::string_to_params(KD_params_string);

    bufferedMQF* bufferedmqf = new bufferedMQF();
    bufferedMQF_deserialize(bufferedmqf,(filePath+".bmqf").c_str());
    return new kDataFrameBMQF(bufferedmqf, slicing_mode, hash_mode, kmerDecoder_params);
}

kDataFrameIterator kDataFrameBMQF::begin(){
Expand Down
28 changes: 20 additions & 8 deletions src/kDataFrames/kDataFrameMAP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,12 @@ kDataFrameMAP::kDataFrameMAP() {
// this->hasher = (new IntegerHasher(23));
}

/// Builds an empty MAP-backed frame whose k-mer decoder is created from the
/// given reading mode, hashing mode and decoder parameters; kSize is taken
/// from that decoder.
kDataFrameMAP::kDataFrameMAP(readingModes RM, hashingModes HM, map<string, int> params) {
    class_name = "MAP"; // Temporary until resolving #17
    this->KD = kmerDecoder::getInstance(RM, HM, params);
    kSize = this->KD->get_kSize();
}

/// Reports whether the canonical integer form of `kmerS` is a key in MAP.
bool kDataFrameMAP::kmerExist(string kmerS) {
    const auto hit = this->MAP.find(kmer::str_to_canonical_int(kmerS));
    return hit != this->MAP.end();
}
Expand Down Expand Up @@ -216,8 +222,11 @@ void kDataFrameMAP::serialize(string filePath) {
// Write the kmerSize
ofstream file(filePath + ".extra");
file << kSize << endl;
file << 2 << endl;
file << this->KD->hash_mode << endl;
file << this->KD->slicing_mode << endl;
file << this->KD->params_to_string() << endl;
file.close();

std::ofstream os(filePath + ".map", std::ios::binary);
cereal::BinaryOutputArchive archive(os);
archive(this->MAP);
Expand All @@ -227,19 +236,22 @@ void kDataFrameMAP::serialize(string filePath) {
kDataFrame *kDataFrameMAP::load(string filePath) {

// Load kSize
ifstream file(filePath + ".extra");
uint64_t kSize;
int hashing_mode;
int hashing_mode, reading_mode;
string KD_params_string;

ifstream file(filePath + ".extra");
file >> kSize;
file >> hashing_mode;
file >> reading_mode;
file >> KD_params_string;

if(hashing_mode != 2){
std::cerr << "Error: In the kDataFrameMAP, hashing must be 2:TwoBitsRepresentation mode" << endl;
exit(1);
}
hashingModes hash_mode = static_cast<hashingModes>(hashing_mode);
readingModes slicing_mode = static_cast<readingModes>(reading_mode);
map<string, int> kmerDecoder_params = kmerDecoder::string_to_params(KD_params_string);
file.close();
// Initialize kDataFrameMAP
kDataFrameMAP *KMAP = new kDataFrameMAP(kSize);
kDataFrameMAP *KMAP = new kDataFrameMAP(slicing_mode, hash_mode, kmerDecoder_params);

// Load the hashMap into the kDataFrameMAP
std::ifstream os(filePath + ".map", std::ios::binary);
Expand Down
53 changes: 40 additions & 13 deletions src/kDataFrames/kDataFrameMQF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <math.h>
#include <limits>
#include <sstream>
#include "restriction_tags.hpp"


/*
Expand Down Expand Up @@ -145,6 +146,10 @@ kDataFrameIterator kDataFrameMQF::find(uint64_t kmer) {

kDataFrameMQF::kDataFrameMQF() : kDataFrame() {
this->class_name = "MQF"; // Temporary until resolving #17
kDataFrame_tags = new tag({{"min_kSize", 17}}); // Add all restrictions at once
kDataFrame_tags->add_restriction("max_kSize", 31); // Add a new restriction
kDataFrame_tags->add_restriction("sorted", true);

mqf = new QF();
qf_init(mqf, (1ULL << 16), 2 * kSize, 0, 2, 32, true, "", 2038074761);
KD = (new Kmers(kSize));
Expand Down Expand Up @@ -261,6 +266,26 @@ kDataFrameMQF::kDataFrameMQF(QF *mqf, uint64_t ksize, double falsePositiveRate)
it->endIterator();
endIterator=new kDataFrameIterator(it,(kDataFrame*)this);
}


/// Wraps an existing QF and builds the k-mer decoder from the given reading
/// mode, hashing mode and decoder parameters.
///
/// @param mqf                 filter to wrap; the pointer is stored as-is.
/// @param slicing_mode        reading mode forwarded to kmerDecoder::getInstance.
/// @param hash_mode           hashing mode forwarded to kmerDecoder::getInstance.
/// @param kmerDecoder_params  decoder parameters forwarded to kmerDecoder::getInstance.
kDataFrameMQF::kDataFrameMQF(QF *mqf, readingModes slicing_mode, hashingModes hash_mode, map<string, int> kmerDecoder_params){
    this->class_name = "MQF"; // Temporary until resolving #17
    this->mqf = mqf;

    // Previously none of the three decoder arguments were used: the decoder
    // was chosen by branching on falsePositiveRate, which had just been
    // assigned to itself, and kSize was never derived. Build the decoder from
    // the arguments, the same way the BMQF and MAP constructors with this
    // parameter list do.
    KD = kmerDecoder::getInstance(slicing_mode, hash_mode, kmerDecoder_params);
    this->kSize = KD->get_kSize();

    // Hashing mode 1 -> 0, any other mode -> 0.5 (the mapping used when a
    // frame was loaded through the (QF*, ksize, falsePositiveRate) overload).
    this->falsePositiveRate = (static_cast<int>(hash_mode) == 1) ? 0 : 0.5;

    // The value read from the filter metadata (key_bits) used to be
    // overwritten on the very next line; only the effective assignment is kept.
    // NOTE(review): confirm 2*kSize is the intended width for every hashing mode.
    hashbits = 2 * kSize;
    range = (1ULL << hashbits);

    // Pre-build the iterator returned for end().
    kDataFrameMQFIterator *it = new kDataFrameMQFIterator(mqf, kSize, KD);
    it->endIterator();
    endIterator = new kDataFrameIterator(it,(kDataFrame*)this);
}


kDataFrameMQF::kDataFrameMQF(uint64_t ksize, vector<uint64_t> countHistogram, uint8_t tagSize, double falsePositiveRate)
:
kDataFrame(ksize) {
Expand Down Expand Up @@ -542,34 +567,36 @@ float kDataFrameMQF::max_load_factor() {


/// Writes this frame to disk as two files sharing the `filePath` prefix:
/// `<filePath>.extra` with the decoder metadata and `<filePath>.mqf` with the
/// filter.
void kDataFrameMQF::serialize(string filePath) {
    // Metadata, one field per line: kSize, hashing mode, slicing mode,
    // decoder-parameter string.
    const string metaPath = filePath + ".extra";
    ofstream meta(metaPath);
    meta << kSize << endl;
    meta << this->KD->hash_mode << endl;
    meta << this->KD->slicing_mode << endl;
    meta << this->KD->params_to_string() << endl;
    meta.close();

    // The filter itself.
    const string filterPath = filePath + ".mqf";
    qf_serialize(mqf, filterPath.c_str());
}

/// Rebuilds a kDataFrameMQF from `<filePath>.extra` (metadata) and
/// `<filePath>.mqf` (the filter itself).
///
/// The metadata is read as four whitespace-separated fields, in the order
/// serialize() writes them: k-mer size, hashing mode, reading mode, and the
/// decoder-parameter string.
///
/// @param filePath  path prefix shared by the two files.
/// @return a newly allocated kDataFrameMQF.
kDataFrame *kDataFrameMQF::load(string filePath) {
    // Each local is declared exactly once; the earlier text declared
    // `hashing_mode` as both int and uint64_t and carried two return statements.
    int kSize = 0, hashing_mode = 0, reading_mode = 0;
    string KD_params_string;

    ifstream file(filePath + ".extra");
    file >> kSize;
    file >> hashing_mode;
    file >> reading_mode;
    file >> KD_params_string;
    file.close();
    // The k-mer size is read only to advance past the first field; the
    // constructor below takes kSize from the decoder it builds.
    (void)kSize;

    hashingModes hash_mode = static_cast<hashingModes>(hashing_mode);
    readingModes slicing_mode = static_cast<readingModes>(reading_mode);
    map<string, int> kmerDecoder_params = kmerDecoder::string_to_params(KD_params_string);

    QF *mqf = new QF();
    qf_deserialize(mqf, (filePath + ".mqf").c_str());
    return new kDataFrameMQF(mqf, slicing_mode, hash_mode, kmerDecoder_params);
}

void kDataFrameMQF::preprocessKmerOrder()
Expand Down
19 changes: 14 additions & 5 deletions src/kDataFrames/kDataFramePHMAP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,8 @@ void kDataFramePHMAP::serialize(string filePath) {
ofstream file(filePath + ".extra");
file << kSize << endl;
file << this->KD->hash_mode << endl;
file.close();
file << this->KD->slicing_mode << endl;
file << this->KD->params_to_string() << endl;
filePath += ".phmap";
{
phmap::BinaryOutputArchive ar_out(filePath.c_str());
Expand All @@ -249,15 +250,23 @@ void kDataFramePHMAP::serialize(string filePath) {
kDataFrame *kDataFramePHMAP::load(string filePath) {

// Load kSize
int kSize, hashing_mode, reading_mode;
string KD_params_string;

ifstream file(filePath + ".extra");
uint64_t kSize;
int hashing_mode;
file >> kSize;
file >> hashing_mode;
file >> reading_mode;
file >> KD_params_string;
file.close();
filePath += ".phmap";

hashingModes hash_mode = static_cast<hashingModes>(hashing_mode);
kDataFramePHMAP *KMAP = new kDataFramePHMAP(kSize, hash_mode);
readingModes slicing_mode = static_cast<readingModes>(reading_mode);
map<string, int> kmerDecoder_params = kmerDecoder::string_to_params(KD_params_string);

filePath += ".phmap";

kDataFramePHMAP *KMAP = new kDataFramePHMAP(slicing_mode, hash_mode, kmerDecoder_params);
{
phmap::BinaryInputArchive ar_in(filePath.c_str());
KMAP->MAP.load(ar_in);
Expand Down
Loading