...
 
Commits (2)
......@@ -390,7 +390,7 @@ private:
pD.set();
std::vector<stack_element> stack;
stack.push_back({0, -1, NODE_TYPE::NULL_NODE, 0, -1, pD, 0, -1});
stack.push_back({0, -1, NODE_TYPE::NULL_NODE, 0, -1, pD, 0, -1, boost::dynamic_bitset<>()});
while(!stack.empty()) {
const int ad_idx = stack.back().ad_idx;
......@@ -455,7 +455,7 @@ private:
continue;
}
tree_.back().index_.resize(n_ - xj - 1, NODE_TYPE::NULL_NODE);
stack.push_back({static_cast<int>(tree_.size()) - 1, xj, NODE_TYPE::NULL_NODE, xj+1, -1, D_cand_mcv, 0, popcount_cand_mcv});
stack.push_back({static_cast<int>(tree_.size()) - 1, xj, NODE_TYPE::NULL_NODE, xj+1, -1, D_cand_mcv, 0, popcount_cand_mcv, boost::dynamic_bitset<>()});
}
tree_.resize(tree_.size());
......
/***
* $Id$
**
* File: BVCounter.hpp
* Created: Oct 22, 2016
*
* Authors: Matthew Eichhorn <[email protected]>
* Blake Hurlburt <[email protected]>
* Grant Iraci <[email protected]>
* Copyright (c) 2015-2017 SCoRe Group http://www.score-group.org/
* Distributed under the MIT License.
* See accompanying file LICENSE.
*/
#ifndef BV_COUNTER_HPP
#define BV_COUNTER_HPP
#include <algorithm>
#include <cmath>
#include <vector>
#include <bitvector.hpp>
#include <bit_util.hpp>
/**
 * Counting engine that stores, for every variable, one bitvector per
 * observed state (built by create_BVCounter). Counts for a configuration
 * are obtained by intersecting the bitvectors of the selected states and
 * taking the weight of the result.
 *
 * N is the number of 64-bit words of the variable-set type uint_type<N>.
 * NOTE(review): the bitvector API (identity, intersect, weight, insert) comes
 * from bitvector.hpp and is used here as an opaque dependency.
 */
template <int N> class BVCounter {
public:
    typedef uint_type<N> set_type;
    typedef uint8_t data_type;

    // number of variables passed to create_BVCounter
    int n() const { return n_; }

    // number of observations per variable passed to create_BVCounter
    int m() const { return m_; }

    // number of distinct states stored for variable i
    int r(int i) const { return data_[i].first.size(); }

    bool is_reorderable() { return true; }

    /**
     * State-specific query: every variable in xi and pa is fixed to the state
     * given in state_xi / state_pa (in increasing variable order).
     * F[i] is initialised, fed with the matching counts and, when pa is
     * empty, finalised with 1.
     * NOTE(review): with a non-empty pa, finalize() is never called and the
     * function returns early when no observation matches the parents'
     * states -- confirm this is what callers expect.
     */
    template <typename score_functor, typename data_type>
    void apply(const set_type& xi, const set_type& pa, const std::vector<data_type>& state_xi, const std::vector<data_type>& state_pa, std::vector<score_functor>& F) const {
        std::vector<int> xi_vect = as_vector(xi);
        int qpa = m_q__(pa);

        for (int i = 0; i < F.size(); ++i) F[i].init(r(xi_vect[i]), qpa);

        if (is_emptyset(pa)) {
            for (int i = 0; i < F.size(); ++i) {
                // r_idx_ translates a state value into its bitvector position
                int r_id = r_idx_[xi_vect[i]][state_xi[i]];
                F[i](m_);
                F[i](data_[xi_vect[i]].first[r_id].weight(), m_);
                F[i].finalize(1);
            }
            return;
        }

        auto pa_vect = as_vector(pa);
        vect res = vect::identity(m_);
        vect temp_res;

        // narrow the observations down to those matching every parent state
        for (int i = 0; i < state_pa.size(); ++i) {
            int id = r_idx_[pa_vect[i]][state_pa[i]];
            intersect(res, data_[pa_vect[i]].first[id], temp_res);
            if (temp_res.weight() == 0) return;
            res = temp_res;
        }

        for (int i = 0; i < xi_vect.size(); ++i) {
            F[i](res.weight());
            int r_id = r_idx_[xi_vect[i]][state_xi[i]];
            intersect(data_[xi_vect[i]].first[r_id], res, temp_res);
            if (temp_res.weight() != 0) F[i](temp_res.weight(), res.weight());
        }
    } // apply (state specific queries)

    /**
     * General query: enumerates the observed configurations of the parents
     * in pa and, for each, reports Nij and the non-zero Nijk of every
     * variable xi_vect[i] to F[i]. F[i].finalize() receives the number of
     * configurations that were reported.
     */
    template <typename score_functor>
    void apply(const std::vector<int>& xi_vect, const set_type& pa, std::vector<score_functor>& F) const {
        int qpa = m_q__(pa);

        for (int i = 0; i < F.size(); ++i) F[i].init(r(xi_vect[i]), qpa);

        if (is_emptyset(pa)) {
            for (int i = 0; i < F.size(); ++i) {
                F[i](m_);
                for (const vect& v : data_[xi_vect[i]].first) {
                    F[i](v.weight(), m_);
                }
                F[i].finalize(1);
            }
            return;
        }

        using Iter = typename std::vector<vect>::const_iterator;

        // parents are visited in the order given by sorted_order_
        std::vector<int> pa_sorted;
        for (int i : sorted_order_) {
            if (in_set(pa, i)) {
                pa_sorted.push_back(i);
            }
        }

        // the table stores pairs with
        // first = current bitvector at that level of the dfs traversal
        // second = iterator over bitvectors of next parent to intersect with
        std::vector<std::pair<vect, Iter>> table;
        table.reserve(pa_sorted.size() + 1);

        for (int i : pa_sorted) {
            table.push_back(std::make_pair(vect::identity(m_), std::begin(data_[i].first))); // start with identity
        }

        // one extra slot receives the intersection computed at the deepest
        // layer; only its bitvector is used, the iterator is a placeholder
        table.push_back(std::make_pair(vect::identity(m_), std::begin(data_[0].first)));

        int layer = 0; // we start at the top of the tree
        int qi_obs = 0;
        vect container = vect::identity(m_); // bottom layer

        while (true) { // once we are finished, we will try to move one layer up from root
            while (layer >= 0 && table[layer].second == std::end(data_[pa_sorted[layer]].first)) {
                // reset the iterator for next traversal at this level
                table[layer].second = std::begin(data_[pa_sorted[layer]].first);
                --layer; // move up to continue traversal
            }

            if (layer < 0) break;

            const vect& root = table[layer].first; // fetch from the table
            intersect(root, *(table[layer].second), table[layer + 1].first);
            ++table[layer].second;

            int Nij = table[layer + 1].first.weight();

            if (Nij) { // if Nij is zero we just move on
                if (layer == (pa_sorted.size() - 1)) { // we have explored all of the parents
                    for (int i = 0; i < F.size(); ++i) F[i](Nij);
                    ++qi_obs;
                    for (int i = 0; i < F.size(); ++i) {
                        for (const vect& v : data_[xi_vect[i]].first) {
                            intersect(table[layer + 1].first, v, container);
                            int Nijk = container.weight();
                            if (Nijk) {
                                F[i](Nijk, Nij);
                            }
                        }
                    }
                } else if (Nij == 1) {
                    // if we go down a level, we will find that this is the only call that would be made
                    for (int i = 0; i < F.size(); ++i) F[i](Nij);
                    ++qi_obs;
                    for (int i = 0; i < F.size(); ++i) {
                        F[i](1,1);
                    }
                } else { // there are more parents
                    ++layer; // move down a layer and keep going
                }
            }
        }

        for (int i = 0; i < F.size(); ++i) F[i].finalize(qi_obs);
    } // apply

    // same as above, with the query variables given as a set
    template <typename score_functor>
    void apply(const set_type& xi, const set_type& pa, std::vector<score_functor>& F) const {
        std::vector<int> xi_vect = as_vector(xi);
        apply(xi_vect, pa, F);
    } // apply

    // single-variable convenience wrapper; F is copied in and copied back
    template <typename score_functor>
    void apply(int xi, const set_type& pa, score_functor& F) const {
        std::vector<int> xi_vect{xi};
        std::vector<score_functor> F_vect{F};
        apply(xi_vect, pa, F_vect);
        F = F_vect[0];
    } // apply

    // reorder variables to improve expected query performance;
    // after the call, variable k is the one previously stored at order[k]
    bool reorder(const std::vector<int>& order) {
        std::vector<std::vector<int>> temp_r_idx;
        std::vector<std::pair<std::vector<vect>, double>> vec;

        temp_r_idx.reserve(order.size());
        vec.reserve(n_);

        for (int i : order) {
            temp_r_idx.emplace_back(std::move(r_idx_[i]));
            vec.emplace_back(std::move(data_[i]));
        }

        r_idx_ = std::move(temp_r_idx);
        data_ = std::move(vec);

        // variable positions changed, so the entropy ordering built by
        // create_BVCounter no longer matches data_ and has to be rebuilt
        sorted_order_.resize(data_.size());
        for (int i = 0; i < static_cast<int>(data_.size()); ++i) sorted_order_[i] = i;
        std::sort(std::begin(sorted_order_), std::end(sorted_order_), ent_order(data_));

        return true;
    } // reorder

private:
    // so far no case in which EWAH is faster
    // typedef ewah_vector vect;
    typedef bitvector vect;

    // orders variable indices by the value stored next to their bitvectors
    // (create_BVCounter stores the variable's entropy there)
    class ent_order {
    public:
        ent_order(const std::vector<std::pair<std::vector<vect>, double>>& data) : data_(data) { }
        bool operator()(int lhs, int rhs) const { return data_[lhs].second < data_[rhs].second; }

    private:
        const std::vector<std::pair<std::vector<vect>, double>>& data_;
    };

    // product of r(i) over all parents i in pa
    // assume that q will not overflow, this will be checked by sabna calling code
    int m_q__(const set_type& pa) const {
        int q = 1;
        for (int i = 0; i < set_max_size<set_type>(); ++i) {
            if (in_set(pa, i)) {
                q *= r(i);
            }
        }
        return q;
    } // m_q__

    // r_idx_[i][k]: position in data_[i].first of the k-th smallest state value of variable i
    std::vector<std::vector<int>> r_idx_;

    // data_[i].first: one bitvector per state of variable i; data_[i].second: entropy of variable i
    std::vector<std::pair<std::vector<vect>, double>> data_;

    // variable indices sorted by increasing entropy
    std::vector<int> sorted_order_;

    int n_ = -1;
    int m_ = -1;

    template <int M, typename Iter>
    friend BVCounter<M> create_BVCounter(int, int, Iter);
}; // class BVCounter
/**
 * Builds a BVCounter from n variables with m observations each.
 *
 * `it` must yield n * m values, variable by variable. Every value is
 * converted to BVCounter<N>::data_type (uint8_t) before it is used as an
 * index into the 256-entry lookup table, so signed or wider inputs can no
 * longer index outside of it; values outside 0..255 therefore alias modulo 256.
 */
template <int N, typename Iter> BVCounter<N> create_BVCounter(int n, int m, Iter it) {
    using vect = typename BVCounter<N>::vect;
    using data_type = typename BVCounter<N>::data_type;

    BVCounter<N> p;

    // indices[v]: position of state value v in the current variable's
    // bitvector list, -1 while v has not been seen
    int indices[256];

    p.n_ = n;
    p.m_ = m;
    p.r_idx_.resize(n);

    for (int xi = 0; xi < n; ++xi) {
        p.data_.push_back(std::make_pair(std::vector<vect>(), 0.0));
        std::vector<vect>& states = p.data_.back().first;

        int size = 0;
        std::fill_n(indices, 256, -1);

        // (state value, position of its bitvector) in order of first appearance
        std::vector<std::pair<int, int>> temp_r;

        for (int j = 0; j < m; ++j) {
            const int value = static_cast<data_type>(*it++);

            if (indices[value] == -1) {
                temp_r.push_back({value, size});
                indices[value] = size++;
                states.push_back(vect(m));
            }

            states[indices[value]].insert(j);
        }

        // r_idx_[xi] lists bitvector positions by increasing state value
        std::sort(temp_r.begin(), temp_r.end(),
                  [] (const std::pair<int, int>& lhs, const std::pair<int, int>& rhs) { return lhs.first < rhs.first; });

        for (const auto& x : temp_r) p.r_idx_[xi].push_back(x.second);

        // entropy of the variable, used later to order parents
        double H = 0.0;

        for (auto& v : states) {
            double px = (static_cast<double>(v.weight()) / m);
            H += px * (px == 0.0 ? 0 : std::log2(px));
        }

        p.data_.back().second = -H;
    }

    for (int i = 0; i < p.data_.size(); ++i) {
        p.sorted_order_.push_back(i);
    }

    std::sort(std::begin(p.sorted_order_), std::end(p.sorted_order_), typename BVCounter<N>::ent_order(p.data_));

    return p;
} // create_BVCounter
#endif // BV_COUNTER_HPP
/***
* $Id$
**
* File: MDL.hpp
* Created: Nov 22, 2016
*
* Author: Jaroslaw Zola <[email protected]>
* Copyright (c) 2016 SCoRe Group http://www.score-group.org/
* Distributed under the MIT License.
* See accompanying file LICENSE.
*/
#ifndef MDL_HPP
#define MDL_HPP
#include <cmath>
#include <utility>
/**
 * MDL scoring functor.
 *
 * Usage, as driven by the counting engines: init(ri, qi), then one call of
 * operator()(Nij) and operator()(Nijk, Nij) per reported count, then
 * finalize(). score() returns {penalised score, unpenalised score}.
 */
class MDL {
public:
    typedef std::pair<double, double> score_type;

    // m is the number of observations used in the penalty term
    explicit MDL(int m = 0) : m_(m) { }

    // resets the accumulated score and stores the arities ri and qi
    void init(int ri, int qi) { score_ = 0.0; nc_ = 0.0; ri_ = ri; qi_ = qi; }

    // computes the penalty term; the argument (number of observed parent
    // configurations) is deliberately ignored in favour of qi from init()
    void finalize(int qi) {
        // Uncomment to change how number of observed states in handled
        // qi_ = qi;
        nc_ = 0.5 * std::log2(m_) * (ri_ - 1) * qi_;
    } // finalize

    // Nij alone carries no information for this score
    void operator()(int Nij) { }

    // adds Nijk * log2(Nijk / Nij) to the score
    void operator()(int Nijk, int Nij) {
        // 0 * log2(0) is taken as 0; evaluating it literally would yield NaN
        if (Nijk == 0) return;
        double p = static_cast<double>(Nijk) / Nij;
        score_ += (Nijk * std::log2(p));
    } // operator()

    int r() const { return ri_; }

    score_type score() const { return {-(score_ - nc_), -score_}; }

private:
    int m_ = 0;
    double score_ = 0.0;
    double nc_ = 0.0;
    int ri_ = 1;
    int qi_ = 1;
}; // class MDL
#endif // MDL_HPP
SABNATK_ROOT=../../SABNAtk
CXX=g++
CXXFLAGS=-std=c++17 -O3 -I. -I$(SABNATK_ROOT)/include -DBIT_UTIL_64BIT
random_query:
......@@ -104,6 +104,7 @@ public:
stack.push_back({lhs->ch_[idx], rhs->ch_[idx], lhs, -1, idx});
}
return this;
} // subtract
int state_;
......
/***
* $Id$
**
* File: bit_util.hpp
* Created: Nov 11, 2015
*
* Author: Jaroslaw Zola <[email protected]>
* Copyright (c) 2015-2017 SCoRe Group http://www.score-group.org/
* Distributed under the MIT License.
* See accompanying file LICENSE.
*/
#ifndef BIT_UTIL_HPP
#define BIT_UTIL_HPP
#include <cstdint>
#include <cstring>
#include <ostream>
#include <vector>
// Fixed 128-bit set: word b[0] holds elements 0..63, word b[1] holds 64..127.
struct uint128_t {
    uint64_t b[2];
}; // struct uint128_t

// Set over 64 * N elements stored as N words; word b[0] holds elements 0..63.
template <int N> struct uint_type {
    uint64_t b[N];
}; // struct uint

// single-word fast path
inline bool operator==(uint_type<1> lhs, uint_type<1> rhs) { return !(lhs.b[0] != rhs.b[0]); }

// word-wise equality
template <int N> inline bool operator==(const uint_type<N>& lhs, const uint_type<N>& rhs) {
    int word = 0;
    while (word < N && lhs.b[word] == rhs.b[word]) ++word;
    return word == N;
} // operator==

template <int N> inline bool operator!=(const uint_type<N>& lhs, const uint_type<N>& rhs) {
    const bool same = (lhs == rhs);
    return !same;
} // operator!=

// single-word complement
inline uint_type<1> operator~(uint_type<1> x) {
    x.b[0] = ~x.b[0];
    return x;
}

// word-wise complement
template<int N> inline uint_type<N> operator~(const uint_type<N>& x) {
    uint_type<N> out;
    for (int w = 0; w != N; ++w) out.b[w] = ~x.b[w];
    return out;
} // operator~

// word-wise exclusive or
template <int N> inline uint_type<N> operator^(const uint_type<N>& lhs, const uint_type<N>& rhs) {
    uint_type<N> out;
    for (int w = 0; w != N; ++w) out.b[w] = lhs.b[w] ^ rhs.b[w];
    return out;
} // operator^

// word-wise and
template <int N> inline uint_type<N> operator&(const uint_type<N>& lhs, const uint_type<N>& rhs) {
    uint_type<N> out;
    for (int w = 0; w != N; ++w) out.b[w] = lhs.b[w] & rhs.b[w];
    return out;
} // operator&

// word-wise or
template <int N> inline uint_type<N> operator|(const uint_type<N>& lhs, const uint_type<N>& rhs) {
    uint_type<N> out;
    for (int w = 0; w != N; ++w) out.b[w] = lhs.b[w] | rhs.b[w];
    return out;
} // operator|
// Hash functor for uint_type sets (usable with std-style hash containers).
struct uint_hash {
    // a single word is its own hash
    uint64_t operator()(const uint_type<1>& x) const { return x.b[0]; }

    // Based on 64-bit FNV: multiply by the prime, then xor in the next byte
    template <int N> uint64_t operator()(const uint_type<N>& x) const {
        const unsigned char* byte = reinterpret_cast<const unsigned char*>(&x.b);
        const unsigned char* const stop = byte + N * sizeof(uint64_t);

        uint64_t h = 0xcbf29ce484222325;

        for (; byte != stop; ++byte) {
            h *= static_cast<uint64_t>(1099511628211);
            h ^= *byte;
        }

        return h;
    } // operator
}; // struct uint_hash
// Hash/equality policy exposing hash() and equal() members for uint_type sets.
struct tbb_uint_hash {
    // a single word is its own hash
    uint64_t hash(const uint_type<1>& x) const { return x.b[0]; }

    bool equal(const uint_type<1>& x, const uint_type<1>& y) const { return x == y; }

    // Based on 64-bit FNV: multiply by the prime, then xor in the next byte
    template <int N> uint64_t hash(const uint_type<N>& x) const {
        const unsigned char* byte = reinterpret_cast<const unsigned char*>(&x.b);
        const unsigned char* const stop = byte + N * sizeof(uint64_t);

        uint64_t h = 0xcbf29ce484222325;

        for (; byte != stop; ++byte) {
            h *= static_cast<uint64_t>(1099511628211);
            h ^= *byte;
        }

        return h;
    } // hash

    template <int N> bool equal(const uint_type<N>& x, const uint_type<N>& y) const { return x == y; }
}; // struct tbb_uint_hash
// Folds a set into an int: xor of all words, shifted right by 3, then mixed
// with its own 10- and 20-bit right shifts.
template <int N> inline int fold(const uint_type<N>& x) {
    uint64_t acc = 0;
    for (int w = 0; w < N; ++w) acc ^= x.b[w];
    acc >>= 3;
    const uint64_t mixed = (acc >> 10) ^ (acc >> 20);
    return acc ^ mixed;
} // fold
// Index of the largest element in the set, or -1 when the set is empty.
// Word i holds elements 64*i .. 64*i+63, so the position found inside a word
// is offset by 64*i (the previous version returned only the in-word position,
// which made element 64 indistinguishable from element 0 when N > 1).
template <int N> inline int msb(const uint_type<N>& x) {
    for (int i = N - 1; i >= 0; --i) {
        if (x.b[i] != 0) return (i << 6) + (63 - __builtin_clzll(x.b[i]));
    }
    return -1;
} // msb
// Number of elements a set of the given type can represent (one per bit).
template <typename set_type> constexpr int set_max_size() { return 8 * sizeof(set_type); }

// Returns the set with no elements (all bits cleared).
template <typename set_type> inline set_type set_empty() {
    set_type S;
    std::memset(&S, 0, sizeof(set_type));
    return S;
} // set_empty

// Returns the set {0, 1, ..., n-1}.
// n <= 0 gives the empty set; n at or above the capacity gives the set with
// every element present. The previous version wrote one word past the end of
// the storage when n equalled the capacity.
template <typename set_type> inline set_type set_full(int n) {
    set_type S = set_empty<set_type>();
    if (n <= 0) return S;

    const int words = static_cast<int>(sizeof(set_type) / sizeof(uint64_t));

    // number of words that are filled completely
    int full = (n >> 6);
    if (full > words) full = words;

    std::memset(&S, 255, full * sizeof(uint64_t));

    // remaining low bits go into the next word, if there is one
    if (full < words) S.b[full] = (static_cast<uint64_t>(1) << (n & 63)) - 1;

    return S;
} // set_full
// Returns S with element x added (plain 64-bit word).
inline uint64_t set_add(uint64_t S, int x) {
    const uint64_t bit = static_cast<uint64_t>(1) << x;
    return S | bit;
}

// Returns S with element x added (single-word set).
inline uint_type<1> set_add(uint_type<1> S, int x) {
    S.b[0] |= (static_cast<uint64_t>(1) << x);
    return S;
} // set_add

// Returns S with element x added; x selects word x / 64 and bit x % 64.
template <int N> inline uint_type<N> set_add(uint_type<N> S, int x) {
    const int word = (x >> 6);
    const int offset = x - (word << 6);
    S.b[word] |= (static_cast<uint64_t>(1) << offset);
    return S;
} // set_add
// Returns S with element x removed (plain 64-bit word).
inline uint64_t set_remove(uint64_t S, int x) {
    const uint64_t keep = ~(static_cast<uint64_t>(1) << x);
    return S & keep;
}

// Returns S with element x removed (single-word set).
inline uint_type<1> set_remove(uint_type<1> S, int x) {
    S.b[0] &= ~(static_cast<uint64_t>(1) << x);
    return S;
} // set_remove

// Returns S with element x removed; x selects word x / 64 and bit x % 64.
template <int N> inline uint_type<N> set_remove(uint_type<N> S, int x) {
    const int word = (x >> 6);
    const int offset = x - (word << 6);
    S.b[word] &= ~(static_cast<uint64_t>(1) << offset);
    return S;
} // set_remove
// Returns the elements of S that are not in U (plain 64-bit word).
inline uint64_t set_diff(uint64_t S, uint64_t U) {
    const uint64_t outside = ~U;
    return S & outside;
}

// Returns the elements of S that are not in U (single-word set).
inline uint_type<1> set_diff(uint_type<1> S, uint_type<1> U) {
    S.b[0] &= ~U.b[0];
    return S;
} // set_diff

// Returns the elements of S that are not in U, word by word.
template <int N> inline uint_type<N> set_diff(const uint_type<N>& S, const uint_type<N>& U) {
    uint_type<N> out;
    for (int w = 0; w != N; ++w) out.b[w] = S.b[w] & ~U.b[w];
    return out;
} // set_diff
// Number of elements in S (plain 64-bit word).
inline int set_size(uint64_t S) {
    return __builtin_popcountll(S);
}

// Number of elements in S (single-word set).
inline int set_size(uint_type<1> S) {
    const uint64_t word = S.b[0];
    return __builtin_popcountll(word);
}

// Number of elements in S, summed over all words.
template <int N> inline int set_size(const uint_type<N>& S) {
    int total = 0;
    for (int w = 0; w != N; ++w) total += __builtin_popcountll(S.b[w]);
    return total;
} // set_size
// True when element x is in S (plain 64-bit word).
inline bool in_set(uint64_t S, int x) {
    const uint64_t bit = static_cast<uint64_t>(1) << x;
    return (S & bit) != 0;
}

// True when element x is in S (single-word set).
inline bool in_set(uint_type<1> S, int x) {
    const uint64_t bit = static_cast<uint64_t>(1) << x;
    return (S.b[0] & bit) != 0;
} // in_set

// True when element x is in S; x selects word x / 64 and bit x % 64.
template <int N> inline bool in_set(const uint_type<N>& S, int x) {
    const int word = (x >> 6);
    const uint64_t bit = static_cast<uint64_t>(1) << (x - (word << 6));
    return (S.b[word] & bit) != 0;
} // in_set
// True when S has no elements (plain 64-bit word).
inline bool is_emptyset(uint64_t S) { return !S; }

// True when every word of S is zero.
template <int N> inline bool is_emptyset(const uint_type<N>& S) {
    for (int w = 0; w < N; ++w) {
        if (S.b[w] != 0) return false;
    }
    return true;
} // is_emptyset
// test if S is a superset of U: no element of U may be missing from S
inline bool is_superset(uint64_t S, uint64_t U) {
    const uint64_t common = S & U;
    return common == U;
}

// multi-word variant of the same test
template <int N> inline bool is_superset(const uint_type<N>& S, const uint_type<N>& U) {
    for (int w = 0; w < N; ++w) {
        if ((S.b[w] & U.b[w]) != U.b[w]) return false;
    }
    return true;
} // is_superset
// Builds a set from the element indices in [first, last).
// set_type cannot be deduced and must be given explicitly.
template <typename set_type, typename Iter> inline set_type as_set(Iter first, Iter last) {
    set_type S = set_empty<set_type>();
    for (; first != last; ++first) S = set_add(S, *first);
    return S;
} // as_set

// Builds a set from every element index of a sequence.
// Both template arguments must be given explicitly, in this order.
// set_type is forwarded explicitly because the iterator overload cannot
// deduce it; without that the call could never be instantiated.
template <typename Sequence, typename set_type>
inline set_type as_set(const Sequence& S) { return as_set<set_type>(std::begin(S), std::end(S)); }

// Builds a set from a braced list of element indices.
template <typename set_type>
inline set_type as_set(std::initializer_list<int>&& S) { return as_set<set_type>(std::begin(S), std::end(S)); }
// Returns the elements of S in increasing order.
template <typename set_type> inline std::vector<int> as_vector(const set_type& S) {
    const int capacity = set_max_size<set_type>();

    std::vector<int> elements;
    elements.reserve(capacity);

    for (int x = 0; x != capacity; ++x) {
        if (in_set(S, x)) elements.push_back(x);
    }

    return elements;
} // as_vector

// Same as above, but writes into v (which is cleared first).
template <typename set_type> inline void as_vector(const set_type& S, std::vector<int>& v) {
    const int capacity = set_max_size<set_type>();

    v.clear();

    for (int x = 0; x != capacity; ++x) {
        if (in_set(S, x)) v.push_back(x);
    }
} // as_vector
// Prints S as a string of 0/1 digits, element 0 first.
template <int N> inline std::ostream& operator<<(std::ostream& os, const uint_type<N>& S) {
    const int capacity = set_max_size<uint_type<N>>();

    for (int x = 0; x != capacity; ++x) {
        const int digit = in_set(S, x) ? 1 : 0;
        os << digit;
    }

    return os;
} // operator<<
#endif // BIT_UTIL_HPP
/***
* $Id$
**
* File: csv.hpp
* Created: Nov 12, 2015
*
* Author: Jaroslaw Zola <[email protected]>
* Copyright (c) 2015 SCoRe Group http://www.score-group.org/
* Distributed under the MIT License.
* See accompanying file LICENSE.
*/
#ifndef CSV_HPP
#define CSV_HPP
#include <fstream>
#include <sstream>
#include <tuple>
#include <vector>
#include <jaz/iterator.hpp>
/**
 * Reads whitespace-separated integer rows from f and appends them to data,
 * row after row.
 *
 * Returns {true, n, m} with n the number of rows and m the number of values
 * per row. Returns {false, -1, -1} when a row yields no values, when rows
 * differ in length, or when the values of a row span more than 254.
 * Values already appended to data are not removed on failure.
 */
template <typename T>
std::tuple<bool, int, int> read_csv(std::ifstream& f, std::vector<T>& data) {
    const std::tuple<bool, int, int> failure = std::make_tuple(false, -1, -1);

    // rows read here start after whatever data held on entry
    const auto base = data.size();

    jaz::getline_iterator<> it(f);
    jaz::getline_iterator<> end;

    int n = 0;
    int m = 0;

    std::istringstream is;

    for (; it != end; ++it, ++n) {
        is.clear();
        is.str(*it);

        // currently we allow only categorical data
        std::istream_iterator<int> iit(is);
        std::istream_iterator<int> iend;

        const auto before = data.size();
        std::copy(iit, iend, std::back_inserter(data));
        const int l = static_cast<int>(data.size() - before);

        if (l == 0) return failure;

        if (m == 0) m = l;
        else if (m != l) return failure;
    } // for it

    // sanity check: the values of one row must fit in the range of one byte
    for (int i = 0; i < n; ++i) {
        const T* row = data.data() + base + static_cast<decltype(base)>(i) * m;
        auto mm = std::minmax_element(row, row + m);
        int d = *mm.second - *mm.first;
        if (d > 254) return failure;
    }

    return std::make_tuple(true, n, m);
} // read_csv
// Opens the named file and reads it with the stream overload of read_csv;
// reports {false, -1, -1} when the file cannot be opened.
template <typename T>
std::tuple<bool, int, int> read_csv(const std::string& name, std::vector<T>& data) {
    std::ifstream input(name.c_str());

    if (input) return read_csv(input, data);

    return std::make_tuple(false, -1, -1);
} // read_csv
#endif // CSV_HPP
/***
* $Id$
**
* File: iterator.hpp
* Created: Apr 28, 2010
*
* Author: Jaroslaw Zola <[email protected]>
* Copyright (c) 2010-2012 Jaroslaw Zola
* Distributed under the Boost Software License, Version 1.0.
* See accompanying file LICENSE_BOOST.txt.
*
* This file is part of jaz.
*/
#ifndef JAZ_ITERATOR_HPP
#define JAZ_ITERATOR_HPP
#include <iostream>
#include <iterator>
#include <string>
/** File: iterator.hpp
*/
namespace jaz {
/** Class: ostream_iterator
 *  Output iterator that writes every assigned value to a stream, followed by
 *  a separator, or by a line break after every lsz-th value.
 *
 *  The iterator member types are declared directly; the std::iterator base
 *  used previously is deprecated since C++17.
 */
template <typename T, typename charT = char, typename traits = std::char_traits<charT>, typename dist = std::ptrdiff_t>
class ostream_iterator {
public:
    typedef std::output_iterator_tag iterator_category;
    typedef T value_type;
    typedef std::ptrdiff_t difference_type;
    typedef T* pointer;
    typedef T& reference;

    typedef charT char_type;
    typedef traits traits_type;
    typedef std::basic_ostream<char_type, traits_type> ostream_type;

    /** Constructor: ostream_iterator
     *
     *  Parameters:
     *  os  - Stream to write to; must outlive the iterator.
     *  lsz - Number of values per line; 0 disables line breaks.
     *  sep - Separator written after a value that does not end a line.
     */
    ostream_iterator(ostream_type& os, unsigned int lsz,
                     const std::basic_string<charT, traits>& sep = " ")
        : os_(&os), lsz_(lsz), sep_(sep), pos_(0) { }

    /** Function: operator=
     *  Writes val, then either a line break or the separator.
     */
    ostream_iterator& operator=(const T& val) {
        *os_ << val;
        ++pos_;
        // lsz_ == 0 would be a division by zero; treat it as "never break"
        if ((lsz_ != 0) && ((pos_ % lsz_) == 0)) *os_ << std::endl;
        else *os_ << sep_;
        return *this;
    } // operator=

    /** Function: operator*
     *  No-op, returns the iterator itself so that *it = val writes val.
     */
    ostream_iterator& operator*() { return *this; }

    /** Function: operator++
     *  No-op.
     */
    ostream_iterator& operator++() { return *this; }

    /** Function: operator++
     *  No-op.
     */
    ostream_iterator& operator++(int) { return *this; }

private:
    ostream_type* os_;
    unsigned int lsz_;
    std::basic_string<charT, traits> sep_;
    unsigned int pos_;
}; // ostream_iterator
/** Class: getline_iterator
 *  Input iterator that yields a stream's content piece by piece, split at a
 *  delimiter (newline by default). A default-constructed iterator serves as
 *  the end marker.
 *
 *  The iterator member types are declared directly; the std::iterator base
 *  used previously is deprecated since C++17.
 */
template <typename charT = char, typename traits = std::char_traits<charT>, typename dist = std::ptrdiff_t>
class getline_iterator {
public:
    typedef charT char_type;
    typedef traits traits_type;
    typedef std::basic_string<charT, traits> value_type;
    typedef std::basic_istream<char_type, traits_type> istream_type;

    typedef std::input_iterator_tag iterator_category;
    typedef dist difference_type;
    typedef value_type* pointer;
    typedef value_type& reference;

    /** Constructor: getline_iterator
     *  Creates the end-of-input iterator.
     */
    getline_iterator() : delim_(), value_(), state_(false), is_(0) { }

    /** Constructor: getline_iterator
     *  Reads the first line of is, using the stream locale's newline as
     *  delimiter. The stream must outlive the iterator.
     */
    getline_iterator(istream_type& is) : delim_(), value_(), state_(false), is_(&is) {
        delim_ = std::use_facet<std::ctype<char_type> >(is_->getloc()).widen('\n');
        m_read__();
    } // getline_iterator

    /** Constructor: getline_iterator
     *  Same as above with an explicit delimiter.
     */
    getline_iterator(istream_type& is, char_type delim)
        : delim_(delim), value_(), state_(false), is_(&is) { m_read__(); }

    // copying is member-wise, so the implicitly generated copy operations are used

    /** Function: operator*
     *  Returns the piece read most recently.
     */
    const value_type& operator*() const { return value_; }

    /** Function: operator->
     */
    const value_type* operator->() const { return &(operator*()); }

    /** Function: operator++
     *  Reads the next piece.
     */
    getline_iterator& operator++() {
        m_read__();
        return *this;
    } // operator++

    /** Function: operator++
     *  Reads the next piece and returns a copy holding the previous one.
     */
    getline_iterator operator++(int) {
        getline_iterator tmp = *this;
        m_read__();
        return tmp;
    } // operator++

private:
    // reads one piece; state_ ends up false once the stream is missing,
    // already failed, or the read itself failed
    void m_read__() {
        state_ = (is_ && *is_) ? true : false;
        if (state_ == true) {
            std::getline(*is_, value_, delim_);
            state_ = *is_ ? true : false;
        }
    } // m_read__

    char_type delim_;
    value_type value_;
    bool state_;
    istream_type* is_;

    // two exhausted iterators are always equal; two live ones are equal
    // when they read from the same stream
    friend bool operator==(const getline_iterator& lhs, const getline_iterator& rhs) {
        return ((lhs.state_ == rhs.state_) && (!lhs.state_ || (lhs.is_ == rhs.is_)));
    } // operator==

    friend bool operator!=(const getline_iterator& lhs, const getline_iterator& rhs) {
        return !(lhs == rhs);
    } // operator!=
}; // class getline_iterator
} // namespace jaz
#endif // JAZ_ITERATOR_HPP
/***
* $Id$
**
* File: string.hpp
* Created: Jun 03, 2007
*
* Author: Jaroslaw Zola <[email protected]>
* Copyright (c) 2004-2012 Jaroslaw Zola
* Distributed under the Boost Software License, Version 1.0.
* See accompanying file LICENSE_BOOST.txt.
*
* This file is part of jaz.
*/
#ifndef JAZ_STRING_HPP
#define JAZ_STRING_HPP
#include <algorithm>
#include <sstream>
#include <string>
namespace jaz {
/** Function: split
 *  Splits string based on a separator. Empty pieces between separators are
 *  skipped; the piece after the last separator is always emitted, even when
 *  it is empty.
 *
 *  Parameters:
 *  pat - Separator character.
 *  s   - String to split.
 *  out - Iterator to store resulting sub-strings.
 */
template <typename charT, typename traits, typename Alloc, typename Iter>
void split(charT pat, const std::basic_string<charT, traits, Alloc>& s, Iter out) {
    typedef std::basic_string<charT, traits, Alloc> string_type;
    typedef typename string_type::size_type size_type;

    // positions use the string's own size type instead of int
    size_type pos = 0;
    const size_type len = s.size();

    for (size_type i = 0; i < len; ++i) {
        if (s[i] == pat) {
            if (i > pos) {
                *(out++) = string_type(s, pos, i - pos);
            }
            pos = i + 1;
        }
    } // for

    *(out++) = string_type(s, pos, len - pos);
} // split
/** Function: join
 *  Merges a sequence of strings into a single string. When init is empty
 *  the first element becomes the start of the result; an empty sequence
 *  leaves init unchanged.
 *
 *  Parameters:
 *  pat   - Separator string.
 *  first - Beginning of the sequence to join.
 *  last  - End of the sequence to join.
 *  init  - Prefix to add to the sequence.
 */
template <typename Iter, typename charT, typename traits, typename Alloc>
std::basic_string<charT, traits, Alloc>
join(const charT* pat, Iter first, Iter last, const std::basic_string<charT, traits, Alloc>& init) {
    typedef std::basic_string<charT, traits, Alloc> string_type;
    // built with the caller's character type, so non-char strings work too
    const string_type sep(pat);
    string_type s(init);
    // never dereference the end of an empty sequence
    if ((s.empty() == true) && (first != last)) s = *(first++);
    for (; first != last; ++first) s += sep + string_type(*first);
    return s;
} // join

/** Function: join
 *  Merges a sequence of strings into a single string. When init is empty
 *  the first element becomes the start of the result; an empty sequence
 *  leaves init unchanged.
 *
 *  Parameters:
 *  pat   - Separator string.
 *  first - Beginning of the sequence to join.
 *  last  - End of the sequence to join.
 *  init  - Prefix to add to the sequence.
 */
template <typename Iter, typename charT, typename traits, typename Alloc>
std::basic_string<charT, traits, Alloc>
join(const std::basic_string<charT, traits, Alloc>& pat, Iter first, Iter last, const std::basic_string<charT, traits, Alloc>& init = "") {
    typedef std::basic_string<charT, traits, Alloc> string_type;
    string_type s(init);
    // never dereference the end of an empty sequence
    if ((s.empty() == true) && (first != last)) s = *(first++);
    for (; first != last; ++first) s += pat + string_type(*first);
    return s;
} // join

/** Function: join
 *  Merges a sequence of strings into a single string. When init is empty
 *  the first element becomes the start of the result; an empty sequence
 *  leaves init unchanged.
 *
 *  Parameters:
 *  pat   - Separator character.
 *  first - Beginning of the sequence to join.
 *  last  - End of the sequence to join.
 *  init  - Prefix to add to the sequence.
 */
template <typename Iter, typename charT, typename traits, typename Alloc>
std::basic_string<charT, traits, Alloc>
join(charT pat, Iter first, Iter last, const std::basic_string<charT, traits, Alloc>& init) {
    typedef std::basic_string<charT, traits, Alloc> string_type;
    string_type s(init);
    // never dereference the end of an empty sequence
    if ((s.empty() == true) && (first != last)) s = *(first++);
    for (; first != last; ++first) s += pat + string_type(*first);
    return s;
} // join

/** Function: join
 *  Joins with a C-string separator and no prefix.
 */
template <typename Iter> inline std::string join(const char* pat, Iter first, Iter last) {
    return join(pat, first, last, std::string(""));
} // join

/** Function: join
 *  Joins with a character separator and no prefix.
 */
template <typename Iter> inline std::string join(char pat, Iter first, Iter last) {
    return join(pat, first, last, std::string(""));
} // join
/** Function: toupper
 *  Returns a copy of s with every character passed through the C library's
 *  toupper.
 *
 *  One-byte characters are converted to unsigned char first: handing a
 *  negative char value to the C function is undefined behaviour.
 */
template <typename charT, typename traits, typename Alloc>
std::basic_string<charT, traits, Alloc>
toupper(std::basic_string<charT, traits, Alloc> s) {
    for (typename std::basic_string<charT, traits, Alloc>::iterator it = s.begin(); it != s.end(); ++it) {
        const int code = (sizeof(charT) == 1)
            ? static_cast<int>(static_cast<unsigned char>(*it))
            : static_cast<int>(*it);
        *it = static_cast<charT>(::toupper(code));
    }
    return s;
} // toupper
/** Function: lexical_cast
 *  Extracts a value of type T from the beginning of s.
 *
 *  Returns:
 *  Pair in which the first element is the extracted value (a
 *  value-initialized T if nothing could be extracted) and the second
 *  element tells whether the extraction succeeded.
 */
template <typename T, typename charT, typename traits, typename Alloc>
std::pair<T, bool> lexical_cast(const std::basic_string<charT, traits, Alloc>& s) {
    // the stream is reused between calls to avoid rebuilding it, but kept
    // per thread and per character type (it was one shared char stream)
    static thread_local std::basic_istringstream<charT, traits, Alloc> is;
    is.clear();
    is.str(s);
    T t{};
    is >> t;
    if (!is) return { t, false };
    return { t, true };
} // lexical_cast
/** Function: approx_match
 *  Performs approximate string matching.
 *
 *  Parameters:
 *  T  - Text string.
 *  P  - Pattern string.
 *  mm - Allowed mismatches.
 *
 *  Returns:
 *  Pair in which the first element stores position of the best match (the
 *  first one with the fewest mismatches) or std::string::npos converted to
 *  long int if no match is found, and the second element is true if more
 *  than one match with that number of mismatches exists.
 *  A pattern longer than the text never matches.
 */
template <typename charT, typename traits, typename Alloc>
std::pair<long int, bool> approx_match(const std::basic_string<charT, traits, Alloc>& T,
                                       const std::basic_string<charT, traits, Alloc>& P,
                                       unsigned int mm) {
    typedef std::string::size_type size_type;

    const size_type n = T.size();
    const size_type m = P.size();

    size_type pos = std::string::npos;
    bool mult = false;

    // n - m + 1 below would wrap around for m > n and run past the text
    if (m > n) return std::pair<long int, bool>(static_cast<long int>(pos), mult);

    const size_type l = n - m + 1;

    // a window is abandoned as soon as it collects cmm mismatches
    unsigned int cmm = mm + 1;

    for (size_type i = 0; i < l; ++i) {
        unsigned int t = 0;
        size_type j = 0;

        for (; j < m; ++j) {
            if (T[i + j] != P[j]) t++;
            if (t == cmm) break;
        }

        if (j == m) {
            if (t < cmm - 1) {
                // strictly better than anything so far: tighten the bound
                pos = i;
                mult = false;
                cmm = t + 1;
            } else if (t == cmm - 1) {
                if (pos == std::string::npos) pos = i;
                else mult = true;
            }
        }
    } // for i

    return std::pair<long int, bool>(static_cast<long int>(pos), mult);
} // approx_match
} // namespace jaz
#endif // JAZ_STRING_HPP
#include <chrono>
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>
#include <jaz/string.hpp>
#include <BVCounter.hpp>
#include <bit_util.hpp>
#include "ADTree.hpp"
#include "csv.hpp"
// Minimal functor for the counting engines: it remembers the ratio
// Nijk / Nij of the most recent two-argument call and ignores everything else.
class estimator {
public:
    // clears the stored ratio; both arguments are unused
    void init(int ri, int qi) { state_ = 0.0; }

    // nothing to do once counting is over
    void finalize(int qi) { }

    // Nij alone is ignored
    void operator()(int Nij) { }

    // stores Nijk / Nij, overwriting any earlier value
    void operator()(int Nijk, int Nij) {
        const double numerator = static_cast<double>(Nijk);
        state_ = numerator / Nij;
    }

    double state() const { return state_; }

private:
    double state_ = 0.0;
}; // class estimator
template <typename SetType>
bool read_queries(const std::string& name, std::vector<std::pair<SetType, SetType>>& Q) {
std::ifstream f(name);
if (!f) return false;
jaz::getline_iterator<> iter(f), end;
std::vector<std::string> tokens;
std::istringstream iss;
int x;
for (; iter != end; ++iter) {
tokens.clear();
jaz::split('|', *iter, std::back_inserter(tokens));
SetType xi = set_empty<SetType>();
iss.clear();
iss.str(tokens[2]);
while (iss >> x) xi = set_add(xi, x);
SetType pa = set_empty<SetType>();
iss.clear();
iss.str(tokens[1]);
while (iss >> x) pa = set_add(pa, x);
Q.push_back({xi, pa});
}
return true;
} // read_queries
// Times BVCounter queries: loads the data (argv[1]) and the query list
// (argv[2]), then prints for every query the mean time in microseconds
// over R repetitions.
int main(int argc, char* argv[]) {
    if (argc != 3) {
        std::cout << "usage: random_query csv_file dat_file" << std::endl;
        return -1;
    }

    // GET INPUT
    std::cout << "reading input..." << std::endl;

    const int N = 1;

    using set_type = uint_type<N>;
    using data_type = uint8_t;

    std::vector<data_type> D;

    auto [res, n, m] = read_csv(argv[1], D);

    if (!res) {
        std::cout << "error: unable to read input data" << std::endl;
        return -1;
    }

    std::cerr << "n=" << n << ", m=" << m << std::endl;

    std::vector<std::pair<set_type, set_type>> Q;

    const bool queries_ok = read_queries(argv[2], Q);

    if (!queries_ok) {
        std::cout << "error: unable to read queries" << std::endl;
        return -1;
    }

    std::cerr << "|Q|=" << Q.size() << std::endl;

    // CREATE ENGINE
    std::cout << "building engine..." << std::endl;

    // ADTree alternative, kept for reference:
    //ADTree<N, data_type> eng(16, n, m, D);
    //eng.build_tree();

    BVCounter<N> eng = create_BVCounter<N>(n, m, std::begin(D));

    // RUN TEST
    std::cout << "running queries..." << std::endl;

    std::vector<estimator> F(1);

    const int R = 5;

    using clock_type = std::chrono::steady_clock;

    for (const auto& q : Q) {
        const auto start = clock_type::now();

        for (int rep = 0; rep < R; ++rep) eng.apply(q.first, q.second, F);

        const auto stop = clock_type::now();
        const auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();

        std::cout << (static_cast<double>(elapsed) / R) << std::endl;
    }

    return 0;
} // main
#include <algorithm>
#include <iostream>
#include <vector>
#include <tuple>
#include "bit_util.hpp"
#include "csv.hpp"
#include "ADTree.hpp"
#include "BLANK.hpp"
#include "BLANKEngine.hpp"
#include "MDL.hpp"
#include "MDLEngine.hpp"
#include "BDeu.hpp"
#include "BDeuEngine.hpp"
#include "BVCounter.hpp"
// Cross-check of the two counting engines: scores variables 0 and 2 with
// parent 1 using MDL, once through ADTree and once through BVCounter, and
// prints both results side by side.
int main(int argc, char* argv[]) {
    using data_type = signed char;
    using set_type = uint_type<1>;

    // argv[1] is read below, so it has to be there
    if (argc < 2) {
        std::cout << "usage: " << argv[0] << " csv_file" << std::endl;
        return -1;
    }

    int n = -1;
    int m = -1;

    std::vector<data_type> D;

    bool st;
    std::tie(st, n, m) = read_csv(argv[1], D);

    if (!st) {
        std::cout << "can't read the file" << std::endl;
        return -1;
    }

    set_type pa = set_empty<set_type>();
    pa = set_add(pa, 1);

    set_type ch = set_empty<set_type>();
    ch = set_add(ch, 0);
    ch = set_add(ch, 2);

    ADTree<1, data_type> adt(0, n, m, D);
    std::cout << "min leaf: " << 0 << " n: " << n << " m: " << m << " Dsize: " << D.size() << std::endl;
    adt.build_tree();

    BVCounter<1> bvc = create_BVCounter<1>(n, m, std::begin(D));

    // one functor per query variable, for each engine
    std::vector<MDL> v_mdl1(set_size(ch), MDL(m));
    std::vector<MDL> v_mdl2(set_size(ch), MDL(m));

    adt.apply(ch, pa, v_mdl1);
    bvc.apply(ch, pa, v_mdl2);

    for (int idx = 0; idx < set_size(ch); ++idx) {
        std::cout << "idx: " << idx << std::endl;
        std::cout << " adt " << v_mdl1[idx].score().first << std::endl;
        std::cout << " bvc " << v_mdl2[idx].score().first << std::endl;
    }

    return 0;
}