Commit b8594ff3 authored by Christophe Gonzales's avatar Christophe Gonzales

fixed numerical instability in 3off2

parent 883ce108
Pipeline #20875587 passed with stages
in 54 minutes and 18 seconds
......@@ -512,9 +512,9 @@ namespace gum {
xlabels.reserve ( size );
bool modifications = false;
for ( std::size_t i = std::size_t(0); i < size; ++i ) {
const std::size_t new_val = this->_back_dico.first( labels[i] );
xlabels.push_back ( std::make_pair ( new_val, labels[i] ) );
if ( new_val != i ) modifications = true;
const std::size_t old_val = this->_back_dico.first( labels[i] );
xlabels.push_back ( std::make_pair ( old_val, labels[i] ) );
if ( old_val != i ) modifications = true;
}
......
......@@ -568,19 +568,21 @@ namespace gum {
auto updates = __translators.reorder ( kk );
if ( updates.empty () ) return;
const std::size_t size = updates.size ();
std::size_t size = updates.size ();
std::vector<std::size_t,ALLOC<std::size_t>> new_values ( size );
for ( const auto& update : updates ) {
if ( update.first >= size )
new_values.resize ( update.first + 1 );
if ( update.first >= size ) {
size = update.first + 1;
new_values.resize ( size );
}
new_values[update.first] = update.second;
}
// apply the translations
//auto nb_threads = thread::getMaxNumberOfThreads();
for ( auto& xrow : this->_content () ) {
auto& elt = xrow.row()[kk].discr_val;
if ( elt != std::numeric_limits<std::size_t>::max () )
for ( auto& row : this->_content () ) {
auto& elt = row[kk].discr_val;
if ( elt != std::numeric_limits<std::size_t>::max () )
elt = new_values[elt];
}
}
......
......@@ -224,6 +224,8 @@ namespace gum {
/// variables modalities
std::vector< Size > __modalities;
const double __threshold { 1e-10 };
};
} /* namespace learning */
......
......@@ -249,12 +249,30 @@ namespace gum {
id_yz = _H.addNodeSet(vec_yz);
id_xyz = _H.addNodeSet(vec_xyz);
double Hz = _H.score(id_z);
double Hxz = _H.score(id_xz);
double Hyz = _H.score(id_yz);
double Hxyz = _H.score(id_xyz);
const double Hz = _H.score(id_z);
const double Hxz = _H.score(id_xz);
const double Hyz = _H.score(id_yz);
const double Hxyz = _H.score(id_xyz);
double Hxz_Hyz = Hxz + Hyz;
double Hz_Hxyz = Hz + Hxyz;
// avoid numeric instability due to rounding errors
double ratio = 1;
if ( Hxz_Hyz > 0 ) {
ratio = ( Hxz_Hyz - Hz_Hxyz ) / Hxz_Hyz;
}
else if ( Hz_Hxyz > 0 ) {
ratio = ( Hxz_Hyz - Hz_Hxyz ) / Hz_Hxyz;
}
if ( ratio < 0 ) ratio = -ratio;
if ( ratio < __threshold ) {
Hz_Hxyz = Hxz_Hyz; // ensure that the score is equal to 0
//std::cout << Hxz_Hyz << " " << Hz_Hxyz << " " << ratio << " => ";
}
score = Hxz + Hyz - Hz - Hxyz;
score = Hxz_Hyz - Hz_Hxyz;
//std::cout << score << std::endl;
// shall we put the score into the cache?
if (this->_isUsingCache()) {
......@@ -282,11 +300,28 @@ namespace gum {
id_y = _H.addNodeSet(var2);
id_xy = _H.addNodeSet(var1, var2);
double Hx = _H.score(id_x);
double Hy = _H.score(id_y);
double Hxy = _H.score(id_xy);
const double Hx = _H.score(id_x);
const double Hy = _H.score(id_y);
const double Hxy = _H.score(id_xy);
score = Hx + Hy - Hxy;
double Hx_Hy = Hx + Hy;
// avoid numeric instability due to rounding errors
double ratio = 1;
if ( Hx_Hy > 0 ) {
ratio = ( Hx_Hy - Hxy ) / Hx_Hy;
}
else if ( Hxy > 0 ) {
ratio = ( Hx_Hy - Hxy ) / Hxy;
}
if ( ratio < 0 ) ratio = -ratio;
if ( ratio < __threshold ) {
Hx_Hy = Hxy; // ensure that the score is equal to 0
//std::cout << Hx_Hy << " " << Hxy << " " << ratio << " => ";
}
score = Hx_Hy - Hxy;
//std::cout << score << std::endl;
// shall we put the score into the cache?
if (this->_isUsingCache()) {
......@@ -400,17 +435,38 @@ namespace gum {
id_zyui = _H.addNodeSet(vec_zyui);
id_xyzui = _H.addNodeSet(vec_xyzui);
double Hui = _H.score(id_ui);
double Hxui = _H.score(id_xui);
double Hyui = _H.score(id_yui);
double Hzui = _H.score(id_zui);
double Hxyui = _H.score(id_xyui);
double Hxzui = _H.score(id_xzui);
double Hzyui = _H.score(id_zyui);
double Hxyzui = _H.score(id_xyzui);
score = Hxyzui - Hxyui - Hxzui - Hzyui + Hxui + Hyui + Hzui - Hui;
const double Hui = _H.score(id_ui);
const double Hxui = _H.score(id_xui);
const double Hyui = _H.score(id_yui);
const double Hzui = _H.score(id_zui);
const double Hxyui = _H.score(id_xyui);
const double Hxzui = _H.score(id_xzui);
const double Hzyui = _H.score(id_zyui);
const double Hxyzui = _H.score(id_xyzui);
double Hxyzui_Hxui_Hyui_Hzui = Hxyzui + Hxui + Hyui + Hzui;
double Hxyui_Hxzui_Hzyui_Hui = Hxyui + Hxzui + Hzyui + Hui;
// avoid numeric instability due to rounding errors
double ratio = 1;
if ( Hxyzui_Hxui_Hyui_Hzui > 0 ) {
ratio = ( Hxyzui_Hxui_Hyui_Hzui - Hxyui_Hxzui_Hzyui_Hui ) /
Hxyzui_Hxui_Hyui_Hzui;
}
else if ( Hxyui_Hxzui_Hzyui_Hui > 0 ) {
ratio = ( Hxyzui_Hxui_Hyui_Hzui - Hxyui_Hxzui_Hzyui_Hui ) /
Hxyui_Hxzui_Hzyui_Hui;
}
if ( ratio < 0 ) ratio = -ratio;
if ( ratio < __threshold ) {
// ensure that the score is equal to 0
Hxyzui_Hxui_Hyui_Hzui = Hxyui_Hxzui_Hzyui_Hui;
//std::cout << Hx_Hy << " " << Hxy << " " << ratio << " => ";
}
score = Hxyzui_Hxui_Hyui_Hzui - Hxyui_Hxzui_Hzyui_Hui;
//std::cout << score << std::endl;
// shall we put the score into the cache?
if (this->_isUsingCache()) {
this->_insertIntoCache(var1, var2, var3, conditioning_ids, score);
......@@ -441,16 +497,34 @@ namespace gum {
id_zy = _H.addNodeSet(var3, var2);
id_xyz = _H.addNodeSet(std::vector< Idx >{var1, var2, var3});
double Hx = _H.score(id_x);
double Hy = _H.score(id_y);
double Hz = _H.score(id_z);
double Hxy = _H.score(id_xy);
double Hxz = _H.score(id_xz);
double Hzy = _H.score(id_zy);
double Hxyz = _H.score(id_xyz);
score = Hx + Hy + Hz - Hxy - Hxz - Hzy + Hxyz;
const double Hx = _H.score(id_x);
const double Hy = _H.score(id_y);
const double Hz = _H.score(id_z);
const double Hxy = _H.score(id_xy);
const double Hxz = _H.score(id_xz);
const double Hzy = _H.score(id_zy);
const double Hxyz = _H.score(id_xyz);
double Hx_Hy_Hz_Hxyz = Hx + Hy + Hz + Hxyz;
double Hxy_Hxz_Hzy = Hxy + Hxz + Hzy;
// avoid numeric instability due to rounding errors
double ratio = 1;
if ( Hx_Hy_Hz_Hxyz > 0 ) {
ratio = ( Hx_Hy_Hz_Hxyz - Hxy_Hxz_Hzy ) / Hx_Hy_Hz_Hxyz;
}
else if ( Hxy_Hxz_Hzy > 0 ) {
ratio = ( Hx_Hy_Hz_Hxyz - Hxy_Hxz_Hzy ) / Hxy_Hxz_Hzy;
}
if ( ratio < 0 ) ratio = -ratio;
if ( ratio < __threshold ) {
Hx_Hy_Hz_Hxyz = Hxy_Hxz_Hzy; // ensure that the score is equal to 0
//std::cout << Hx_Hy << " " << Hxy << " " << ratio << " => ";
}
score = Hx_Hy_Hz_Hxyz - Hxy_Hxz_Hzy;
//std::cout << score << std::endl;
// shall we put the score into the cache?
if (this->_isUsingCache()) {
......
......@@ -87,7 +87,7 @@ namespace gum {
/// returns the score corresponding to a given nodeset
double score(Idx nodeset_index);
double score(Idx nodeset_index );
/// sets the range of records taken into account by the counter
/** @param min_range he number of the first record to be taken into
......
......@@ -102,7 +102,7 @@ namespace gum {
}
template < typename IdSetAlloc, typename CountAlloc >
double PartialEntropy< IdSetAlloc, CountAlloc >::score(Idx nodeset_index) {
double PartialEntropy< IdSetAlloc, CountAlloc >::score(Idx nodeset_index ) {
// if the score has already been computed, get its value
if (this->_isInCache(nodeset_index)) {
return this->_cachedScore(nodeset_index);
......@@ -121,12 +121,24 @@ namespace gum {
}
}
/*
for (Idx zyx = 0; zyx < ZXY_size; ++zyx) {
if (Nzyx[zyx]) {
// score -= Nzyx[zyx] / this->__N * log2(Nzyx[zyx] / this->__N);
score -= Nzyx[zyx] / this->__N * log(Nzyx[zyx] / this->__N);
score -= ( Nzyx[zyx] / this->__N ) * log(Nzyx[zyx] / this->__N);
}
}
*/
// score = -sum_xyz ( Nzyx[zyx] / this->__N ) * log(Nzyx[zyx] / this->__N)
for (Idx zyx = 0; zyx < ZXY_size; ++zyx) {
if (Nzyx[zyx]) {
// score -= Nzyx[zyx] / this->__N * log2(Nzyx[zyx] / this->__N);
score -= Nzyx[zyx] * log(Nzyx[zyx]);
}
}
score /= this->__N;
score += log ( this->__N );
// shall we put the score into the cache?
if (this->_isUsingCache()) {
......
......@@ -58,14 +58,16 @@ namespace gum_tests {
database.setVariableNames( initializer.variableNames () );
initializer.fillDatabase ( database );
//database.reorder ();
database.reorder ();
gum::learning::DBRowGeneratorSet<> genset;
gum::learning::DBRowGeneratorParser<>
parser ( database.handler (), genset );
std::vector< gum::Size > modalities(nb_vars, 2);
std::vector< gum::Size > modalities;
for ( auto dom : database.domainSizes () )
modalities.push_back ( dom );
gum::learning::CorrectedMutualInformation<> I(parser, modalities);
I.useNoCorr();
......@@ -81,7 +83,8 @@ namespace gum_tests {
}
}
graph = search.learnMixedStructure(I, graph);
TS_ASSERT_EQUALS(graph.arcs().size(), gum::Size(6));
TS_ASSERT_EQUALS(graph.arcs().size(), gum::Size(0));
}
void test_3off2_asia_MDLcorr() {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment