Commit 4854161f authored by Christophe Gonzales's avatar Christophe Gonzales

added the possibility to have several translators parsing the same database in…

added the possibility to have several translators parsing the same database in DatabaseTable + the possibility to add new columns in DatabaseTables even when they contain records
parent 805032a1
Pipeline #23714190 failed with stages
in 91 minutes 45 seconds
......@@ -350,6 +350,9 @@ namespace gum {
/// indicates whether a translated value corresponds to a missing value
bool isMissingValue(const DBTranslatedValue& val) const;
/// returns the translation of a missing value
virtual DBTranslatedValue missingValue () const = 0;
/// @}
......
......@@ -366,6 +366,9 @@ namespace gum {
/// returns the variable stored into the translator
virtual const IContinuousVariable* variable() const final;
/// returns the translation of a missing value
virtual DBTranslatedValue missingValue () const final;
/// @}
......
......@@ -543,6 +543,14 @@ namespace gum {
return __real_variable;
}
/// returns the translation of a missing value
template < template < typename > class ALLOC >
INLINE DBTranslatedValue
DBTranslator4ContinuousVariable< ALLOC >::missingValue () const {
  // a missing continuous value is encoded by the largest finite float
  const float missing_code = std::numeric_limits< float >::max();
  return DBTranslatedValue{missing_code};
}
} /* namespace learning */
......
......@@ -315,6 +315,9 @@ namespace gum {
/// returns the variable stored into the translator
virtual const IDiscretizedVariable* variable() const final;
/// returns the translation of a missing value
virtual DBTranslatedValue missingValue () const final;
/// @}
......
......@@ -458,6 +458,14 @@ namespace gum {
}
/// returns the translation of a missing value
template < template < typename > class ALLOC >
INLINE DBTranslatedValue
DBTranslator4DiscretizedVariable< ALLOC >::missingValue () const {
  // a missing discretized value is encoded by the largest std::size_t
  const std::size_t missing_code = std::numeric_limits< std::size_t >::max();
  return DBTranslatedValue{missing_code};
}
} /* namespace learning */
} /* namespace gum */
......
......@@ -370,6 +370,9 @@ namespace gum {
/// returns the variable stored into the translator
virtual const LabelizedVariable* variable() const final;
/// returns the translation of a missing value
virtual DBTranslatedValue missingValue () const final;
/// @}
#ifndef DOXYGEN_SHOULD_SKIP_THIS
......
......@@ -414,6 +414,14 @@ namespace gum {
return &__variable;
}
/// returns the translation of a missing value
template < template < typename > class ALLOC >
INLINE DBTranslatedValue
DBTranslator4LabelizedVariable< ALLOC >::missingValue () const {
  // a missing label is encoded by the largest std::size_t
  const std::size_t missing_code = std::numeric_limits< std::size_t >::max();
  return DBTranslatedValue{missing_code};
}
} /* namespace learning */
......
......@@ -366,6 +366,9 @@ namespace gum {
/// returns the variable stored into the translator
virtual const RangeVariable* variable() const final;
/// returns the translation of a missing value
virtual DBTranslatedValue missingValue () const final;
/// @}
......
......@@ -539,6 +539,14 @@ namespace gum {
return &__variable;
}
/// returns the translation of a missing value
template < template < typename > class ALLOC >
INLINE DBTranslatedValue
DBTranslator4RangeVariable< ALLOC >::missingValue () const {
  // a missing range value is encoded by the largest std::size_t
  const std::size_t missing_code = std::numeric_limits< std::size_t >::max();
  return DBTranslatedValue{missing_code};
}
} /* namespace learning */
......
......@@ -181,60 +181,60 @@ namespace gum {
/// @{
/// inserts a new translator in the translator set
/// inserts a new translator at the end of the translator set
/** @param translator a translator that will be copied into the
* translator set
* @param column the index of the column that this new translator should
* read in the database.
* @param unique_column indicates whether the column can be read by
* several translators.
* @return the position of the translator within the translator set.
* @warning Translators are ordered by increasing column within the
* translator set. As a consequence, their position may change when
* other translators are added.
* @throw DuplicateElement is raised if there already exists a translator
* reading the column passed in argument. */
* reading the column passed in argument and the unique_column
* argument is set to true. */
template < template < template < typename > class > class Translator >
std::size_t insertTranslator(const Translator< ALLOC >& translator,
const std::size_t column);
/// inserts a new translator for a given variable in the translator set
/** The first template parameter (GUM_SCALAR) is necessary only for
* inserting variables of true types DiscretizedVariable and
* ContinuousVariable, which depend on the GUM_SCALAR parameter type.
* However, usually, when you use this function, this is to add into
* the TranslatorSet the variables of a BayesNet<GUM_SCALAR>. As such, you
* can safely call insert all the variables of this Bayesian network
* using inertTranslator<GUM_SCALAR> ( bn.variable() ... ) instructions.
const std::size_t column,
const bool unique_column=true);
/** @brief inserts a new translator for a given variable at the end of
* the translator set
*
* @param var the variable that will be contained into the translator
* @param column the index of the column that this new translator should
* read in the database.
* @param missing_symbols the set of symbols in the database
* representing missing values
* @param unique_column indicates whether the column can be read by
* several translators.
* @throw DuplicateElement is raised if there already exists a translator
* reading the column passed in argument.
* reading the column passed in argument and the unique_column
* argument is set to true.
*/
template < template < typename > class XALLOC >
std::size_t insertTranslator(
const Variable& var,
const std::size_t column,
const std::vector< std::string, XALLOC< std::string > >& missing_symbols);
/// inserts a new translator for a given variable in the translator set
/** The first template parameter (GUM_SCALAR) is necessary only for
* inserting variables of true types DiscretizedVariable and
* ContinuousVariable, which depend on the GUM_SCALAR parameter type.
* However, usually, when you use this function, this is to add into
* the TranslatorSet the variables of a BayesNet<GUM_SCALAR>. As such, you
* can safely call insert all the variables of this Bayesian network
* using inertTranslator<GUM_SCALAR> ( bn.variable() ... ) instructions.
const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
const bool unique_column=true);
/** @brief inserts a new translator for a given variable at the end of
* the translator set
*
* @param var the variable that will be contained into the translator
* @param column the index of the column that this new translator should
* read in the database.
* @param unique_column indicates whether the column can be read by
* several translators.
* @throw DuplicateElement is raised if there already exists a translator
* reading the column passed in argument.
* reading the column passed in argument and the unique_column
* argument is set to true.
*/
std::size_t insertTranslator(const Variable& var, const std::size_t column);
std::size_t insertTranslator(const Variable& var,
const std::size_t column,
const bool unique_column=true);
/** @brief erases either the kth translator or that parsing the kth
/** @brief erases either the kth translator or those parsing the kth
* column of the input database
*
* DBTranslatorSets do not necessarily read all the columns of their
......@@ -243,11 +243,13 @@ namespace gum {
* and 5 respectively. When k_is_input_col is set to false, Parameter k
* passed in argument corresponds to either 0 or 1, i.e., to the index of
* one of the two translators stored into the DBTranslatorSet. When
* k_is_input_col is set to true, the translator to be erased is the one
* that parses the kth column of the input database.
* k_is_input_col is set to true, the translators to be erased are the ones
* that parse the kth column of the input database (when several
* translators parse the column k, all of them are removed).
* @warning if the translator does not exist, nothing is done. In
* particular, no exception is raised. */
void eraseTranslator(const std::size_t k, const bool k_is_input_col = false);
void eraseTranslator(const std::size_t k,
const bool k_is_input_col = false);
/// returns the kth translator
/** @warning this method assumes that there are at least k translators.
......
......@@ -211,48 +211,39 @@ namespace gum {
/// inserts a copy of a translator at the end of the translator set
/** @param translator the translator that is cloned into the set
 * @param column the index of the database column read by the new translator
 * @param unique_column when true, the insertion is refused if another
 * translator of the set already reads the same column
 * @return the position of the new translator in the set, i.e., the number
 * of translators stored before the insertion
 * @throw DuplicateElement is raised if unique_column is true and there
 * already exists a translator reading the column passed in argument */
template < template < typename > class ALLOC >
template < template < template < typename > class > class Translator >
std::size_t DBTranslatorSet< ALLOC >::insertTranslator(
  const Translator< ALLOC >& translator,
  const std::size_t          column,
  const bool                 unique_column) {
  // if the unique_column parameter is set to true and there exists already
  // another translator that parses the column, raise a DuplicateElement
  // exception
  const std::size_t size = __translators.size();
  if (unique_column) {
    for (std::size_t i = std::size_t(0); i < size; ++i) {
      if (__columns[i] == column)
        GUM_ERROR(DuplicateElement,
                  "There already exists a DBTranslator that parses Column #"
                    << column);
    }
  }

  // reserve some place for the new translator: doing so before cloning means
  // that nothing has been allocated yet if a reservation fails
  __translators.reserve(size + 1);
  __columns.reserve(size + 1);

  // create and add the new translator at the end of the set
  ALLOC< DBTranslator< ALLOC > > allocator(this->getAllocator());
  DBTranslator< ALLOC >* new_translator = translator.clone(allocator);

  __translators.resize(size + 1);
  __columns.resize(size + 1);
  __translators[size] = new_translator;
  __columns[size] = column;

  // update the highest column
  if (column > __highest_column) __highest_column = column;

  return size;
}
......@@ -262,7 +253,8 @@ namespace gum {
std::size_t DBTranslatorSet< ALLOC >::insertTranslator(
const Variable& var,
const std::size_t column,
const std::vector< std::string, XALLOC< std::string > >& missing_symbols) {
const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
const bool unique_column) {
// create the translator, depending on the type of the variable
switch (var.varType()) {
case VarType::Labelized: {
......@@ -270,7 +262,7 @@ namespace gum {
static_cast< const LabelizedVariable& >(var);
DBTranslator4LabelizedVariable< ALLOC > translator(xvar,
missing_symbols);
return insertTranslator(translator, column);
return insertTranslator(translator, column, unique_column);
}
case VarType::Discretized: {
......@@ -278,13 +270,13 @@ namespace gum {
static_cast< const IDiscretizedVariable& >(var);
DBTranslator4DiscretizedVariable< ALLOC > translator(xvar,
missing_symbols);
return insertTranslator(translator, column);
return insertTranslator(translator, column, unique_column);
}
case VarType::Range: {
const RangeVariable& xvar = static_cast< const RangeVariable& >(var);
DBTranslator4RangeVariable< ALLOC > translator(xvar, missing_symbols);
return insertTranslator(translator, column);
return insertTranslator(translator, column, unique_column);
}
case VarType::Continuous: {
......@@ -292,10 +284,13 @@ namespace gum {
static_cast< const IContinuousVariable& >(var);
DBTranslator4ContinuousVariable< ALLOC > translator(xvar,
missing_symbols);
return insertTranslator(translator, column);
return insertTranslator(translator, column, unique_column);
}
default: GUM_ERROR(NotImplementedYet, "not implemented yet");
default: GUM_ERROR(NotImplementedYet,
"The insertion of the translator for Variable " <<
var.name() << " is impossible because a translator "
"for such variable is not implemented yet");
}
}
......@@ -303,10 +298,11 @@ namespace gum {
/// inserts a new translator for a given variable in the translator set
template < template < typename > class ALLOC >
INLINE std::size_t
DBTranslatorSet< ALLOC >::insertTranslator(const Variable&   var,
                                           const std::size_t column,
                                           const bool        unique_column) {
  // no symbol is declared as representing a missing value: forward to the
  // overload taking the missing symbols with an empty vector
  const std::vector< std::string, ALLOC< std::string > > missing;
  return this->insertTranslator(var, column, missing, unique_column);
}
......@@ -314,35 +310,55 @@ namespace gum {
/// erases either the kth translator or those parsing the kth column
/** @param k either the position of a translator within the set (when
 * k_is_input_col is false) or the index of a column of the input database
 * (when k_is_input_col is true)
 * @param k_is_input_col indicates how k should be interpreted. When true,
 * all the translators parsing column k are removed.
 * @warning if no translator corresponds to k, nothing is done. In
 * particular, no exception is raised. */
template < template < typename > class ALLOC >
void DBTranslatorSet< ALLOC >::eraseTranslator(const std::size_t k,
                                               const bool k_is_input_col) {
  ALLOC< DBTranslator< ALLOC > > allocator(this->getAllocator());
  const std::size_t nb_trans = __translators.size();

  // becomes true when a removed translator was reading the highest column
  bool highest_column_removed = false;

  if (!k_is_input_col) {
    // valid positions are 0..nb_trans-1, hence k == nb_trans is rejected too
    if (nb_trans <= k) return;

    highest_column_removed = (__columns[k] == __highest_column);

    // remove the translator and its corresponding column
    allocator.destroy(__translators[k]);
    allocator.deallocate(__translators[k], 1);
    __translators.erase(__translators.begin() + k);
    __columns.erase(__columns.begin() + k);
  } else {
    // remove all the translators parsing the kth column. The vectors are
    // walked backwards by index: erasing position pos only shifts elements
    // located after pos, which have already been examined, and no iterator
    // is kept across an erase
    for (std::size_t i = nb_trans; i > std::size_t(0); --i) {
      const std::size_t pos = i - 1;
      if (__columns[pos] != k) continue;

      allocator.destroy(__translators[pos]);
      allocator.deallocate(__translators[pos], 1);
      __translators.erase(__translators.begin() + pos);
      __columns.erase(__columns.begin() + pos);

      if (k == __highest_column) highest_column_removed = true;
    }
  }

  // if the highest column index corresponded to one of the translators
  // removed, we must recompute it
  if (highest_column_removed) {
    __highest_column = std::size_t(0);
    for (const auto col : __columns)
      if (__highest_column < col) __highest_column = col;
  }
}
......
......@@ -291,8 +291,9 @@ namespace gum {
}
if (end > __row->size()) {
GUM_ERROR(SizeError,
"the database has fewer rows than the upper range "
"specified to the handler");
"the database has fewer rows (" << __row->size() <<
") than the upper range (" << end <<
") specified to the handler");
}
__begin_index = begin;
......@@ -760,13 +761,13 @@ namespace gum {
this->setVariableNames(variable_names, from_external_object);
}
/// returns the name of the kth column of the database
template < typename T_DATA, template < typename > class ALLOC >
INLINE const std::string&
IDatabaseTable< T_DATA, ALLOC >::variableName(const std::size_t k) const {
  // k must be the index of an existing column name; otherwise an
  // OutOfBounds error reporting the faulty index is raised
  if (_variable_names.size() <= k)
    GUM_ERROR(OutOfBounds, "the database does not contain Column #" << k);
  return _variable_names[k];
}
......@@ -780,7 +781,26 @@ namespace gum {
if (_variable_names[i] == name) return i;
GUM_ERROR(UndefinedElement,
"the database contains no column with this name");
"the database contains no column whose name is " << name );
}
/// returns the indices of the columns whose name is passed in argument
template < typename T_DATA, template < typename > class ALLOC >
INLINE
typename IDatabaseTable< T_DATA, ALLOC >:: template DBVector<std::size_t>
IDatabaseTable< T_DATA, ALLOC >::columnsFromVariableName(
  const std::string& name) const {
  // gather, in increasing order, the index of every column named "name"
  DBVector<std::size_t> matching_cols;
  std::size_t index = 0;
  for (const auto& var_name : _variable_names) {
    if (var_name == name) matching_cols.push_back ( index );
    ++index;
  }

  // an unknown name is an error rather than an empty result
  if ( matching_cols.empty () ) {
    GUM_ERROR(UndefinedElement,
              "the database contains no column whose name is " << name );
  }

  return matching_cols;
}
......@@ -799,6 +819,21 @@ namespace gum {
}
// returns the number of records in the database
template < typename T_DATA, template < typename > class ALLOC >
INLINE std::size_t
IDatabaseTable< T_DATA, ALLOC >::nbRows() const noexcept {
  // the number of records is the number of rows stored in __data
  const std::size_t row_count = __data.size();
  return row_count;
}
// indicates whether the database contains some records or not
template < typename T_DATA, template < typename > class ALLOC >
INLINE bool IDatabaseTable< T_DATA, ALLOC >::empty () const noexcept {
  // the database is empty when __data holds no row
  const bool has_no_record = __data.empty();
  return has_no_record;
}
// update the handlers when the size of the database changes
template < typename T_DATA, template < typename > class ALLOC >
void IDatabaseTable< T_DATA, ALLOC >::__updateHandlers(
......@@ -884,8 +919,9 @@ namespace gum {
// check that the size of the row is the same as the rest of the database
if (!_isRowSizeOK(new_row.size()))
GUM_ERROR(SizeError,
"the new row has not the same size as the "
"rest of the database");
"the new row is of size " << new_row.size() <<
", which is different from the number of columns " <<
"of the database, i.e., " << _variable_names.size() );
__updateHandlers(__data.size() + 1);
__data.push_back(std::move(new_row));
......@@ -919,14 +955,21 @@ namespace gum {
rows_have_missing_vals) {
if (new_rows.empty()) return;
// check that the missing values indicators vector has the same size
// as the new rows
if ( rows_have_missing_vals.size () != new_rows.size () )
GUM_ERROR(SizeError,
"the number of new rows (i.e., " << new_rows.size () <<
") is different from the number of missing values indicators ("
<< rows_have_missing_vals.size () );
// check that all the rows have the same size
const std::size_t new_size = new_rows[0].size();
for (const auto& row : new_rows) {
if (row.size() != new_size) {
GUM_ERROR(SizeError,
"all the new rows do not have the same "
"nunber of columns");
"all the new rows do not have the same number of columns");
}
}
......@@ -935,8 +978,9 @@ namespace gum {
std::size_t db_size = __data.size();
if (!_isRowSizeOK(new_size)) {
GUM_ERROR(SizeError,
"the new rows have not the same size as the "
"number of columns in the database");
"the new rows have " << new_size <<
" columns, which is different from the number of columns " <<
"of the database, i.e., " << _variable_names.size() );
}
std::size_t nb_new_rows = new_rows.size();
......@@ -971,14 +1015,21 @@ namespace gum {
rows_have_missing_vals) {
if (new_rows.empty()) return;
// check that the missing values indicators vector has the same size
// as the new rows
if ( rows_have_missing_vals.size () != new_rows.size () )
GUM_ERROR(SizeError,
"the number of new rows (i.e., " << new_rows.size () <<
") is different from the number of missing values indicators ("
<< rows_have_missing_vals.size () );
// check that all the rows have the same size
const std::size_t new_size = new_rows[0].size();
for (const auto& row : new_rows) {
if (row.size() != new_size) {
GUM_ERROR(SizeError,
"all the new rows do not have the same "
"nunber of columns");
"all the new rows do not have the same number of columns");
}
}
......@@ -988,8 +1039,9 @@ namespace gum {
if (!_isRowSizeOK(new_size)) {
GUM_ERROR(SizeError,
"the new rows have not the same size as the "
"number of columns in the database");
"the new rows have " << new_size <<
" columns, which is different from the number of columns " <<
"of the database, i.e., " << _variable_names.size() );
}
std::size_t nb_new_rows = new_rows.size();
......
This diff is collapsed.
......@@ -173,11 +173,16 @@ namespace gum {
const std::size_t size = names.size();
const std::size_t ignored_cols_size = __ignored_cols.size();
const auto& data = IDatabaseTable< DBCell, ALLOC >::_content();
if (!from_external_object || !ignored_cols_size) {
if (data.empty() || (size == data[0].size())) {
this->_variable_names = names;
return;
}
else {
GUM_ERROR(SizeError,
"the number of variable's names (i.e., " << size <<
") does not correspond to the number of columns of the " <<
"raw database table (i.e.," << data[0].size() << ")");
}
} else {
// check that the size of the names vector (after removing the ignored
......@@ -208,11 +213,14 @@ namespace gum {
this->_variable_names = std::move(new_names);
return;
}
else {
GUM_ERROR(SizeError,
"the number of variable's names excluding the ignored " <<
"columns (i.e., " << (size - ignored_size) <<
") does not correspond to the number of columns of the " <<
"raw database table (i.e.," << data[0].size() << ")");
}
}
GUM_ERROR(SizeError,
"the number of variable's names does not "
"correspond to the number of columns of the database");
}
......@@ -362,10 +370,12 @@ namespace gum {
ignored_size = ignored_cols_size - ignored_size;
}
if (!this->_isRowSizeOK(row_size - ignored_size))
if (!this->_isRowSizeOK(row_size - ignored_size)) {
GUM_ERROR(SizeError,
"the new row has not the same size as the "
"rest of the database");
"the new row has " << (row_size - ignored_size) <<
" elements whereas the raw database table has " <<
this->_variable_names.size() << " columns" );
}
// create the dbrow that will contain the new data
Row< DBCell > dbrow;
......
......@@ -39,6 +39,10 @@ namespace gum_tests {
TS_ASSERT(
std::stof(translator.translateBack(gum::learning::DBTranslatedValue{3.0f}))
== 3);
TS_ASSERT(translator.missingValue().cont_val ==
std::numeric_limits< float >::max() );
TS_GUM_ASSERT_THROWS_NOTHING(translator.translate("5"));
TS_ASSERT(translator.translate("4.22").cont_val == 4.22f);
TS_ASSERT(translator.translate("-5.34").cont_val == -5.34f);
......
......@@ -46,6 +46,9 @@ namespace gum_tests {
TS_ASSERT_THROWS(translator.translate("11"), gum::UnknownLabelInDatabase);
TS_ASSERT_THROWS(translator.translate("aaa"), gum::TypeError);
TS_ASSERT(translator.missingValue().discr_val ==
std::numeric_limits< std::size_t >::max() );
TS_GUM_ASSERT_THROWS_NOTHING(translator.translate("7"));
TS_ASSERT(translator.translate("10").discr_val == 1);
TS_ASSERT(translator.translate("9").discr_val == 1);
......
......@@ -37,6 +37,9 @@ namespace gum_tests {
TS_ASSERT(translator.translate("toto").discr_val == 0);
TS_ASSERT(translator.translate("toto").discr_val == 0);
TS_ASSERT(translator.missingValue().discr_val ==
std::numeric_limits< std::size_t >::max()