DOELibrary's capability to ignore undefined samples is impaired with MDAJacobi
Summary
The multi-threading used inside the `MDAJacobi` class is designed to catch exceptions raised inside the worker threads and then move on to the next worker in the queue. This mechanism is intended to tolerate occasional exceptions in individual calculations so that they do not compromise the complete scenario run.
The problem with our current implementation is that the `DOELibrary` class also includes a section that catches `ValueError` exceptions for individual samples, so that undefined points are ignored and the DOE does not crash. In the particular case of an `MDAJacobi`, the exception is caught at the `ParallelExecution` level of the MDA, but then the DOE tries to recover the data from the MDA and crashes because the values were never stored.
Gemseo version
3.2.1
Steps to reproduce
- Create a `DOEScenario` with disciplines that raise `ValueError` exceptions at certain values of the design variables.
- Run the DOE using an `MDAJacobi` to solve the inner couplings.
What is the current bug behavior?
The scenario run crashes whenever it tries to evaluate a vector that is not defined for a discipline.
What is the expected correct behavior?
The exception should be caught at both the `MDAJacobi` level and the `DOELibrary` level; the undefined points should be detected and ignored by the algorithm and in the results.
Relevant logs and/or screenshots
Traceback (most recent call last):
File "C:/Users/gilberto.ruiz-j/Downloads/sellar_analytical.py", line 241, in <module>
scenario.execute({"n_samples": 30, "algo": "OT_OPT_LHS"})
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\discipline.py", line 807, in execute
self._run()
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\scenario.py", line 445, in _run
self._run_algorithm()
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\doe_scenario.py", line 118, in _run_algorithm
self.optimization_result = lib.execute(self.formulation.opt_problem, **options)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\algos\driver_lib.py", line 353, in execute
result = self._run(**options)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\algos\doe\doe_lib.py", line 160, in _run
self.evaluate_samples(eval_jac, n_processes, wait_time_between_samples)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\algos\doe\doe_lib.py", line 391, in evaluate_samples
self.__evaluate_functions(sample)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\algos\doe\doe_lib.py", line 307, in __evaluate_functions
return self.problem.evaluate_functions(
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\algos\opt_problem.py", line 926, in evaluate_functions
outputs[func.name] = func(func_inputs)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\mdofunctions\mdo_function.py", line 313, in __call__
val = self.evaluate(x_vect, self.force_real)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\mdofunctions\mdo_function.py", line 330, in evaluate
val = self.__counted_f(x_vect)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\mdofunctions\mdo_function.py", line 286, in __counted_f
val = self._func(x_vect)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\mdofunctions\norm_db_function.py", line 122, in _func
value = self.__orig_func(xn_vect)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\mdofunctions\mdo_function.py", line 313, in __call__
val = self.evaluate(x_vect, self.force_real)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\mdofunctions\mdo_function.py", line 330, in evaluate
val = self.__counted_f(x_vect)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\mdofunctions\mdo_function.py", line 286, in __counted_f
val = self._func(x_vect)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\mdofunctions\norm_function.py", line 93, in _func
return self.__orig_func(x_vect)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\mdofunctions\mdo_function.py", line 313, in __call__
val = self.evaluate(x_vect, self.force_real)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\mdofunctions\mdo_function.py", line 330, in evaluate
val = self.__counted_f(x_vect)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\mdofunctions\mdo_function.py", line 286, in __counted_f
val = self._func(x_vect)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\mdofunctions\function_from_discipline.py", line 120, in _func
obj_allx_val = self.__out_x_func(x_of_disc)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\mdofunctions\mdo_function.py", line 313, in __call__
val = self.evaluate(x_vect, self.force_real)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\mdofunctions\mdo_function.py", line 330, in evaluate
val = self.__counted_f(x_vect)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\mdofunctions\mdo_function.py", line 286, in __counted_f
val = self._func(x_vect)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\mdofunctions\make_function.py", line 104, in _func
computed_values = self.__mdo_function.discipline.execute(data)
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\discipline.py", line 807, in execute
self._run()
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\mda\jacobi.py", line 209, in _run
self.execute_all_disciplines(deepcopy(self.local_data))
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\mda\jacobi.py", line 167, in execute_all_disciplines
outputs = [discipline.get_output_data() for discipline in self.disciplines]
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\mda\jacobi.py", line 167, in <listcomp>
outputs = [discipline.get_output_data() for discipline in self.disciplines]
File "c:\users\gilberto.ruiz-j\pycharmprojects\gemseo\src\gemseo\core\discipline.py", line 1832, in get_output_data
(k, v) for k, v in self.local_data.items() if self.is_output_existing(k)
AttributeError: 'NoneType' object has no attribute 'items'
Process finished with exit code 1
Possible fixes
Add an option in `ParallelExecution` to allow exceptions to be re-raised, then set it to `True` when creating the multi-threading instance inside the `MDAJacobi` class.
In `parallel_execution.py`, lines 237-243:
# Retrieve outputs on the fly to call the callbacks, typically
# iterates progress bar and stores the data in database or cache
while got_n_outs != n_tasks:
index, output = queue_out.get()
if isinstance(output, Exception):
LOGGER.error("Failed to execute task indexed %s", str(index))
LOGGER.error(output)
if self.re_raise_exceptions:
raise output