.saveBN(...) -> .loadBN(...) round trip fails due to rounding in .toFast()
If a network contains a DiscretizedVariable with very narrow bins, saving it with saveBN in pickle (.pkl) format produces a file that loadBN cannot load. Here's some sample code that reproduces the problem:
import pyagrum as gum
import numpy as np
import tempfile, os, traceback
print(gum.__version__) # 2.2.1
X = gum.DiscretizedVariable("X", "X", [0.0, 100.000001, 100.000002, 200.0])
Y = gum.LabelizedVariable("Y", "Y", 2)
Y.changeLabel(0, "False")
Y.changeLabel(1, "True")
print(X.ticks()) # (0.0, 100.000001, 100.000002, 200.0)
print(X.toFast()) # Precision lost: X[0,100,100,200]
bn = gum.BayesNet()
nx = bn.add(X)
ny = bn.add(Y)
bn.addArc("Y", "X")
bn.cpt("Y").fillWith([0.5, 0.5])
bn.cpt("X").fillWith(1.0)
bn.cpt("X").normalizeAsCPT()
tmp = os.path.join(tempfile.gettempdir(), "tofast_roundtrip.pkl")
gum.saveBN(bn, tmp)
print("saved to", tmp)
# Will error out since `X[0,100,100,200]` loses a cut:
bn2 = gum.loadBN(tmp)
Here's the output:
2.2.1
(0.0, 100.000001, 100.000002, 200.0)
X[0,100,100,200]
saved to /var/folders/fh/xxvl5chx10g7ht__mkgq05t40000gn/T/tofast_roundtrip.pkl
---------------------------------------------------------------------------
SizeError Traceback (most recent call last)
/var/folders/fh/xxvl5chx10g7ht__mkgq05t40000gn/T/ipykernel_70042/2284630757.py in ?()
25 gum.saveBN(bn, tmp)
26 print("saved to", tmp)
27
28 # Will error out since `X[0,100,100,200]` loses a cut:
---> 29 bn2 = gum.loadBN(tmp)
~/.virtualenvs/bayes-validation-hH82x9Th/lib/python3.11/site-packages/pyagrum/pyagrum.py in ?(filename, listeners, verbose, **opts)
26381 warns = bn.loadUAI(filename, listeners)
26382 elif extension == "PKL":
26383 import pickle
26384 with open(filename, "rb") as f:
> 26385 bn = pickle.load(f)
26386 else:
26387 raise InvalidArgument("extension " + filename.split('.')
26388 [-1] + " unknown. Please use among " + availableBNExts())
~/.virtualenvs/bayes-validation-hH82x9Th/lib/python3.11/site-packages/pyagrum/pyagrum.py in ?(self, state)
13285 for father in state['parents'][son]:
13286 self.addArc(father,son)
13287 self.endTopologyTransformation()
13288 for node in state['cpt']:
> 13289 self.cpt(node).fillWith(state['cpt'][node])
...
7801
7802 return self
7803
SizeError: [pyAgrum] incorrect size: Sizes do not match : 4!=6
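I believe this is because nodes are serialized using [self.variable(i).toFast() for i in self.nodes()], and the Fast format doesn't preserve full precision. In this example the ticks 100.000001 and 100.000002 are both written as 100, so the reloaded X appears to end up with 2 bins instead of 3; its CPT then has 2 × 2 = 4 entries while the saved one has 3 × 2 = 6, which matches the "4!=6" in the SizeError.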
Edited by Christopher Eveland