Commit a8e522dd authored by Charles Vernerey's avatar Charles Vernerey
Browse files

Update examples

parent 288b216d
Loading
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -113,11 +113,11 @@ After running this block of code, we get the following message in the console:
```
List of closed itemsets for the dataset contextPasquier99 w.r.t. freq(x):
[3], freq=4
[2, 3, 5], freq=3
[2, 5], freq=4
[2, 3, 5], freq=3
[1, 3], freq=3
[1, 3, 4], freq=1
[1, 2, 3, 5], freq=2
[1, 3, 4], freq=1
```

We have 6 closed itemsets w.r.t. the frequency in the dataset `contextPasquier99`.
+16 −9
Original line number Diff line number Diff line
@@ -73,24 +73,31 @@ We give below an example of CP encoding for the Closed Itemset Mining problem us
TransactionalDatabase database = new DatReader("data/contextPasquier99.dat").read();
// Create the Choco model
Model model = new Model("Closed Itemset Mining");
/* Array of Boolean variables where x[i] == 1 represents the fact 
that i belongs to the itemset */
/* Array of Boolean variables where x[i] == 1 represents
the fact that i belongs to the itemset */
BoolVar[] x = model.boolVarArray("x", database.getNbItems());
/* Integer variable that represents the frequency of x 
with the bounds [1, nbTransactions] */
IntVar freq = model.intVar("freq", 1, database.getNbTransactions());
/* Integer variable that represents the length of x 
with the bounds [1, nbItems] */
// Integer variable that represents the length of x with the bounds [1, nbItems]
IntVar length = model.intVar("length", 1, database.getNbItems());
// Ensures that length = sum(x)
model.sum(x, "=", length).post();
// Ensures that freq = frequency(x)
model.post(new Constraint("Cover Size", new CoverSize(database, freq, x)));
ConstraintFactory.coverSize(database, freq, x).post();
// Ensures that x is a closed itemset
model.post(new Constraint("Cover Closure", new CoverClosure(database, x)));
ConstraintFactory.coverClosure(database, x).post();
Solver solver = model.getSolver();
// Variable heuristic : select item i such that freq(x U i) is minimal
// Value heuristic : instantiate it first to 0
solver.setSearch(Search.intVarSearch(
        new MinCov(model, database),
        new IntDomainMin(),
        x
));
// Create a list to store all the closed itemsets
List<Pattern> closedPatterns = new LinkedList<>();
while (model.getSolver().solve()) {
while (solver.solve()) {
    int[] itemset = IntStream.range(0, x.length)
            .filter(i -> x[i].getValue() == 1)
            .map(i -> database.getItems()[i])
@@ -109,8 +116,8 @@ for (Pattern closed : closedPatterns) {
The goal is to find all the closed itemsets with a minimum frequency of 1. We start by reading the transactional database using the method `read()` of the `DatReader` instance. Then, we create a model with Choco-solver. Variables `freq` and `length` are created to store respectively the frequency and the length of the itemset. A boolean array of variables `x` represents the itemset, where `x[i] = 1` indicates that item `i` belongs to the itemset. Finally, we post three constraints:

- `model.sum(x, "=", length).post()`: ensures that $length = \sum x$.
- `model.post(new Constraint("Cover Size", new CoverSize(database, freq, x)))`: ensures that $freq = freq(x)$.
- `model.post(new Constraint("Cover Closure", new CoverClosure(database, x)))`: ensures that $x$ is closed w.r.t. the frequency.
- `ConstraintFactory.coverSize(database, freq, x).post()`: ensures that $freq = freq(x)$.
- `ConstraintFactory.coverClosure(database, x).post()`: ensures that $x$ is closed w.r.t. the frequency.

After finding all the solutions, we print them to the user.

+15 −2
Original line number Diff line number Diff line
@@ -14,7 +14,11 @@ import io.gitlab.chaver.mining.patterns.io.DatReader;
import io.gitlab.chaver.mining.patterns.io.TransactionalDatabase;
import io.gitlab.chaver.mining.patterns.io.Pattern;
import io.gitlab.chaver.mining.patterns.measure.Measure;
import io.gitlab.chaver.mining.patterns.search.strategy.selectors.variables.MinCov;
import org.chocosolver.solver.Model;
import org.chocosolver.solver.Solver;
import org.chocosolver.solver.search.strategy.Search;
import org.chocosolver.solver.search.strategy.selectors.values.IntDomainMin;
import org.chocosolver.solver.variables.BoolVar;
import org.chocosolver.solver.variables.IntVar;

@@ -33,6 +37,8 @@ public class ExampleClosedItemsetMining2 {
    public static void main(String[] args) throws Exception {
        // Read the transactional database
        TransactionalDatabase database = new DatReader("data/contextPasquier99.dat").read();
        // List of measures to be closed
        List<Measure> measures = Arrays.asList(freq(), maxFreq());
        // Create the Choco model
        Model model = new Model("Closed Itemset Mining with multiple measures");
        // Create the variables
@@ -56,10 +62,17 @@ public class ExampleClosedItemsetMining2 {
        // The constraint AdequateClosure ensures that x is closed w.r.t. M
        // Two versions are available : Domain Consistency (DC) and Weak Consistency (WC)
        // Note that the WC version is more time efficient than the DC one
        List<Measure> measures = Arrays.asList(freq(), maxFreq());
        ConstraintFactory.adequateClosure(database, measures, x, true).post();
        Solver solver = model.getSolver();
        // Variable heuristic : select item i such that freq(x U i) is minimal
        // Value heuristic : instantiate it first to 0
        solver.setSearch(Search.intVarSearch(
                new MinCov(model, database),
                new IntDomainMin(),
                x
        ));
        List<Pattern> closedPatterns = new LinkedList<>();
        while (model.getSolver().solve()) {
        while (solver.solve()) {
            int[] itemset = IntStream.range(0, x.length)
                    .filter(i -> x[i].getValue() == 1)
                    .map(i -> database.getItems()[i])
+1 −1
Original line number Diff line number Diff line
@@ -43,7 +43,7 @@ public class ExampleDiversity {
        ConstraintFactory.coverClosure(database, x).post();
        double jmax = 0.05;
        // Overlap is a global constraint that ensures that x is a diverse itemset
        // i.e. there exists no y such that jaccard(x,y) > jmax
        // i.e. there exists no y in the history such that jaccard(x,y) > jmax
        Overlap overlap = new Overlap(database, x, jmax, theta);
        model.post(new Constraint("Overlap", overlap));
        Solver solver = model.getSolver();
+3 −1
Original line number Diff line number Diff line
@@ -25,7 +25,9 @@ import java.util.Comparator;
import java.util.stream.IntStream;

/**
 * Example of MFI/MII mining (from paper Belaid et al. - Contraint Programming for Mining Borders of Frequent Itemsets)
 * Example of MFIs/MIIs mining
 * (From Belaid et al. - Constraint Programming for Mining Borders of Frequent Itemsets)
 * , figure 1
 */
public class ExampleMFIsMIIsMining {

Loading