Multiple train calls lead to errors
Multiple train calls lead to the following errors/warnings:
- Warning: Division by zero in vendor\php-ai\php-ml\src\FeatureExtraction\TfIdfTransformer.php on line 29
- Warning: array_count_values(): Can only count STRING and INTEGER values! in vendor\php-ai\php-ml\src\Classification\NaiveBayes.php on line 123
The following code was used to produce the errors:
` <?php
require_once __DIR__ . '/vendor/autoload.php';
use Phpml\Dataset\FilesDataset;
use Phpml\CrossValidation\StratifiedRandomSplit;
use Phpml\Tokenization\WordTokenizer;
use Phpml\FeatureExtraction\StopWords\French;
use Phpml\FeatureExtraction\TokenCountVectorizer;
use Phpml\Metric\Accuracy;
use Phpml\Classification\NaiveBayes;
use Phpml\Tokenization\NGramWordTokenizer;
use Phpml\Tokenization\NGramTokenizer;
use Phpml\FeatureExtraction\TfIdfTransformer;
use Phpml\Pipeline;
// Load the labelled corpus; 'data/spams' contains 2 folders: ham and spam.
$corpus = new FilesDataset('data/spams');
// Stratified random split of the corpus using a 0.2 ratio.
$holdout = new StratifiedRandomSplit($corpus, 0.2);

// Build each pipeline stage separately, in the same order the nested
// constructor expressions would evaluate them.
// NOTE(review): WordTokenizer is given (1, 3) while NGramWordTokenizer is
// imported but unused - confirm which tokenizer was intended.
$tokenizer = new WordTokenizer(1, 3);
$stopWords = new French();
$vectorizer = new TokenCountVectorizer($tokenizer, $stopWords);
$tfIdf = new TfIdfTransformer();
$classifier = new NaiveBayes();
$model = new Pipeline([$vectorizer, $tfIdf], $classifier);

// Single-call training on the split, left disabled in this reproduction:
//$model->train($holdout->getTrainSamples(), $holdout->getTrainLabels());
$model->train(['authorized words'], ['ham']);
// The second train() call below is the one reported to cause the errors.
$model->train(['unauthorized words'], ['spam']);

// Predict on the held-out samples and print the accuracy score.
$predictions = $model->predict($holdout->getTestSamples());
$accuracy = Accuracy::score($holdout->getTestLabels(), $predictions);
echo 'Accuracy: ' . $accuracy;
?>`