Commit 9a6aa901 authored by Mike Ryan

#58: Move component construction from EtlTask to EtlCommand.

parent a4a1418c
Pipeline #58448141 passed with stage
in 2 minutes and 30 seconds
......@@ -8,6 +8,9 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
## [Unreleased]
### Changed
- `EtlTask` now accepts its extract, key_map, and load components as object instances rather than constructing them from configuration.
### Added
- The `Filter` interface has been added, to determine whether a DataRecord should be processed.
- The `Select` filter has been added, allowing for filtering by comparing DataRecord properties to values using PHP comparison operators.
......
......@@ -4,6 +4,10 @@ declare(strict_types=1);
namespace Soong\Console\Command;
use Noodlehaus\Config;
use Soong\Contracts\Extractor\Extractor;
use Soong\Contracts\KeyMap\KeyMap;
use Soong\Contracts\Loader\Loader;
use Soong\Filter\Select;
use Soong\Task\TaskPipeline;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
......@@ -79,18 +83,116 @@ class EtlCommand extends Command
*
* @param array $directoryNames
* List of directories containing task configuration.
* @param array $options
* List of command-line options.
*/
protected function loadConfiguration(array $directoryNames) : void
protected function loadConfiguration(array $directoryNames, array $options = []) : void
{
$this->pipeline = new TaskPipeline();
foreach ($directoryNames as $directoryName) {
$conf = Config::load($directoryName);
foreach ($conf->all() as $id => $taskConfiguration) {
foreach ($conf->all() as $id => $configuration) {
$taskClass = $configuration['class'];
$taskConfiguration = $configuration['configuration'];
$taskConfiguration['extract'] = $this->getExtractor($taskConfiguration['extract'], $options);
$taskConfiguration['load'] = $this->getLoader($taskConfiguration['load']);
if (isset($taskConfiguration['key_map'])) {
$taskConfiguration['key_map'] = $this->getKeyMap(
$taskConfiguration['key_map'],
$taskConfiguration['extract']->getKeyProperties(),
$taskConfiguration['load']->getKeyProperties()
);
}
// Inject the pipeline into each task so it knows its parent.
$task = new $taskConfiguration['class']($taskConfiguration['configuration']
+ ['pipeline' => $this->pipeline]);
$task = new $taskClass($taskConfiguration + ['pipeline' => $this->pipeline]);
$this->pipeline->addTask($id, $task);
}
}
}
/**
 * Construct an Extractor instance.
 *
 * Any --select expressions passed in $options are converted into a Select
 * filter appended to the extractor's configured filters, and every filter
 * definition is replaced with an instance before the extractor is built.
 *
 * @param array $configuration
 *   The extractor definition: 'class' names the class to instantiate and
 *   'configuration' is the array handed to its constructor.
 * @param array $options
 *   Runtime options which may affect the configuration. Only 'select' (a
 *   list of "$name$op$value" expressions) is consulted here.
 *
 * @return \Soong\Contracts\Extractor\Extractor|null
 *   The constructed extractor.
 *
 * @throws \InvalidArgumentException
 *   If a select expression cannot be parsed.
 */
protected function getExtractor(array $configuration, array $options): ?Extractor
{
    /** @var string $extractorClass Name of the extractor class to instantiate. */
    $extractorClass = $configuration['class'];
    $extractorConfiguration = $configuration['configuration'];
    if (!empty($options['select'])) {
        $extractorConfiguration['filters'][] = [
            'class' => Select::class,
            'configuration' => [
                'criteria' => $this->parseSelectExpressions($options['select']),
            ],
        ];
    }
    // Replace filter configuration with actual instances.
    if (!empty($extractorConfiguration['filters'])) {
        foreach ($extractorConfiguration['filters'] as $key => $filter) {
            $extractorConfiguration['filters'][$key] =
                new $filter['class']($filter['configuration']);
        }
    }
    return new $extractorClass($extractorConfiguration);
}

/**
 * Parse --select expressions into criteria for the Select filter.
 *
 * Each expression arrives in the form "$name$op$value" and is turned into
 * an array [$name, $op, $value].
 *
 * @param array $expressions
 *   List of expression strings.
 *
 * @return array
 *   List of [$name, $op, $value] arrays, in the order given.
 *
 * @throws \InvalidArgumentException
 *   If an expression contains no known operator or has an empty name.
 */
private function parseSelectExpressions(array $expressions): array
{
    // Note that if '=' is before '==' in the alternation, 'a==b' will be
    // parsed as 'a', '=', '=b'. To prevent this, sort the operators by
    // length, longest first.
    $operatorList = Select::OPERATORS;
    usort($operatorList, static function ($a, $b) {
        return strlen($b) <=> strlen($a);
    });
    // Escape each operator so it is always matched literally, whatever
    // characters it contains.
    $quotedOperators = array_map(static function ($operator) {
        return preg_quote($operator, '/');
    }, $operatorList);
    $operatorExpression = implode('|', $quotedOperators);
    $criteria = [];
    foreach ($expressions as $expression) {
        // An expression without a property name (e.g. "==5") cannot be
        // applied to a record, so treat it as invalid too.
        if (!preg_match("/(.*?)($operatorExpression)(.*)/", $expression, $matches)
            || $matches[1] === ''
        ) {
            throw new \InvalidArgumentException("--select: Invalid expression $expression");
        }
        $criteria[] = [$matches[1], $matches[2], $matches[3]];
    }
    return $criteria;
}
/**
 * Construct a Loader instance.
 *
 * @param array $configuration
 *   The loader definition: 'class' names the class to instantiate and
 *   'configuration' is the value handed to its constructor.
 *
 * @return \Soong\Contracts\Loader\Loader|null
 *   The constructed loader.
 */
protected function getLoader(array $configuration): ?Loader
{
    /** @var string $className Name of the loader class to instantiate. */
    $className = $configuration['class'];
    $settings = $configuration['configuration'];
    return new $className($settings);
}
/**
 * Construct a KeyMap instance.
 *
 * The extractor and loader key definitions are supplied as defaults; values
 * for 'extractor_keys' or 'loader_keys' present in the key map's own
 * configuration take precedence over them.
 *
 * @param array $configuration
 *   The key map definition: 'class' names the class to instantiate and the
 *   optional 'configuration' is merged over the defaults.
 * @param array $extractorKeys
 *   Default value for the 'extractor_keys' option.
 * @param array $loaderKeys
 *   Default value for the 'loader_keys' option.
 *
 * @return \Soong\Contracts\KeyMap\KeyMap|null
 *   The constructed key map.
 */
protected function getKeyMap(array $configuration, array $extractorKeys, array $loaderKeys): ?KeyMap
{
    $defaults = [
        'loader_keys' => $loaderKeys,
        'extractor_keys' => $extractorKeys,
    ];
    // Explicit configuration wins over the defaults.
    $settings = array_merge($defaults, $configuration['configuration'] ?? []);
    /** @var string $className Name of the key map class to instantiate. */
    $className = $configuration['class'];
    return new $className($settings);
}
}
......@@ -37,7 +37,7 @@ EOT
{
$directoryNames = $input->getOption('directory');
$options = ['select' => $input->getOption('select')];
$this->loadConfiguration($directoryNames);
$this->loadConfiguration($directoryNames, $options);
foreach ($input->getArgument('tasks') as $id) {
if ($task = $this->pipeline->getTask($id)) {
$output->writeln("<info>Executing $id</info>");
......
......@@ -16,13 +16,10 @@ interface EtlTask extends Task
/**
* Retrieves the configured extractor for this task, if any.
*
* @param array $options
* List of configuration options to set on the extractor.
*
* @return Extractor
* The extractor, or NULL if none.
*/
public function getExtractor(array $options = []) : ?Extractor;
public function getExtractor() : ?Extractor;
/**
* Retrieves the configured loader for this task, if any.
......
......@@ -8,8 +8,6 @@ use Soong\Contracts\KeyMap\KeyMap;
use Soong\Contracts\Loader\Loader;
use Soong\Contracts\Task\EtlTask as EtlTaskInterface;
use Soong\Contracts\Transformer\Transformer;
use Soong\Filter\Select;
use Symfony\Component\OptionsResolver\OptionsResolver;
/**
* Implementation of operations for a full ETL process.
......@@ -25,32 +23,21 @@ class EtlTask extends Task implements EtlTaskInterface
$options = parent::optionDefinitions();
$options['extract'] = [
'required' => true,
'allowed_types' => 'array',
'default_value' => function (OptionsResolver $resolver) {
$resolver->setDefined(['class', 'configuration']);
$resolver->setAllowedTypes('class', 'string');
$resolver->setAllowedTypes('configuration', 'array');
},
'allowed_types' => 'Soong\Contracts\Extractor\Extractor',
];
$options['transform'] = [
'allowed_types' => 'array',
];
$options['load'] = [
'required' => true,
'allowed_types' => 'array',
'default_value' => function (OptionsResolver $resolver) {
$resolver->setDefined(['class', 'configuration']);
$resolver->setAllowedTypes('class', 'string');
$resolver->setAllowedTypes('configuration', 'array');
},
'allowed_types' => 'Soong\Contracts\Loader\Loader',
];
$options['key_map'] = [
'allowed_types' => 'array',
'default_value' => function (OptionsResolver $resolver) {
$resolver->setDefined(['class', 'configuration']);
$resolver->setAllowedTypes('class', 'string');
$resolver->setAllowedTypes('configuration', 'array');
},
'allowed_types' => 'Soong\Contracts\KeyMap\KeyMap',
];
$options['record_class'] = [
'required' => true,
'allowed_types' => 'string',
];
return $options;
}
......@@ -60,50 +47,7 @@ class EtlTask extends Task implements EtlTaskInterface
*/
public function getExtractor(array $options = []): ?Extractor
{
$taskConfiguration = $this->getAllConfigurationValues();
if (empty($taskConfiguration['extract'])) {
return null;
}
/** @var \Soong\Contracts\Extractor\Extractor $extractorClass */
$extractorClass = $taskConfiguration['extract']['class'];
$extractorConfiguration = $taskConfiguration['extract']['configuration'];
// @todo Belongs in the Command rather than the Task. Not really
// practical until we make the Command rather than the Task responsible
// for instantiating everything.
if (!empty($options['select'])) {
// Each expression arrives in the form "$name$op$value" = we need to
// turn that into an array [$name, $op, $value].
$criteria = [];
// Note that if '=' is before '==' in the operator array, 'a==b'
// will be parsed as 'a', '=', '=b'. To prevent this, make sure the
// operators are sorted longest first.
$operatorList = Select::OPERATORS;
usort($operatorList, function ($a, $b) {
return $b <=> $a;
});
$operatorExpression = implode('|', $operatorList);
foreach ($options['select'] as $expression) {
if (!preg_match("/(.*?)($operatorExpression)(.*)/", $expression, $matches)) {
// @todo Throw exception - should be Command exception.
}
$criteria[] = [$matches[1], $matches[2], $matches[3]];
}
$extractorConfiguration['filters'][] = [
'class' => 'Soong\Filter\Select',
'configuration' => [
'criteria' => $criteria,
]
];
}
// Replace filter configuration with actual instances.
if (!empty($extractorConfiguration['filters'])) {
foreach ($extractorConfiguration['filters'] as $key => $filter) {
$extractorConfiguration['filters'][$key] =
new $filter['class']($filter['configuration']);
}
}
$extractor = new $extractorClass($extractorConfiguration);
return $extractor;
return $this->getConfigurationValue('extract');
}
/**
......@@ -111,14 +55,7 @@ class EtlTask extends Task implements EtlTaskInterface
*/
public function getLoader(): ?Loader
{
$taskConfiguration = $this->getAllConfigurationValues();
if (empty($taskConfiguration['load'])) {
return null;
}
/** @var \Soong\Contracts\Loader\Loader $loaderClass */
$loaderClass = $taskConfiguration['load']['class'];
$loader = new $loaderClass($taskConfiguration['load']['configuration']);
return $loader;
return $this->getConfigurationValue('load');
}
/**
......@@ -126,34 +63,7 @@ class EtlTask extends Task implements EtlTaskInterface
*/
public function getKeyMap() : ?KeyMap
{
$taskConfiguration = $this->getAllConfigurationValues();
if (empty($taskConfiguration['key_map'])) {
return null;
}
/** @var \Soong\Contracts\Extractor\Extractor $extractor */
$extractor = $this->getExtractor();
$extractorKeys = $extractor->getKeyProperties();
/** @var \Soong\Contracts\Loader\Loader $loader */
$loader = $this->getLoader();
$loaderKeys = $loader->getKeyProperties();
$keyMapConfiguration = $taskConfiguration['key_map']['configuration'] ?? [];
if (empty($keyMapConfiguration)) {
return null;
}
$keyMapConfiguration = array_merge(
['extractor_keys' => $extractorKeys],
$keyMapConfiguration
);
$keyMapConfiguration = array_merge(
['loader_keys' => $loaderKeys],
$keyMapConfiguration
);
/** @var \Soong\Contracts\KeyMap\KeyMap $keyMapClass */
$keyMapClass = $taskConfiguration['key_map']['class'];
$keyMap = new $keyMapClass($keyMapConfiguration);
return $keyMap;
return $this->getConfigurationValue('key_map');
}
/**
......
......@@ -18,10 +18,6 @@ class Task extends OptionsResolverComponent implements TaskInterface
protected function optionDefinitions(): array
{
$options = parent::optionDefinitions();
$options['record_class'] = [
'required' => true,
'allowed_types' => 'string',
];
$options['pipeline'] = [
'required' => true,
'allowed_types' => 'Soong\Contracts\Task\TaskPipeline',
......
......@@ -109,4 +109,19 @@ class MigrateCommandTest extends CommandTestBase
$commandOutput = $this->commandTester->getDisplay();
$this->assertEquals($expectedCommandOutput, $commandOutput);
}
/**
 * Verify that an unparseable --select expression is rejected.
 *
 * Executing the command with the expression "foo!bar" is expected to raise
 * an InvalidArgumentException whose message names that expression.
 */
public function testInvalidSelect() : void
{
    $this->expectException(\InvalidArgumentException::class);
    $this->expectExceptionMessage('Invalid expression foo!bar');
    $this->commandTester->execute([
        'tasks' => ['test1'],
        '--directory' => ['tests/test_config_1'],
        '--select' => ['foo!bar'],
    ]);
}
}
......@@ -2,6 +2,10 @@
namespace Soong\Tests\Contracts\Task;
use Soong\Contracts\Extractor\Extractor;
use Soong\Contracts\Loader\Loader;
use Soong\Contracts\Task\TaskPipeline;
/**
* Base class for testing EtlTask implementations.
*
......@@ -18,4 +22,23 @@ namespace Soong\Tests\Contracts\Task;
abstract class EtlTaskTestBase extends TaskTestBase
{
    /**
     * Test that executing an undefined operation is rejected.
     *
     * The task is built with mocked pipeline, extractor and loader
     * components plus a placeholder record class, so only the handling of
     * the unknown operation name is exercised. Implementations which define
     * operations of their own are responsible for testing execute() on them.
     */
    public function testTaskExecuteNonExistent()
    {
        $configuration = [
            'pipeline' => $this->createMock(TaskPipeline::class),
            'record_class' => 'class\does\not\matter',
            'extract' => $this->createMock(Extractor::class),
            'load' => $this->createMock(Loader::class),
        ];
        /** @var \Soong\Contracts\Task\EtlTask $task */
        $task = new $this->taskClass($configuration);

        $this->expectException('\BadMethodCallException');
        $this->expectExceptionMessage("No i_dont_exist method exists.");
        $task->execute('i_dont_exist');
    }
}
......@@ -37,11 +37,9 @@ abstract class TaskTestBase extends TestCase
*/
public function testTaskExecuteNonExistent()
{
$pipeline = $this->createMock(TaskPipeline::class);
/** @var Task $task */
$task = new $this->taskClass([
'pipeline' => $pipeline,
'record_class' => 'class\does\not\matter',
'pipeline' => $this->createMock(TaskPipeline::class),
]);
$this->expectException('\BadMethodCallException');
$this->expectExceptionMessage("No i_dont_exist method exists.");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment