Commit c1ded782 authored by Mike Ryan's avatar Mike Ryan

#58: Add RecordFactory and PropertyFactory, and use them for constructing Records and Properties.

parent 1f2c7355
Pipeline #58633587 passed with stage
in 2 minutes and 23 seconds
beeraccounts:
class: Soong\Task\EtlTask
configuration:
record_class: Soong\Data\Record
record_factory: Soong\Data\RecordFactory
key_map:
class: Soong\KeyMap\DBAL
configuration:
......@@ -16,7 +16,7 @@ beeraccounts:
extract:
class: Soong\Extractor\Csv
configuration:
record_class: Soong\Data\Record
record_factory: Soong\Data\RecordFactory
csv_file_path: data/migrate_example_beer_account.csv
key_properties:
aid:
......
beercontent:
class: Soong\Task\EtlTask
configuration:
record_class: Soong\Data\Record
record_factory: Soong\Data\RecordFactory
key_map:
class: Soong\KeyMap\DBAL
configuration:
......@@ -16,7 +16,7 @@ beercontent:
extract:
class: Soong\Extractor\Csv
configuration:
record_class: Soong\Data\Record
record_factory: Soong\Data\RecordFactory
csv_file_path: data/migrate_example_beer_content.csv
key_properties:
bid:
......
beertopics:
class: Soong\Task\EtlTask
configuration:
record_class: Soong\Data\Record
record_factory: Soong\Data\RecordFactory
key_map:
class: Soong\KeyMap\DBAL
configuration:
......@@ -16,7 +16,7 @@ beertopics:
extract:
class: Soong\Extractor\Csv
configuration:
record_class: Soong\Data\Record
record_factory: Soong\Data\RecordFactory
csv_file_path: data/migrate_example_beer_topic.csv
key_properties:
style:
......
arraytosql:
class: Soong\Task\EtlTask
configuration:
record_class: Soong\Data\Record
record_factory: Soong\Data\RecordFactory
key_map:
class: Soong\KeyMap\DBAL
configuration:
......@@ -16,7 +16,7 @@ arraytosql:
extract:
class: Soong\Extractor\ArrayExtractor
configuration:
#data_record_class: Soong\Data\Record
#record_factory: Soong\Data\RecordFactory
key_properties:
id:
type: integer
......@@ -90,12 +90,12 @@ arraytosql:
sqltocsv:
class: Soong\Task\EtlTask
configuration:
record_class: Soong\Data\Record
record_factory: Soong\Data\RecordFactory
# Note we have no key_map - we are using this migration as an exporter.
extract:
class: Soong\Extractor\DBAL
configuration:
data_record_class: Soong\Data\Record
record_factory: Soong\Data\RecordFactory
connection:
# Replace with your test database credentials.
dbname: etltemp
......
......@@ -2,11 +2,11 @@
"sqltocsv_json": {
"class": "Soong\\Task\\EtlTask",
"configuration": {
"record_class": "Soong\\Data\\Record",
"record_factory": "Soong\\Data\\RecordFactory",
"extract": {
"class": "Soong\\Extractor\\DBAL",
"configuration": {
"data_record_class": "Soong\\Data\\Record",
"record_factory": "Soong\\Data\\RecordFactory",
"connection": {
"dbname": "etltemp",
"user": "root",
......
......@@ -3,11 +3,11 @@
<sqltocsv_xml>
<class>Soong\Task\EtlTask</class>
<configuration>
<record_class>Soong\Data\Record</record_class>
<record_factory>Soong\Data\RecordFactory</record_factory>
<extract>
<class>Soong\Extractor\DBAL</class>
<configuration>
<data_record_class>Soong\Data\Record</data_record_class>
<record_factory>Soong\Data\RecordFactory</record_factory>
<connection>
<dbname>etltemp</dbname>
<user>root</user>
......
......@@ -11,12 +11,16 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
### Changed
- `EtlTask` now accepts its extract, key_map, and load components as object instances rather than constructing them from configuration.
- `DataProperty` interface renamed to `Property`, and `DataRecord` interface renamed to `Record`.
- Derivatives of `ExtractorBase` now must accept a `record_factory` configuration option, which is an instance of `RecordFactory`.
- `EtlTask` replaced the string `record_class` with `RecordFactory` instance `record_factory`.
### Added
- The `Filter` interface has been added, to determine whether a `Record` should be processed.
- The `Select` filter has been added, allowing for filtering by comparing `Record` properties to values using PHP comparison operators.
- The `--select` option has been added to the `migrate` command, allowing for ad-hoc filtering of extracted data at runtime.
- `PropertyFactory` and `RecordFactory` interfaces/classes added for creation of `Property` and `Record` instances.
- Added basic console command tests.
- `property_factory` configuration option added to `EtlTask`.
## [0.5.3] - 2019-04-12
......
......@@ -12,7 +12,7 @@ arraytosql:
# Configuration passed to the Task class at creation time.
configuration:
# Record factory the Task will use to create the records that hold destination properties.
record_class: Soong\Data\Record
record_factory: Soong\Data\RecordFactory
# The KeyMap object stores the mappings from source record keys to
# destination record keys.
key_map:
......@@ -34,7 +34,7 @@ arraytosql:
class: Soong\Extractor\ArrayExtractor
configuration:
# The factory used to create the Record we will return for each source record.
data_record_class: Soong\Data\Record
record_factory: Soong\Data\RecordFactory
# Within the source data, the unique key is named "id" and is an integer.
# The KeyMap uses this information to create a map table and populate it.
key_properties:
......
......@@ -7,6 +7,8 @@ use Noodlehaus\Config;
use Soong\Contracts\Extractor\Extractor;
use Soong\Contracts\KeyMap\KeyMap;
use Soong\Contracts\Loader\Loader;
use Soong\Data\PropertyFactory;
use Soong\Data\RecordFactory;
use Soong\Filter\Select;
use Soong\Task\TaskPipeline;
use Symfony\Component\Console\Command\Command;
......@@ -89,11 +91,17 @@ class EtlCommand extends Command
protected function loadConfiguration(array $directoryNames, array $options = []) : void
{
$this->pipeline = new TaskPipeline();
$propertyFactory = new PropertyFactory();
$recordFactory = new RecordFactory($propertyFactory);
foreach ($directoryNames as $directoryName) {
$conf = Config::load($directoryName);
foreach ($conf->all() as $id => $configuration) {
$taskClass = $configuration['class'];
$taskConfiguration = $configuration['configuration'];
$taskConfiguration['record_factory'] = $recordFactory;
$taskConfiguration['property_factory'] = $propertyFactory;
$taskConfiguration['extract']['configuration']['record_factory'] =
$taskConfiguration['record_factory'];
$taskConfiguration['extract'] = $this->getExtractor($taskConfiguration['extract'], $options);
$taskConfiguration['load'] = $this->getLoader($taskConfiguration['load']);
if (isset($taskConfiguration['key_map'])) {
......@@ -104,7 +112,11 @@ class EtlCommand extends Command
);
}
// Inject the pipeline into each task so it knows its parent.
$task = new $taskClass($taskConfiguration + ['pipeline' => $this->pipeline]);
$task = new $taskClass($taskConfiguration +
[
'pipeline' => $this->pipeline,
]
);
$this->pipeline->addTask($id, $task);
}
}
......
<?php
declare(strict_types=1);
namespace Soong\Contracts\Data;
/**
* Factory for creating Property instances.
*
* Callers obtain Property objects through this contract instead of
* instantiating a concrete Property class themselves.
*/
interface PropertyFactory
{
/**
* Create an immutable property instance for the given value.
*
* @param mixed $value
*   The value the new property should hold.
*
* @return Property
*   A property instance for $value.
*/
public function create($value) : Property;
}
<?php
declare(strict_types=1);
namespace Soong\Contracts\Data;
/**
* Factory for creating Record instances.
*
* Callers obtain Record objects through this contract instead of
* instantiating a concrete Record class themselves.
*/
interface RecordFactory
{
/**
* Create a record populated with a set of named data properties.
*
* @param array $data
*   Associative array of property values, keyed by property name. Defaults
*   to an empty array.
*
* @return Record
*   A record instance for $data.
*/
public function create(array $data = []) : Record;
}
<?php
declare(strict_types=1);
namespace Soong\Data;
/**
 * Default PropertyFactory: wraps each value in a \Soong\Data\Property.
 */
class PropertyFactory implements \Soong\Contracts\Data\PropertyFactory
{
    /**
     * @inheritdoc
     */
    public function create($value): \Soong\Contracts\Data\Property
    {
        $property = new Property($value);

        return $property;
    }
}
......@@ -20,17 +20,38 @@ class Record implements \Soong\Contracts\Data\Record
*/
protected $data = [];
/**
* @internal
*
* Factory for creating Property instances.
*
* @var PropertyFactory $propertyFactory
*/
protected $propertyFactory;
/**
* @internal
*
* A null-valued property.
*
* @var Property $nullProperty
*/
protected $nullProperty;
/**
 * Create a record populated with a set of named data properties.
 *
 * @param \Soong\Contracts\Data\PropertyFactory $propertyFactory
 *   Factory for populating property instances.
 * @param array $data
 *   Associative array of property values, keyed by property name.
 */
public function __construct(\Soong\Contracts\Data\PropertyFactory $propertyFactory, array $data = [])
{
    $this->propertyFactory = $propertyFactory;
    // Build the null-valued property once, through the same factory, so it
    // can be handed back whenever a requested property is not set.
    $this->nullProperty = $this->propertyFactory->create(null);
    foreach ($data as $propertyName => $propertyValue) {
        $this->setProperty($propertyName, $this->propertyFactory->create($propertyValue));
    }
}
......@@ -47,21 +68,7 @@ class Record implements \Soong\Contracts\Data\Record
*/
public function getProperty(string $propertyName) : Property
{
return isset($this->data[$propertyName]) ? $this->data[$propertyName] : $this->nullProperty();
}
/**
* @internal
*
* Provide a property with a null value.
*
* @return Property
* A property object containing a null value.
*/
protected function nullProperty() : Property
{
// @todo Inject property implementation.
return new \Soong\Data\Property(null);
return isset($this->data[$propertyName]) ? $this->data[$propertyName] : $this->nullProperty;
}
/**
......
<?php
declare(strict_types=1);
namespace Soong\Data;
/**
 * Basic implementation of a Record factory.
 *
 * Every Record created here receives the single PropertyFactory held by this
 * factory.
 */
class RecordFactory implements \Soong\Contracts\Data\RecordFactory
{
    /**
     * Factory for creating all properties.
     *
     * @var \Soong\Contracts\Data\PropertyFactory
     */
    protected $propertyFactory;

    /**
     * Create a record factory instance using a property factory.
     *
     * @param \Soong\Contracts\Data\PropertyFactory|null $propertyFactory
     *   Factory passed to each created Record. When omitted or null, a
     *   default \Soong\Data\PropertyFactory is created.
     */
    public function __construct(?\Soong\Contracts\Data\PropertyFactory $propertyFactory = null)
    {
        // The parameter is declared explicitly nullable: relying on the
        // "= null" default to make the type nullable is deprecated in PHP 8.4.
        $this->propertyFactory = $propertyFactory ?? new PropertyFactory();
    }

    /**
     * @inheritdoc
     */
    public function create(array $data = []): \Soong\Contracts\Data\Record
    {
        return new Record($this->propertyFactory, $data);
    }
}
......@@ -3,8 +3,6 @@ declare(strict_types=1);
namespace Soong\Extractor;
use Soong\Data\Record;
/**
* Extractor for in-memory arrays.
*/
......@@ -29,9 +27,10 @@ class ArrayExtractor extends CountableExtractorBase
*/
public function extractAll() : iterable
{
    // Records are built by the injected factory; the extractor no longer
    // instantiates a concrete Record class itself.
    /** @var \Soong\Contracts\Data\RecordFactory $recordFactory */
    $recordFactory = $this->getConfigurationValue('record_factory');
    foreach ($this->getConfigurationValue('data') as $data) {
        // Yield exactly one record per configured data row.
        yield $recordFactory->create($data);
    }
}
......
......@@ -4,7 +4,7 @@ declare(strict_types=1);
namespace Soong\Extractor;
use League\Csv\Reader;
use Soong\Contracts\Data\Record;
use Soong\Contracts\Data\RecordFactory;
/**
* CSV extractor based on The League CSV library.
......@@ -18,10 +18,6 @@ class Csv extends ExtractorBase
protected function optionDefinitions(): array
{
$options = parent::optionDefinitions();
$options['record_class'] = [
'required' => true,
'allowed_types' => 'string',
];
$options['csv_file_path'] = [
'required' => true,
'allowed_types' => 'string',
......@@ -35,10 +31,11 @@ class Csv extends ExtractorBase
public function extractAll(): iterable
{
    $csv = $this->loadCsv();
    // The 'record_factory' option is declared as a RecordFactory instance,
    // so use it as-is. Calling "new" on it would build a fresh factory with
    // no constructor arguments and discard the injected one.
    /** @var RecordFactory $recordFactory */
    $recordFactory = $this->getConfigurationValue('record_factory');
    foreach ($csv->getRecords() as $record) {
        // Yield exactly one record per CSV row.
        yield $recordFactory->create($record);
    }
}
......
......@@ -6,6 +6,7 @@ namespace Soong\Extractor;
use Doctrine\DBAL\DBALException;
use Doctrine\DBAL\FetchMode;
use Soong\Contracts\Data\Record;
use Soong\Contracts\Data\RecordFactory;
/**
* Extractor for DBAL SQL queries.
......@@ -25,10 +26,6 @@ class DBAL extends CountableExtractorBase
'required' => true,
'allowed_types' => 'array',
];
$options['data_record_class'] = [
'required' => true,
'allowed_types' => 'string',
];
$options['query'] = [
'required' => true,
'allowed_types' => 'string',
......@@ -45,10 +42,11 @@ class DBAL extends CountableExtractorBase
// @todo: don't accept raw SQL from configuration
/** @var \Doctrine\DBAL\Driver\Statement $statement */
$statement = $this->connection()->executeQuery($this->getConfigurationValue('query'));
$recordFactoryClass = $this->getConfigurationValue('record_factory');
/** @var RecordFactory $recordFactory */
$recordFactory = new $recordFactoryClass;
while ($row = $statement->fetch(FetchMode::ASSOCIATIVE)) {
/** @var Record $recordClass */
$recordClass = $this->getConfigurationValue('data_record_class');
yield new $recordClass($row);
yield $recordFactory->create($row);
}
} catch (DBALException $e) {
// @todo
......
......@@ -33,6 +33,10 @@ abstract class ExtractorBase extends OptionsResolverComponent implements Extract
'default_value' => [],
'allowed_types' => 'Soong\Contracts\Filter\Filter[]',
];
$options['record_factory'] = [
'required' => true,
'allowed_types' => 'Soong\Contracts\Data\RecordFactory',
];
return $options;
}
......
......@@ -5,6 +5,7 @@ namespace Soong\Loader;
use Soong\Contracts\Data\Record;
use Soong\Data\Property;
use Soong\Data\PropertyFactory;
/**
* Real dumb demo of a simple loader.
......@@ -28,10 +29,11 @@ class Csv extends LoaderBase
*/
public function load(Record $data) : void
{
// @todo: Don't use concrete Property class.
// @todo: Don't use concrete PropertyFactory class.
$propertyFactory = new PropertyFactory();
$data->setProperty(
array_keys($this->getConfigurationValue('key_properties'))[0],
new Property($this->counter++)
$propertyFactory->create($this->counter++)
);
$properties = $data->toArray();
if (count($properties) > 1) {
......
......@@ -6,6 +6,7 @@ namespace Soong\Loader;
use Doctrine\DBAL\DBALException;
use Soong\Contracts\Data\Record;
use Soong\Data\Property;
use Soong\Data\PropertyFactory;
/**
* Loader for DBAL SQL tables.
......@@ -45,8 +46,9 @@ class DBAL extends LoaderBase
$id = $this->connection()->lastInsertId();
if ($id) {
$keyKeys = array_keys($this->getKeyProperties());
// @todo Inject Property instance.
$data->setProperty(reset($keyKeys), new Property($id));
// @todo: Don't use concrete PropertyFactory class.
$propertyFactory = new PropertyFactory();
$data->setProperty(reset($keyKeys), $propertyFactory->create($id));
}
} catch (DBALException $e) {
print $e->getMessage();
......
......@@ -35,9 +35,13 @@ class EtlTask extends Task implements EtlTaskInterface
$options['key_map'] = [
'allowed_types' => 'Soong\Contracts\KeyMap\KeyMap',
];
$options['record_class'] = [
$options['record_factory'] = [
'required' => true,
'allowed_types' => 'string',
'allowed_types' => 'Soong\Contracts\Data\RecordFactory',
];
$options['property_factory'] = [
'required' => true,
'allowed_types' => 'Soong\Contracts\Data\PropertyFactory',
];
return $options;
}
......@@ -80,6 +84,9 @@ class EtlTask extends Task implements EtlTaskInterface
$transformerConfiguration = $transformerStuff['configuration'] ?? [];
// @todo Better way to provide context
$transformerConfiguration['pipeline'] = $this->getConfigurationValue('pipeline');
$transformerConfiguration['property_factory'] =
$this->getConfigurationValue('property_factory');
/** @var \Soong\Contracts\Transformer\Transformer $transformerClass */
$transformerClass = $transformerStuff['class'];
$transformer = new $transformerClass($transformerConfiguration);
......@@ -101,13 +108,13 @@ class EtlTask extends Task implements EtlTaskInterface
$extractor = $this->getExtractor($options);
$loader = $this->getLoader();
$keyMap = $this->getKeyMap();
/** @var \Soong\Contracts\Data\Record $recordClass */
$recordClass = $taskConfiguration['record_class'];
/** @var \Soong\Contracts\Data\RecordFactory $recordFactory */
$recordFactory = $taskConfiguration['record_factory'];
/** @var \Soong\Contracts\Data\Record $data */
foreach ($extractor->extractFiltered() as $data) {
/** @var \Soong\Contracts\Data\Record $resultData */
$resultData = new $recordClass();
$resultData = $recordFactory->create();
if (isset($taskConfiguration['transform'])) {
foreach ($taskConfiguration['transform'] as $property => $transformerList) {
// Shortcut for directly mapping properties.
......
......@@ -16,6 +16,7 @@ class Copy extends TransformerBase
*/
public function transform(Property $data) : Property
{
    // Properties are immutable, so it's safe to return the same instance
    // directly rather than a clone.
    return $data;
}
}
......@@ -16,7 +16,6 @@ class Double extends TransformerBase
*/
public function transform(Property $data) : Property
{
    // Build the result through the configured factory instead of a concrete
    // Property class.
    /** @var \Soong\Contracts\Data\PropertyFactory $propertyFactory */
    $propertyFactory = $this->getConfigurationValue('property_factory');

    return $propertyFactory->create(2 * $data->getValue());
}
}
......@@ -16,7 +16,6 @@ class Increment extends TransformerBase
*/
public function transform(Property $data) : Property
{
    // Build the result through the configured factory instead of a concrete
    // Property class.
    /** @var \Soong\Contracts\Data\PropertyFactory $propertyFactory */
    $propertyFactory = $this->getConfigurationValue('property_factory');

    return $propertyFactory->create($data->getValue() + 1);
}
}
......@@ -50,11 +50,10 @@ class KeyMapLookup extends TransformerBase
$loadedKey = $keyMap->lookupLoadedKey([$data->getValue()]);
if (!empty($loadedKey)) {
// @todo: Handle multi-value keys properly.
// @todo Don't use concrete class
return new \Soong\Data\Property(reset($loadedKey));
return $this->getConfigurationValue('property_factory')->create(reset($loadedKey));
}
// @todo: Support creation of stubs when nothing found.
}
return new \Soong\Data\Property(null);
return $this->getConfigurationValue('property_factory')->create(null);
}
}
......@@ -22,6 +22,10 @@ abstract class TransformerBase extends OptionsResolverComponent implements Trans
'required' => true,
'allowed_types' => 'Soong\Contracts\Task\TaskPipeline',
];
$options['property_factory'] = [
'required' => true,
'allowed_types' => 'Soong\Contracts\Data\PropertyFactory',
];
return $options;
}
}
......@@ -16,7 +16,6 @@ class UcFirst extends TransformerBase
*/
public function transform(Property $data) : Property
{
    // Build the result through the configured factory instead of a concrete
    // Property class.
    /** @var \Soong\Contracts\Data\PropertyFactory $propertyFactory */
    $propertyFactory = $this->getConfigurationValue('property_factory');

    return $propertyFactory->create(ucfirst($data->getValue()));
}
}
......@@ -29,6 +29,8 @@ class ValueLookup extends TransformerBase
*/
public function transform(Property $data) : Property