Commit 085d9ebc authored by Micaël Bergeron

loading from a .ktl file works

parent ce4580cd
......@@ -4,20 +4,48 @@ from xml.etree import ElementTree
from elt.schema import Schema, Column, DBType
# Lookup from the text of a field's <type> element to the DBType used for
# the resulting column. 'Date' is only a starting point: field_data_type
# may turn it into a Timestamp depending on the field's <format>.
data_type_map = {
    'String': DBType.String,
    'Number': DBType.Long,
    'Boolean': DBType.Boolean,
    'Date': DBType.Date,
}
def loads(schema_name: str, raw: str) -> Schema:
    """Build a Schema from the XML text of a Kettle transformation.

    The first top-level <step> whose <type> child is ``SalesforceInput`` is
    read: the text of its <module> child is used as the table name, and
    every ``fields/field`` element below it becomes one column.

    Args:
        schema_name: Name passed to ``Schema`` and used as the
            ``table_schema`` of every column.
        raw: The transformation document as an XML string.

    Returns:
        The Schema with one column added per field element.

    Raises:
        ValueError: If the document has no SalesforceInput step, or that
            step has no <module> element.
    """
    tree = ElementTree.fromstring(raw)
    schema = Schema(schema_name)

    # ElementTree's find() returns None when nothing matches; fail with a
    # clear message instead of an AttributeError on None further down.
    sfdc_input_step = tree.find("step[type='SalesforceInput']")
    if sfdc_input_step is None:
        raise ValueError("no SalesforceInput step found in transformation")

    module = sfdc_input_step.find("module")
    if module is None:
        raise ValueError("SalesforceInput step has no <module> element")
    table_name = module.text

    for field in sfdc_input_step.iterfind("fields/field"):
        schema.add_column(field_column(schema_name, table_name, field))

    return schema
def field_column(table_schema, table_name, element):
    """Turn one <field> element into a Column.

    Args:
        table_schema: Value stored as the column's ``table_schema``.
        table_name: Value stored as the column's ``table_name``.
        element: The <field> element; its ``idlookup``, ``field``, ``type``
            and ``format`` children are read.

    Returns:
        A Column named after the text of the ``field`` child. It is a
        mapping key when ``idlookup`` is "Y", and nullable otherwise.
    """
    lookup_flag = element.find("idlookup").text
    key_column = lookup_flag == "Y"

    name = element.find("field").text
    db_type = field_data_type(element)

    return Column(
        table_schema=table_schema,
        table_name=table_name,
        column_name=name,
        data_type=db_type.value,
        # Mapping keys are the only columns marked non-nullable.
        is_nullable=not key_column,
        is_mapping_key=key_column,
    )
def field_data_type(element):
    """Resolve the DBType for one <field> element.

    The text of the ``type`` child is looked up in ``data_type_map``. A
    result of ``DBType.Date`` is kept only when the ``format`` child's text
    is exactly "yyyy-MM-dd"; any other format yields ``DBType.Timestamp``.

    Raises:
        KeyError: If the type text is not a key of ``data_type_map``.
    """
    type_name = element.find("type").text
    format_pattern = element.find("format").text

    resolved = data_type_map[type_name]

    # A date field carries a time component (and possibly a timezone)
    # unless its format is the plain date-only pattern.
    if resolved == DBType.Date and format_pattern != "yyyy-MM-dd":
        return DBType.Timestamp
    return resolved
import elt.schema.serializers.kettle as kettle
def test_config():
    """Load the sample Kettle transformation and check two User columns."""
    schema = kettle.loads("sfdc", SAMPLE_KETTLE_CONFIG)

    # schema.columns is probed with (table name, column name) pairs.
    assert ("User", "Id") in schema.columns
    assert ("User", "LastName") in schema.columns
# Fixture for test_config: a transformation with one InsertUpdate step and
# one SalesforceInput step on the "User" module. The input step declares ten
# fields covering the String, Number, Boolean and Date types; the two Date
# fields differ in format (date-only "yyyy-MM-dd" versus a full timestamp
# pattern with a zone offset).
SAMPLE_KETTLE_CONFIG = """<?xml version="1.0" encoding="UTF-8"?>
<transformation>
<step>
<name>Insert / Update</name>
<type>InsertUpdate</type>
</step>
<step>
<name>Salesforce Input</name>
<type>SalesforceInput</type>
<module>User</module>
<fields>
<field>
<name>User ID</name>
<field>Id</field>
<idlookup>Y</idlookup>
<type>String</type>
<format />
</field>
<field>
<name>Username</name>
<field>Username</field>
<idlookup>Y</idlookup>
<type>String</type>
<format />
</field>
<field>
<name>Last Name</name>
<field>LastName</field>
<idlookup>N</idlookup>
<type>String</type>
<format />
</field>
<field>
<name>First Name</name>
<field>FirstName</field>
<idlookup>N</idlookup>
<type>String</type>
<format />
</field>
<field>
<name>Suffix</name>
<field>Suffix</field>
<idlookup>N</idlookup>
<type>String</type>
<format />
</field>
<field>
<name></name>
<field>Name</field>
<idlookup>N</idlookup>
<type>String</type>
<format />
</field>
<field>
<name>Marketo Sales Insight Welcome Counter</name>
<field>mkto_si__Sales_Insight_Counter__c</field>
<idlookup>N</idlookup>
<type>Number</type>
<format />
</field>
<field>
<name>Has Profile Photo</name>
<field>IsProfilePhotoActive</field>
<idlookup>N</idlookup>
<type>Boolean</type>
<format />
</field>
<field>
<name>Start Date</name>
<field>Start_Date__c</field>
<idlookup>N</idlookup>
<type>Date</type>
<format>yyyy-MM-dd</format>
</field>
<field>
<name>Last Login</name>
<field>LastLoginDate</field>
<idlookup>N</idlookup>
<type>Date</type>
<format>yyyy-MM-dd'T'HH:mm:ss'.000'XXX</format>
</field>
</fields>
</step>
</transformation>
"""
......@@ -151,9 +151,7 @@ def test_loads():
"""
schema = serializer.loads("singer", singer_catalog)
assert(len(schema.tables) == 1)
import pdb; pdb.set_trace()
def test_load():
......@@ -162,5 +160,3 @@ def test_load():
schema = serializer.load("singer", open(singer_catalog_path))
assert(len(schema.tables) > 20)
assert("Usage_Ping_Data__c" in schema.tables)
import pdb; pdb.set_trace()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment