Commit bdf7cd03 authored by Mitar

Merge branch 'cmu-2019.5.22-1525' into 'master'

Enhancement for NLP primitives

See merge request !124
parents 29d81e8f 22606c9d
......@@ -24,7 +24,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/autonlab/fastlvm.git@f3229d5190a6eb5f0b3d20bb3e45f20f49feea92#egg=fastlvm"
"package_uri": "git+https://github.com/autonlab/fastlvm.git@6f59a1ace60912dfb97a90d65c13234853adb413#egg=fastlvm"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -216,5 +216,5 @@
}
},
"structural_type": "fastlvm.covertree_classifier.CoverTreeClassifier",
"digest": "3346d6c52e2ad646d95acd68e85b0b08aa9000ce24c27ff34bad65adca95be3c"
"digest": "00cf758b4ed4eb3f75e2d6448b24c1301982b483f2a5cf33ee3b2da25a979c0c"
}
......@@ -23,7 +23,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/autonlab/fastlvm.git@f3229d5190a6eb5f0b3d20bb3e45f20f49feea92#egg=fastlvm"
"package_uri": "git+https://github.com/autonlab/fastlvm.git@6f59a1ace60912dfb97a90d65c13234853adb413#egg=fastlvm"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -240,5 +240,5 @@
}
},
"structural_type": "fastlvm.gmm.GMM",
"digest": "1c18b0887841096424a199a9adf13dc95f5c3c34500114bb16cb2cc58f3aabdd"
"digest": "6baf0ca5b077c13dd78a696271a924c41e07a93ff7a511ee259181877bd16783"
}
......@@ -23,7 +23,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/autonlab/fastlvm.git@f3229d5190a6eb5f0b3d20bb3e45f20f49feea92#egg=fastlvm"
"package_uri": "git+https://github.com/autonlab/fastlvm.git@6f59a1ace60912dfb97a90d65c13234853adb413#egg=fastlvm"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -240,5 +240,5 @@
}
},
"structural_type": "fastlvm.kmeans.KMeans",
"digest": "8a0784246835d7258eb3622f548662c5c61d8221f432d25d01f68dd1437e469f"
"digest": "71877e27c75760ed6f4218d3256c74d63a1bdc594d6f9d9921fed7d64e2fd7ac"
}
{
"id": "1161bfb1-4e72-4760-b9be-029fa41427fe",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-05-22T19:16:38.622954Z",
"inputs": [
{
"name": "dataset inputs"
}
],
"outputs": [
{
"data": "steps.7.produce",
"name": "output predictions"
}
],
"steps": [
{
"type": "PRIMITIVE",
"primitive": {
"id": "f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e",
"version": "0.2.0",
"python_path": "d3m.primitives.data_transformation.denormalize.Common",
"name": "Denormalize datasets"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "inputs.0"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
"name": "Extract a DataFrame from a Dataset"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.0.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7",
"version": "0.5.0",
"python_path": "d3m.primitives.data_transformation.column_parser.DataFrameCommon",
"name": "Parses strings into their types"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.1.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"version": "0.2.0",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon",
"name": "Extracts columns by semantic type"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.2.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"semantic_types": {
"type": "VALUE",
"data": [
"https://metadata.datadrivendiscovery.org/types/Attribute"
]
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"version": "0.2.0",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon",
"name": "Extracts columns by semantic type"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.2.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"semantic_types": {
"type": "VALUE",
"data": [
"https://metadata.datadrivendiscovery.org/types/TrueTarget"
]
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "a3d490a4-ef39-4de1-be02-4c43726b3b24",
"version": "3.0.1",
"python_path": "d3m.primitives.natural_language_processing.glda.Fastlvm",
"name": "Gaussian Latent Dirichlet Allocation Topic Modelling"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.3.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"k": {
"type": "VALUE",
"data": 100
},
"iters": {
"type": "VALUE",
"data": 10
},
"frac": {
"type": "VALUE",
"data": 0.001
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "01d2c086-91bf-3ca5-b023-5139cf239c77",
"version": "2019.4.4",
"python_path": "d3m.primitives.classification.gradient_boosting.SKlearn",
"name": "sklearn.ensemble.gradient_boosting.GradientBoostingClassifier"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.5.produce"
},
"outputs": {
"type": "CONTAINER",
"data": "steps.4.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "8d38b340-f83f-4877-baaa-162f8e551736",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.construct_predictions.DataFrameCommon",
"name": "Construct pipeline predictions output"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.6.produce"
},
"reference": {
"type": "CONTAINER",
"data": "steps.1.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
}
]
}
\ No newline at end of file
{
"problem": "LL0_acled_reduced_problem",
"full_inputs": [
"LL0_acled_reduced_dataset"
],
"train_inputs": [
"LL0_acled_reduced_dataset_TRAIN"
],
"test_inputs": [
"LL0_acled_reduced_dataset_TEST"
],
"score_inputs": [
"LL0_acled_reduced_dataset_SCORE"
]
}
......@@ -24,7 +24,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/autonlab/fastlvm.git@f3229d5190a6eb5f0b3d20bb3e45f20f49feea92#egg=fastlvm"
"package_uri": "git+https://github.com/autonlab/fastlvm.git@6f59a1ace60912dfb97a90d65c13234853adb413#egg=fastlvm"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -315,5 +315,5 @@
}
},
"structural_type": "fastlvm.glda.GLDA",
"digest": "bca9e8f7fdd8a15f279a4467efa178bc739d20d2159e1083516177a3c290a39a"
"digest": "d3751cd8e9ecfbd24ac5ccedf3abb1deb18d32556c6470165271332640168bb9"
}
{
"id": "b226fda4-fef7-4e10-adf2-dae995f87aed",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-05-22T19:11:00.269910Z",
"inputs": [
{
"name": "dataset inputs"
}
],
"outputs": [
{
"data": "steps.7.produce",
"name": "output predictions"
}
],
"steps": [
{
"type": "PRIMITIVE",
"primitive": {
"id": "f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e",
"version": "0.2.0",
"python_path": "d3m.primitives.data_transformation.denormalize.Common",
"name": "Denormalize datasets"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "inputs.0"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
"name": "Extract a DataFrame from a Dataset"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.0.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7",
"version": "0.5.0",
"python_path": "d3m.primitives.data_transformation.column_parser.DataFrameCommon",
"name": "Parses strings into their types"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.1.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"version": "0.2.0",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon",
"name": "Extracts columns by semantic type",
"digest": "a0d61e03605d9e487c29de5d88a1c7f32d9aef8e23a413b794dad2f4da8ee836"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.2.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"semantic_types": {
"type": "VALUE",
"data": [
"https://metadata.datadrivendiscovery.org/types/Attribute"
]
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"version": "0.2.0",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon",
"name": "Extracts columns by semantic type"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.2.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"semantic_types": {
"type": "VALUE",
"data": [
"https://metadata.datadrivendiscovery.org/types/TrueTarget"
]
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "e582e738-2f7d-4b5d-964f-022d15f19018",
"version": "3.0.1",
"python_path": "d3m.primitives.natural_language_processing.hdp.Fastlvm",
"name": "Hierarchical Dirichlet Process Topic Modelling"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.3.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"k": {
"type": "VALUE",
"data": 100
},
"iters": {
"type": "VALUE",
"data": 100
},
"frac": {
"type": "VALUE",
"data": 0.001
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "01d2c086-91bf-3ca5-b023-5139cf239c77",
"version": "2019.4.4",
"python_path": "d3m.primitives.classification.gradient_boosting.SKlearn",
"name": "sklearn.ensemble.gradient_boosting.GradientBoostingClassifier"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.5.produce"
},
"outputs": {
"type": "CONTAINER",
"data": "steps.4.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "8d38b340-f83f-4877-baaa-162f8e551736",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.construct_predictions.DataFrameCommon",
"name": "Construct pipeline predictions output"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.6.produce"
},
"reference": {
"type": "CONTAINER",
"data": "steps.1.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
}
]
}
\ No newline at end of file
{
"problem": "LL0_acled_reduced_problem",
"full_inputs": [
"LL0_acled_reduced_dataset"
],
"train_inputs": [
"LL0_acled_reduced_dataset_TRAIN"
],
"test_inputs": [
"LL0_acled_reduced_dataset_TEST"
],
"score_inputs": [
"LL0_acled_reduced_dataset_SCORE"
]
}
......@@ -25,7 +25,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/autonlab/fastlvm.git@f3229d5190a6eb5f0b3d20bb3e45f20f49feea92#egg=fastlvm"
"package_uri": "git+https://github.com/autonlab/fastlvm.git@6f59a1ace60912dfb97a90d65c13234853adb413#egg=fastlvm"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -280,5 +280,5 @@
}
},
"structural_type": "fastlvm.hdp.HDP",
"digest": "7b62af3e17134c5b310945f09733c00bbada32b0357f510e83927fd38bb5895e"
"digest": "b040c8f4c2129c9f6dc62d46ca8e3620bfd1d2e2de2e0a1bc3f3465c7320a391"
}
{
"id": "0fe96efd-821f-446f-a0cf-c0276b5e1f34",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-05-22T19:01:16.221573Z",
"inputs": [
{
"name": "dataset inputs"
}
],
"outputs": [
{
"data": "steps.7.produce",
"name": "output predictions"
}
],
"steps": [
{
"type": "PRIMITIVE",
"primitive": {
"id": "f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e",
"version": "0.2.0",
"python_path": "d3m.primitives.data_transformation.denormalize.Common",
"name": "Denormalize datasets",
"digest": "be65aa8b920356392be6ecf2a3244c06d87d8611a2379e0bbba6ad92b4b5ffb9"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "inputs.0"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
"name": "Extract a DataFrame from a Dataset"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.0.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7",
"version": "0.5.0",
"python_path": "d3m.primitives.data_transformation.column_parser.DataFrameCommon",
"name": "Parses strings into their types"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.1.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"version": "0.2.0",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon",
"name": "Extracts columns by semantic type"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.2.produce"
}