Commit 7741b751 authored by Donghan Wang

NLP primitives now properly handle metadata

* add pipeline example for 27_wordLevels_problem for LDA
parent 80880915
......@@ -24,7 +24,7 @@
"installation": [
{
"type": "PIP",
-"package_uri": "git+https://github.com/autonlab/fastlvm.git@6f59a1ace60912dfb97a90d65c13234853adb413#egg=fastlvm"
+"package_uri": "git+https://github.com/autonlab/fastlvm.git@b81edbb36a15e5c969498fac5dfd2abf336ee2ba#egg=fastlvm"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -216,5 +216,5 @@
}
},
"structural_type": "fastlvm.covertree_classifier.CoverTreeClassifier",
-"digest": "00cf758b4ed4eb3f75e2d6448b24c1301982b483f2a5cf33ee3b2da25a979c0c"
+"digest": "f6576887c775d9a4f5775f892fbb10cff21ee7045593ce472134626274f4214f"
}
......@@ -23,7 +23,7 @@
"installation": [
{
"type": "PIP",
-"package_uri": "git+https://github.com/autonlab/fastlvm.git@6f59a1ace60912dfb97a90d65c13234853adb413#egg=fastlvm"
+"package_uri": "git+https://github.com/autonlab/fastlvm.git@b81edbb36a15e5c969498fac5dfd2abf336ee2ba#egg=fastlvm"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -240,5 +240,5 @@
}
},
"structural_type": "fastlvm.gmm.GMM",
-"digest": "6baf0ca5b077c13dd78a696271a924c41e07a93ff7a511ee259181877bd16783"
+"digest": "b31cbfa5cf56b62c801b4b8612defa25717283012e666353f1b21a054c5ffb1d"
}
......@@ -23,7 +23,7 @@
"installation": [
{
"type": "PIP",
-"package_uri": "git+https://github.com/autonlab/fastlvm.git@6f59a1ace60912dfb97a90d65c13234853adb413#egg=fastlvm"
+"package_uri": "git+https://github.com/autonlab/fastlvm.git@b81edbb36a15e5c969498fac5dfd2abf336ee2ba#egg=fastlvm"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -240,5 +240,5 @@
}
},
"structural_type": "fastlvm.kmeans.KMeans",
-"digest": "71877e27c75760ed6f4218d3256c74d63a1bdc594d6f9d9921fed7d64e2fd7ac"
+"digest": "1087fa89866e8cb510798121b106ca63b47946951e615b3fbf99833c1ac50d73"
}
......@@ -24,7 +24,7 @@
"installation": [
{
"type": "PIP",
-"package_uri": "git+https://github.com/autonlab/fastlvm.git@6f59a1ace60912dfb97a90d65c13234853adb413#egg=fastlvm"
+"package_uri": "git+https://github.com/autonlab/fastlvm.git@b81edbb36a15e5c969498fac5dfd2abf336ee2ba#egg=fastlvm"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -315,5 +315,5 @@
}
},
"structural_type": "fastlvm.glda.GLDA",
-"digest": "d3751cd8e9ecfbd24ac5ccedf3abb1deb18d32556c6470165271332640168bb9"
+"digest": "620cb359e8981a1cecd0cc38ef7bcca4465f9d1351fc0c48cdc5418d7c7f5d3d"
}
......@@ -25,7 +25,7 @@
"installation": [
{
"type": "PIP",
-"package_uri": "git+https://github.com/autonlab/fastlvm.git@6f59a1ace60912dfb97a90d65c13234853adb413#egg=fastlvm"
+"package_uri": "git+https://github.com/autonlab/fastlvm.git@b81edbb36a15e5c969498fac5dfd2abf336ee2ba#egg=fastlvm"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -280,5 +280,5 @@
}
},
"structural_type": "fastlvm.hdp.HDP",
-"digest": "b040c8f4c2129c9f6dc62d46ca8e3620bfd1d2e2de2e0a1bc3f3465c7320a391"
+"digest": "20f31e26194aea2b00ac546a86df02f8eb991c5188741396316dbaa864d55deb"
}
{
"id": "ef8aff40-bec3-48bc-8de0-86074b4c97be",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-05-28T19:28:09.970595Z",
"inputs": [
{
"name": "dataset inputs"
}
],
"outputs": [
{
"data": "steps.7.produce",
"name": "output predictions"
}
],
"steps": [
{
"type": "PRIMITIVE",
"primitive": {
"id": "f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e",
"version": "0.2.0",
"python_path": "d3m.primitives.data_transformation.denormalize.Common",
"name": "Denormalize datasets",
"digest": "be65aa8b920356392be6ecf2a3244c06d87d8611a2379e0bbba6ad92b4b5ffb9"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "inputs.0"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
"name": "Extract a DataFrame from a Dataset",
"digest": "b8251591d8f5f03e1c4d3ec740016c386ad6c20216cfd7d7dd9b8b2e3ab0595f"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.0.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7",
"version": "0.5.0",
"python_path": "d3m.primitives.data_transformation.column_parser.DataFrameCommon",
"name": "Parses strings into their types",
"digest": "d268f582f5bf30206f1e43ef46f435576a30413132bc73279f3fca36937184eb"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.1.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"version": "0.2.0",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon",
"name": "Extracts columns by semantic type",
"digest": "a0d61e03605d9e487c29de5d88a1c7f32d9aef8e23a413b794dad2f4da8ee836"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.2.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"semantic_types": {
"type": "VALUE",
"data": [
"https://metadata.datadrivendiscovery.org/types/Attribute"
]
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"version": "0.2.0",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon",
"name": "Extracts columns by semantic type",
"digest": "a0d61e03605d9e487c29de5d88a1c7f32d9aef8e23a413b794dad2f4da8ee836"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.2.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"semantic_types": {
"type": "VALUE",
"data": [
"https://metadata.datadrivendiscovery.org/types/TrueTarget"
]
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "f410b951-1cb6-481c-8d95-2d97b31d411d",
"version": "3.0.1",
"python_path": "d3m.primitives.natural_language_processing.lda.Fastlvm",
"name": "Latent Dirichlet Allocation Topic Modelling",
"digest": "84c9d229af02645fc1dceac2e96a3923195a150b4f32a5ca48bf06fdf6fbe274"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.3.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"k": {
"type": "VALUE",
"data": 100
},
"iters": {
"type": "VALUE",
"data": 100
},
"frac": {
"type": "VALUE",
"data": 0.001
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "01d2c086-91bf-3ca5-b023-5139cf239c77",
"version": "2019.4.4",
"python_path": "d3m.primitives.classification.gradient_boosting.SKlearn",
"name": "sklearn.ensemble.gradient_boosting.GradientBoostingClassifier",
"digest": "28c1e3094bddacbb334ed96593eb2ee9535e9f3fdd688c3c3673d3e08cf8d565"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.5.produce"
},
"outputs": {
"type": "CONTAINER",
"data": "steps.4.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "8d38b340-f83f-4877-baaa-162f8e551736",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.construct_predictions.DataFrameCommon",
"name": "Construct pipeline predictions output",
"digest": "1fb337b1987ecedebfa35bbd80ac6125f4a7f71435b02d3b349ef5547588bb71"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.6.produce"
},
"reference": {
"type": "CONTAINER",
"data": "steps.1.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
}
],
"digest": "f5b02388f119932616f00f9ee97d2f6dafbf9e58f163fd4c1b0f9822aeb16e87"
}
\ No newline at end of file
{
"problem": "27_wordLevels_problem",
"full_inputs": [
"27_wordLevels_dataset"
],
"train_inputs": [
"27_wordLevels_dataset_TRAIN"
],
"test_inputs": [
"27_wordLevels_dataset_TEST"
],
"score_inputs": [
"27_wordLevels_dataset_SCORE"
]
}
\ No newline at end of file
......@@ -24,7 +24,7 @@
"installation": [
{
"type": "PIP",
-"package_uri": "git+https://github.com/autonlab/fastlvm.git@6f59a1ace60912dfb97a90d65c13234853adb413#egg=fastlvm"
+"package_uri": "git+https://github.com/autonlab/fastlvm.git@b81edbb36a15e5c969498fac5dfd2abf336ee2ba#egg=fastlvm"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -279,5 +279,5 @@
}
},
"structural_type": "fastlvm.lda.LDA",
-"digest": "397be841c8ea971992c039576e82c0efaa9e0df742a6d81d65cc4d1da0a92ac0"
+"digest": "0546ed8258577d4ef14cb94018219bc8ffeb310ecffd39598333384f009bc225"
}
......@@ -24,7 +24,7 @@
"installation": [
{
"type": "PIP",
-"package_uri": "git+https://github.com/autonlab/fastlvm.git@6f59a1ace60912dfb97a90d65c13234853adb413#egg=fastlvm"
+"package_uri": "git+https://github.com/autonlab/fastlvm.git@b81edbb36a15e5c969498fac5dfd2abf336ee2ba#egg=fastlvm"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -216,5 +216,5 @@
}
},
"structural_type": "fastlvm.covertree_regressor.CoverTreeRegressor",
-"digest": "8b37f6547091f8f87eecf651489460e6d59d8846666b8ff7b431a854440d2f05"
+"digest": "ad336718a94f565483f63dacf0e2841259c5121bbe993ea75369868f9af3c6ce"
}
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment