Commit 68a62635 authored by Donghan Wang

add a pipeline for LDA

parent 52354188
{
"id": "8437db04-3913-4a34-aa19-7caa6f32a867",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-06-20T17:42:53.731233Z",
"inputs": [
{
"name": "dataset inputs"
}
],
"outputs": [
{
"data": "steps.8.produce",
"name": "output predictions"
}
],
"steps": [
{
"type": "PRIMITIVE",
"primitive": {
"id": "f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e",
"version": "0.2.0",
"python_path": "d3m.primitives.data_transformation.denormalize.Common",
"name": "Denormalize datasets",
"digest": "60f854a4af97bc2310080d8ef22073cd8716e70cdaf3e426045aac01db7c1cd7"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "inputs.0"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
"name": "Extract a DataFrame from a Dataset",
"digest": "45f8322097914f9c95c4f9a8224d02db5d79b7166c74115e2eea7b23ccc13510"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.0.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7",
"version": "0.5.0",
"python_path": "d3m.primitives.data_transformation.column_parser.DataFrameCommon",
"name": "Parses strings into their types",
"digest": "d41ad0c56ef55a233b21f4a4d8df1ac782aca7a78ef98dbfb72215690b3e9850"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.1.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon",
"name": "Extracts columns by semantic type",
"digest": "e91e0f7569ad53b6d4b8c01641f80fb0aa764b5dd3ae71dd2fbb433fa62c7f81"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.2.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"semantic_types": {
"type": "VALUE",
"data": [
"https://metadata.datadrivendiscovery.org/types/Attribute"
]
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon",
"name": "Extracts columns by semantic type",
"digest": "e91e0f7569ad53b6d4b8c01641f80fb0aa764b5dd3ae71dd2fbb433fa62c7f81"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.2.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"semantic_types": {
"type": "VALUE",
"data": [
"https://metadata.datadrivendiscovery.org/types/TrueTarget"
]
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "0b21fcca-8b35-457d-a65d-36294c6f80a2",
"version": "0.1.0",
"python_path": "d3m.primitives.data_preprocessing.text_reader.DataFrameCommon",
"name": "Columns text reader",
"digest": "6fb4aa17c37c60379e5e773e1c6423acfe50d05d0dcf0e85594520efaec5992e"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.3.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"return_result": {
"type": "VALUE",
"data": "replace"
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "f410b951-1cb6-481c-8d95-2d97b31d411d",
"version": "3.0.1",
"python_path": "d3m.primitives.natural_language_processing.lda.Fastlvm",
"name": "Latent Dirichlet Allocation Topic Modelling",
"digest": "39c99a619277e94a465e579574a5002dd0ee7cc996b8f0fe7a2aa063881b42b7"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.5.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"k": {
"type": "VALUE",
"data": 100
},
"iters": {
"type": "VALUE",
"data": 300
},
"frac": {
"type": "VALUE",
"data": 0.001
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "1dd82833-5692-39cb-84fb-2455683075f3",
"version": "2019.6.7",
"python_path": "d3m.primitives.classification.random_forest.SKlearn",
"name": "sklearn.ensemble.forest.RandomForestClassifier",
"digest": "d8571a6f70248ceba46c2231a0afeaebaced127bdd7b0ebf087367461c0ad09d"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.6.produce"
},
"outputs": {
"type": "CONTAINER",
"data": "steps.4.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"n_estimators": {
"type": "VALUE",
"data": 100
},
"n_jobs": {
"type": "VALUE",
"data": {
"case": "all_cores",
"value": -1
}
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "8d38b340-f83f-4877-baaa-162f8e551736",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.construct_predictions.DataFrameCommon",
"name": "Construct pipeline predictions output",
"digest": "53087aaa6baf0ccc96b6525ca5b79fd4e51b4cce996ab39773a9b0b3e746bf05"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.7.produce"
},
"reference": {
"type": "CONTAINER",
"data": "steps.1.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
}
],
"digest": "3599cf3c246aad664100a6703b1c9ec1f7b44f8b1c12518659f21d3f3181f1d3"
}
\ No newline at end of file
{
"problem": "LL1_TXT_CLS_airline_opinion_problem",
"full_inputs": [
"LL1_TXT_CLS_airline_opinion_dataset"
],
"train_inputs": [
"LL1_TXT_CLS_airline_opinion_dataset_TRAIN"
],
"test_inputs": [
"LL1_TXT_CLS_airline_opinion_dataset_TEST"
],
"score_inputs": [
"LL1_TXT_CLS_airline_opinion_dataset_SCORE"
]
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment