Commit 0c4b96e2 authored by Mitar's avatar Mitar

Merge branch 'common-primitives' into 'master'

Updating common primitives

See merge request !21
parents ba6200c6 49e351b7
......@@ -17,7 +17,7 @@
},
{
"type": "PIP",
"package_uri": "git+https://gitlab.com/datadrivendiscovery/common-primitives.git@4aff2f44ec6f9e574a30f0d474fe9cb5113c1858#egg=common-primitives"
"package_uri": "git+https://gitlab.com/datadrivendiscovery/common-primitives.git@e3fee9ca42448f809a4bc4ff23f0f28b054e1752#egg=common-primitives"
}
],
"name": "common_primitives.BayesianLogisticRegression",
......@@ -282,5 +282,5 @@
},
"structural_type": "common_primitives.logistic_regression.BayesianLogisticRegression",
"description": "Example of a primitive wrapping logistic regression using PyMC3 and its\nTheano backend.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"digest": "279553b710fe97ccdb0ef8438a3f09f9e5422f41e1f43df45e3251c552134b99"
"digest": "30c543453b3c5050793876db7506a7d94438ef9ef4d69d3380f4b3b65ea26ce9"
}
{
"context": "TESTING",
"created": "2019-02-12T01:09:44.343543Z",
"id": "d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde",
"inputs": [
{
"name": "inputs"
}
],
"outputs": [
{
"data": "steps.7.produce",
"name": "output predictions"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"steps": [
{
"arguments": {
"inputs": {
"data": "inputs.0",
"type": "CONTAINER"
}
},
"outputs": [
{
"id": "produce"
}
],
"primitive": {
"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65",
"name": "Extract a DataFrame from a Dataset",
"python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
"version": "0.3.0"
},
"type": "PRIMITIVE"
},
{
"arguments": {
"inputs": {
"data": "steps.0.produce",
"type": "CONTAINER"
}
},
"hyperparams": {
"parse_semantic_types": {
"data": [
"http://schema.org/Boolean",
"http://schema.org/Integer",
"http://schema.org/Float",
"https://metadata.datadrivendiscovery.org/types/FloatVector",
"http://schema.org/DateTime"
],
"type": "VALUE"
}
},
"outputs": [
{
"id": "produce"
}
],
"primitive": {
"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7",
"name": "Parses strings into their types",
"python_path": "d3m.primitives.data_transformation.column_parser.DataFrameCommon",
"version": "0.5.0"
},
"type": "PRIMITIVE"
},
{
"arguments": {
"inputs": {
"data": "steps.1.produce",
"type": "CONTAINER"
}
},
"hyperparams": {
"semantic_types": {
"data": [
"https://metadata.datadrivendiscovery.org/types/CategoricalData"
],
"type": "VALUE"
}
},
"outputs": [
{
"id": "produce"
}
],
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"name": "Extracts columns by semantic type",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon",
"version": "0.2.0"
},
"type": "PRIMITIVE"
},
{
"arguments": {
"inputs": {
"data": "steps.1.produce",
"type": "CONTAINER"
}
},
"hyperparams": {
"exclude_columns": {
"data": [
0
],
"type": "VALUE"
},
"semantic_types": {
"data": [
"http://schema.org/Integer",
"http://schema.org/Float"
],
"type": "VALUE"
}
},
"outputs": [
{
"id": "produce"
}
],
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"name": "Extracts columns by semantic type",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon",
"version": "0.2.0"
},
"type": "PRIMITIVE"
},
{
"arguments": {
"inputs": {
"data": "steps.0.produce",
"type": "CONTAINER"
}
},
"hyperparams": {
"semantic_types": {
"data": [
"https://metadata.datadrivendiscovery.org/types/TrueTarget"
],
"type": "VALUE"
}
},
"outputs": [
{
"id": "produce"
}
],
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"name": "Extracts columns by semantic type",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon",
"version": "0.2.0"
},
"type": "PRIMITIVE"
},
{
"arguments": {
"inputs": {
"data": "steps.3.produce",
"type": "CONTAINER"
}
},
"hyperparams": {
"return_result": {
"data": "replace",
"type": "VALUE"
},
"use_semantic_types": {
"data": true,
"type": "VALUE"
}
},
"outputs": [
{
"id": "produce"
}
],
"primitive": {
"id": "d016df89-de62-3c53-87ed-c06bb6a23cde",
"name": "sklearn.impute.SimpleImputer",
"python_path": "d3m.primitives.data_cleaning.imputer.SKlearn",
"version": "2019.4.4"
},
"type": "PRIMITIVE"
},
{
"arguments": {
"inputs": {
"data": "steps.5.produce",
"type": "CONTAINER"
},
"outputs": {
"data": "steps.4.produce",
"type": "CONTAINER"
}
},
"hyperparams": {
"return_result": {
"data": "replace",
"type": "VALUE"
}
},
"outputs": [
{
"id": "produce"
}
],
"primitive": {
"id": "259aa747-795c-435e-8e33-8c32a4c83c6b",
"name": "LightGBM GBTree classifier",
"python_path": "d3m.primitives.classification.light_gbm.DataFrameCommon",
"version": "0.1.0"
},
"type": "PRIMITIVE"
},
{
"arguments": {
"inputs": {
"data": "steps.6.produce",
"type": "CONTAINER"
},
"reference": {
"data": "steps.1.produce",
"type": "CONTAINER"
}
},
"outputs": [
{
"id": "produce"
}
],
"primitive": {
"id": "8d38b340-f83f-4877-baaa-162f8e551736",
"name": "Construct pipeline predictions output",
"python_path": "d3m.primitives.data_transformation.construct_predictions.DataFrameCommon",
"version": "0.3.0"
},
"type": "PRIMITIVE"
}
]
}
{
"problem": "185_baseball_problem",
"full_inputs": ["185_baseball_dataset"],
"train_inputs": ["185_baseball_dataset_TRAIN"],
"test_inputs": ["185_baseball_dataset_TEST"],
"score_inputs": ["185_baseball_dataset_SCORE"]
}
......@@ -18,7 +18,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://gitlab.com/datadrivendiscovery/common-primitives.git@4aff2f44ec6f9e574a30f0d474fe9cb5113c1858#egg=common-primitives"
"package_uri": "git+https://gitlab.com/datadrivendiscovery/common-primitives.git@e3fee9ca42448f809a4bc4ff23f0f28b054e1752#egg=common-primitives"
}
],
"algorithm_types": [
......@@ -594,5 +594,5 @@
},
"structural_type": "common_primitives.lgbm_classifier.LightGBMClassifierPrimitive",
"description": "A lightGBM classifier using ``lgbm.LGBMClassifier``.\n\nIt uses semantic types to determine which columns to operate on.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"digest": "02af663b7acc375c86147ec0fdbaf8021aafffab894be44b4a31147b340dfa5a"
"digest": "0939bd0f51f2eba9696f652220a894fec25a0ea4d008442fdd45eecdee0de011"
}
......@@ -18,7 +18,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://gitlab.com/datadrivendiscovery/common-primitives.git@4aff2f44ec6f9e574a30f0d474fe9cb5113c1858#egg=common-primitives"
"package_uri": "git+https://gitlab.com/datadrivendiscovery/common-primitives.git@e3fee9ca42448f809a4bc4ff23f0f28b054e1752#egg=common-primitives"
}
],
"algorithm_types": [
......@@ -628,5 +628,5 @@
},
"structural_type": "common_primitives.random_forest.RandomForestClassifierPrimitive",
"description": "A random forest classifier using ``sklearn.ensemble.forest.RandomForestClassifier``.\n\nIt uses semantic types to determine which columns to operate on.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"digest": "714518bb1dfc3fdf08aa4c12f2822144383fa4a3fc6e4dd84ea18a12329109fb"
"digest": "1ca872750c97c86c1faed9d7f003e331c99d472ab637ea6e00a25eb9bcecd254"
}
{
"context": "TESTING",
"created": "2019-02-12T01:33:29.921236Z",
"id": "b7a24816-2518-4073-9c45-b97f2b2fee30",
"inputs": [
{
"name": "inputs"
}
],
"outputs": [
{
"data": "steps.7.produce",
"name": "output predictions"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"steps": [
{
"arguments": {
"inputs": {
"data": "inputs.0",
"type": "CONTAINER"
}
},
"outputs": [
{
"id": "produce"
}
],
"primitive": {
"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65",
"name": "Extract a DataFrame from a Dataset",
"python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
"version": "0.3.0"
},
"type": "PRIMITIVE"
},
{
"arguments": {
"inputs": {
"data": "steps.0.produce",
"type": "CONTAINER"
}
},
"hyperparams": {
"parse_semantic_types": {
"data": [
"http://schema.org/Boolean",
"http://schema.org/Integer",
"http://schema.org/Float",
"https://metadata.datadrivendiscovery.org/types/FloatVector",
"http://schema.org/DateTime"
],
"type": "VALUE"
}
},
"outputs": [
{
"id": "produce"
}
],
"primitive": {
"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7",
"name": "Parses strings into their types",
"python_path": "d3m.primitives.data_transformation.column_parser.DataFrameCommon",
"version": "0.5.0"
},
"type": "PRIMITIVE"
},
{
"arguments": {
"inputs": {
"data": "steps.1.produce",
"type": "CONTAINER"
}
},
"hyperparams": {
"semantic_types": {
"data": [
"https://metadata.datadrivendiscovery.org/types/CategoricalData"
],
"type": "VALUE"
}
},
"outputs": [
{
"id": "produce"
}
],
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"name": "Extracts columns by semantic type",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon",
"version": "0.2.0"
},
"type": "PRIMITIVE"
},
{
"arguments": {
"inputs": {
"data": "steps.1.produce",
"type": "CONTAINER"
}
},
"hyperparams": {
"exclude_columns": {
"data": [
0
],
"type": "VALUE"
},
"semantic_types": {
"data": [
"http://schema.org/Integer",
"http://schema.org/Float"
],
"type": "VALUE"
}
},
"outputs": [
{
"id": "produce"
}
],
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"name": "Extracts columns by semantic type",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon",
"version": "0.2.0"
},
"type": "PRIMITIVE"
},
{
"arguments": {
"inputs": {
"data": "steps.0.produce",
"type": "CONTAINER"
}
},
"hyperparams": {
"semantic_types": {
"data": [
"https://metadata.datadrivendiscovery.org/types/TrueTarget"
],
"type": "VALUE"
}
},
"outputs": [
{
"id": "produce"
}
],
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"name": "Extracts columns by semantic type",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon",
"version": "0.2.0"
},
"type": "PRIMITIVE"
},
{
"arguments": {
"inputs": {
"data": "steps.3.produce",
"type": "CONTAINER"
}
},
"hyperparams": {
"return_result": {
"data": "replace",
"type": "VALUE"
},
"use_semantic_types": {
"data": true,
"type": "VALUE"
}
},
"outputs": [
{
"id": "produce"
}
],
"primitive": {
"id": "d016df89-de62-3c53-87ed-c06bb6a23cde",
"name": "sklearn.impute.SimpleImputer",
"python_path": "d3m.primitives.data_cleaning.imputer.SKlearn",
"version": "2019.4.4"
},
"type": "PRIMITIVE"
},
{
"arguments": {
"inputs": {
"data": "steps.5.produce",
"type": "CONTAINER"
},
"outputs": {
"data": "steps.4.produce",
"type": "CONTAINER"
}
},
"hyperparams": {
"return_result": {
"data": "replace",
"type": "VALUE"
}
},
"outputs": [
{
"id": "produce"
}
],
"primitive": {
"id": "7476950e-4373-4cf5-a852-7e16afb8e098",
"name": "XGBoost DART classifier",
"python_path": "d3m.primitives.classification.xgboost_dart.DataFrameCommon",
"version": "0.1.0"
},
"type": "PRIMITIVE"
},
{
"arguments": {
"inputs": {
"data": "steps.6.produce",
"type": "CONTAINER"
},
"reference": {
"data": "steps.1.produce",
"type": "CONTAINER"
}
},
"outputs": [
{
"id": "produce"
}
],
"primitive": {
"id": "8d38b340-f83f-4877-baaa-162f8e551736",
"name": "Construct pipeline predictions output",
"python_path": "d3m.primitives.data_transformation.construct_predictions.DataFrameCommon",
"version": "0.3.0"
},
"type": "PRIMITIVE"
}
]
}
{
"problem": "185_baseball_problem",
"full_inputs": ["185_baseball_dataset"],
"train_inputs": ["185_baseball_dataset_TRAIN"],
"test_inputs": ["185_baseball_dataset_TEST"],
"score_inputs": ["185_baseball_dataset_SCORE"]
}
......@@ -18,7 +18,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://gitlab.com/datadrivendiscovery/common-primitives.git@4aff2f44ec6f9e574a30f0d474fe9cb5113c1858#egg=common-primitives"
"package_uri": "git+https://gitlab.com/datadrivendiscovery/common-primitives.git@e3fee9ca42448f809a4bc4ff23f0f28b054e1752#egg=common-primitives"
}
],
"algorithm_types": [
......@@ -636,5 +636,5 @@
},
"structural_type": "common_primitives.xgboost_dart.XGBoostDartClassifierPrimitive",
"description": "A XGBoost classifier using ``xgb.XGBoostClassifier`` with Dart Boosting type.\n\nIt uses semantic types to determine which columns to operate on.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"digest": "f8a4dbca6ddac1847ef0a74d2ad47e4a02a7c7e3980ef60a3f083ae255caf810"
"digest": "6e09696a85a499a9d5bf1496765905a8f6508ad1f4cc7efe50ce204a46cc5dc5"
}