Commit 378ac767 authored by Mitar
Browse files

Merge branch 'byu-dml' into 'master'

Use MIN_METADATA datasets with primitives

See merge request !158
parents d5ec1a3b 3ac8649c
Pipeline #111853853 passed with stages in 113 minutes and 39 seconds
{
"id": "168d3fbf-a3fe-456a-93a3-d2720ef8cb42",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2020-01-23T21:11:30.121518Z",
"inputs": [
{
"name": "inputs"
}
],
"outputs": [
{
"data": "steps.7.produce",
"name": "predictions"
}
],
"steps": [
{
"type": "PRIMITIVE",
"primitive": {
"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
"name": "Extract a DataFrame from a Dataset",
"digest": "422744651afd5995d029a227a1dd7b1696038816b7eb9601f37d661757812aee"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "inputs.0"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "e193afa1-b45e-4d29-918f-5bb1fa3b88a7",
"version": "0.2.0",
"python_path": "d3m.primitives.schema_discovery.profiler.Common",
"name": "Determine missing semantic types for columns automatically",
"digest": "8b12a9aececdc5b7a4d5ef47cd04cda75592fd24f49922776b614d4bbeeb97f1"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.0.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7",
"version": "0.6.0",
"python_path": "d3m.primitives.data_transformation.column_parser.Common",
"name": "Parses strings into their types",
"digest": "f1215fe3351a2e8bf495f9ed5ed50e88b30d78d4d7ebd9e13bb544e63a10994b"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.1.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
"name": "Extracts columns by semantic type",
"digest": "30cceb9812b430d6550d54766b4f674b68b92531fc2ad63f56818ea002399c13"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.2.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"semantic_types": {
"type": "VALUE",
"data": [
"https://metadata.datadrivendiscovery.org/types/Attribute"
]
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
"name": "Extracts columns by semantic type",
"digest": "30cceb9812b430d6550d54766b4f674b68b92531fc2ad63f56818ea002399c13"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.2.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"semantic_types": {
"type": "VALUE",
"data": [
"https://metadata.datadrivendiscovery.org/types/TrueTarget"
]
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "ebfeb6f0-e366-4082-b1a7-602fd50acc96",
"version": "0.2.0",
"python_path": "d3m.primitives.data_preprocessing.random_sampling_imputer.BYU",
"name": "Random Sampling Imputer",
"digest": "d7799c8b0710126a0fb39d50ac458e8458f87eeedf266d336be915960fb519d1"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.3.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "1dd82833-5692-39cb-84fb-2455683075f3",
"version": "2019.11.13",
"python_path": "d3m.primitives.classification.random_forest.SKlearn",
"name": "sklearn.ensemble.forest.RandomForestClassifier",
"digest": "93abe4b22214ba6202c13c6a2fe5b2b4d03cdc28a78fa02b6f2759e7b125eaed"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.5.produce"
},
"outputs": {
"type": "CONTAINER",
"data": "steps.4.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"use_semantic_types": {
"type": "VALUE",
"data": true
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "8d38b340-f83f-4877-baaa-162f8e551736",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.construct_predictions.Common",
"name": "Construct pipeline predictions output",
"digest": "2994d9885ddeb72f5a47a8fb04e27fe3f684adebf7c5dc27cb56392475dbb29c"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.6.produce"
},
"reference": {
"type": "CONTAINER",
"data": "steps.0.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
}
],
"digest": "ee731b6b5216ce73ddc8e55d43a08ebbcdedba87698577dc7889cd524ecbdd33"
}
\ No newline at end of file
{
"id": "faeb3eb9-648f-4059-b067-791ebff47bc4",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2020-01-23T21:11:39.160318Z",
"inputs": [
{
"name": "inputs"
}
],
"outputs": [
{
"data": "steps.7.produce",
"name": "predictions"
}
],
"steps": [
{
"type": "PRIMITIVE",
"primitive": {
"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
"name": "Extract a DataFrame from a Dataset",
"digest": "422744651afd5995d029a227a1dd7b1696038816b7eb9601f37d661757812aee"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "inputs.0"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "e193afa1-b45e-4d29-918f-5bb1fa3b88a7",
"version": "0.2.0",
"python_path": "d3m.primitives.schema_discovery.profiler.Common",
"name": "Determine missing semantic types for columns automatically",
"digest": "8b12a9aececdc5b7a4d5ef47cd04cda75592fd24f49922776b614d4bbeeb97f1"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.0.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7",
"version": "0.6.0",
"python_path": "d3m.primitives.data_transformation.column_parser.Common",
"name": "Parses strings into their types",
"digest": "f1215fe3351a2e8bf495f9ed5ed50e88b30d78d4d7ebd9e13bb544e63a10994b"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.1.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
"name": "Extracts columns by semantic type",
"digest": "30cceb9812b430d6550d54766b4f674b68b92531fc2ad63f56818ea002399c13"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.2.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"semantic_types": {
"type": "VALUE",
"data": [
"https://metadata.datadrivendiscovery.org/types/Attribute"
]
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
"name": "Extracts columns by semantic type",
"digest": "30cceb9812b430d6550d54766b4f674b68b92531fc2ad63f56818ea002399c13"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.2.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"semantic_types": {
"type": "VALUE",
"data": [
"https://metadata.datadrivendiscovery.org/types/TrueTarget"
]
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "ebfeb6f0-e366-4082-b1a7-602fd50acc96",
"version": "0.2.0",
"python_path": "d3m.primitives.data_preprocessing.random_sampling_imputer.BYU",
"name": "Random Sampling Imputer",
"digest": "d7799c8b0710126a0fb39d50ac458e8458f87eeedf266d336be915960fb519d1"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.3.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "f0fd7a62-09b5-3abc-93bb-f5f999f7cc80",
"version": "2019.11.13",
"python_path": "d3m.primitives.regression.random_forest.SKlearn",
"name": "sklearn.ensemble.forest.RandomForestRegressor",
"digest": "4bf2c5e6dd0cc1baedaf1f1dd601d773ebb716d569582db803908c5926d7c349"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.5.produce"
},
"outputs": {
"type": "CONTAINER",
"data": "steps.4.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"use_semantic_types": {
"type": "VALUE",
"data": true
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "8d38b340-f83f-4877-baaa-162f8e551736",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.construct_predictions.Common",
"name": "Construct pipeline predictions output",
"digest": "2994d9885ddeb72f5a47a8fb04e27fe3f684adebf7c5dc27cb56392475dbb29c"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.6.produce"
},
"reference": {
"type": "CONTAINER",
"data": "steps.0.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
}
],
"digest": "c0b88fbd63565bedf783ef545197a6f8caa9013ca551011d7e8e5e4bd91287f8"
}
\ No newline at end of file
{
"algorithm_types": [
"IMPUTATION"
],
"description": "This imputes missing values in a DataFrame by sampling known values from each column independently. If the training\ndata has no known values in a particular column, no values are imputed. Alternatively, columns with missing values\ncan be dropped. By default columns of all missing values are dropped.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"digest": "d7799c8b0710126a0fb39d50ac458e8458f87eeedf266d336be915960fb519d1",
"effects": [
"NO_MISSING_VALUES"
],
"id": "ebfeb6f0-e366-4082-b1a7-602fd50acc96",
"installation": [
{
"package": "byudml",
"type": "PIP",
"version": "0.6.7"
}
],
"location_uris": [
"https://github.com/byu-dml/d3m-primitives/blob/master/byu_dml/imputer/random_sampling_imputer.py"
],
"name": "Random Sampling Imputer",
"original_python_path": "byudml.imputer.random_sampling_imputer.RandomSamplingImputer",
"primitive_code": {
"arguments": {
"hyperparams": {
"kind": "RUNTIME",
"type": "byudml.imputer.random_sampling_imputer.Hyperparams"
},
"inputs": {
"kind": "PIPELINE",
"type": "d3m.container.pandas.DataFrame"
},
"iterations": {
"default": null,
"kind": "RUNTIME",
"type": "typing.Union[NoneType, int]"
},
"params": {
"kind": "RUNTIME",
"type": "byudml.imputer.random_sampling_imputer.Params"
},
"produce_methods": {
"kind": "RUNTIME",
"type": "typing.Sequence[str]"
},
"random_seed": {
"default": 0,
"kind": "RUNTIME",
"type": "int"
},
"timeout": {
"default": null,
"kind": "RUNTIME",
"type": "typing.Union[NoneType, float]"
}
},
"class_attributes": {
"logger": "logging.Logger",
"metadata": "d3m.metadata.base.PrimitiveMetadata"
},
"class_methods": {},
"class_type_arguments": {
"Hyperparams": "byudml.imputer.random_sampling_imputer.Hyperparams",
"Inputs": "d3m.container.pandas.DataFrame",
"Outputs": "d3m.container.pandas.DataFrame",
"Params": "byudml.imputer.random_sampling_imputer.Params"
},
"hyperparams": {
"drop_missing_values": {
"default": true,
"description": "Determines whether to drop columns containing missing values.",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/ControlParameter"
],
"structural_type": "bool",
"type": "d3m.metadata.hyperparams.UniformBool"
},
"how": {
"default": "all",
"description": "Determines how to drop missing values. If \"all\", drops columns where all values are missing. If \"any\", drops columns where any values are missing (note no imputation is performed).",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/ControlParameter"
],
"structural_type": "str",
"type": "d3m.metadata.hyperparams.Enumeration",
"values": [
"all",