Commit 20668599 authored by Mitar

Merge branch 'migrate/JPL' into 'master'

Migrating JPL primitives to v2019.6.7

See merge request !180
parents 1bf3a58b 8066c56f
{
"id": "2530c430-2cf8-44b1-a7ff-b9bc2c3d4ebd",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-06-02T03:46:53.157173Z",
"inputs": [
{
"name": "inputs"
}
],
"outputs": [
{
"data": "steps.4.produce",
"name": "output predictions"
}
],
"steps": [
{
"type": "PRIMITIVE",
"primitive": {
"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
"name": "Extract a DataFrame from a Dataset"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "inputs.0"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7",
"version": "0.5.0",
"python_path": "d3m.primitives.data_transformation.column_parser.DataFrameCommon",
"name": "Parses strings into their types"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.0.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "d016df89-de62-3c53-87ed-c06bb6a23cde",
"version": "2019.4.4",
"python_path": "d3m.primitives.data_cleaning.imputer.SKlearn",
"name": "sklearn.impute.SimpleImputer"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.1.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"use_semantic_types": {
"type": "VALUE",
"data": true
},
"return_result": {
"type": "VALUE",
"data": "replace"
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "1b2a32a6-0ec5-3ca0-9386-b8b1f1b831d1",
"version": "2019.4.4",
"python_path": "d3m.primitives.classification.bagging.SKlearn",
"name": "sklearn.ensemble.bagging.BaggingClassifier"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.2.produce"
},
"outputs": {
"type": "CONTAINER",
"data": "steps.2.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"use_semantic_types": {
"type": "VALUE",
"data": true
},
"add_index_columns": {
"type": "VALUE",
"data": true
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "8d38b340-f83f-4877-baaa-162f8e551736",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.construct_predictions.DataFrameCommon",
"name": "Construct pipeline predictions output"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.3.produce"
},
"reference": {
"type": "CONTAINER",
"data": "steps.0.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
}
]
}
\ No newline at end of file
{
"problem": "185_baseball_problem",
"full_inputs": [
"185_baseball_dataset"
],
"train_inputs": [
"185_baseball_dataset_TRAIN"
],
"test_inputs": [
"185_baseball_dataset_TEST"
],
"score_inputs": [
"185_baseball_dataset_SCORE"
]
}
\ No newline at end of file
{
"algorithm_types": [
"ENSEMBLE_LEARNING"
],
"name": "sklearn.ensemble.bagging.BaggingClassifier",
"primitive_family": "CLASSIFICATION",
"python_path": "d3m.primitives.classification.bagging.SKlearn",
"source": {
"name": "JPL",
"contact": "mailto:[email protected]",
"uris": [
"https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues",
"https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.BaggingClassifier.html"
]
},
"version": "2019.4.4",
"id": "1b2a32a6-0ec5-3ca0-9386-b8b1f1b831d1",
"installation": [
{
"type": "PIP",
"package_uri": "git+https://gitlab.com/datadrivendiscovery/[email protected]#egg=sklearn_wrap"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
"original_python_path": "sklearn_wrap.SKBaggingClassifier.SKBaggingClassifier",
"primitive_code": {
"class_type_arguments": {
"Inputs": "d3m.container.pandas.DataFrame",
"Outputs": "d3m.container.pandas.DataFrame",
"Params": "sklearn_wrap.SKBaggingClassifier.Params",
"Hyperparams": "sklearn_wrap.SKBaggingClassifier.Hyperparams"
},
"interfaces_version": "2019.6.7",
"interfaces": [
"supervised_learning.SupervisedLearnerPrimitiveBase",
"base.PrimitiveBase",
"base.ProbabilisticCompositionalityMixin"
],
"hyperparams": {
"n_estimators": {
"type": "d3m.metadata.hyperparams.Bounded",
"default": 10,
"structural_type": "int",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "The number of base estimators in the ensemble.",
"lower": 1,
"upper": null,
"lower_inclusive": true,
"upper_inclusive": false
},
"max_samples": {
"type": "d3m.metadata.hyperparams.Union",
"default": 1.0,
"structural_type": "typing.Union[float, int]",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "The number of samples to draw from X to train each base estimator. - If int, then draw `max_samples` samples. - If float, then draw `max_samples * X.shape[0]` samples.",
"configuration": {
"absolute": {
"type": "d3m.metadata.hyperparams.Bounded",
"default": 0,
"structural_type": "int",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"lower": 0,
"upper": null,
"lower_inclusive": true,
"upper_inclusive": false
},
"percent": {
"type": "d3m.metadata.hyperparams.Bounded",
"default": 1.0,
"structural_type": "float",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"lower": 0,
"upper": 1,
"lower_inclusive": true,
"upper_inclusive": true
}
}
},
"max_features": {
"type": "d3m.metadata.hyperparams.Union",
"default": 1.0,
"structural_type": "typing.Union[float, int]",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "The number of features to draw from X to train each base estimator. - If int, then draw `max_features` features. - If float, then draw `max_features * X.shape[1]` features.",
"configuration": {
"absolute": {
"type": "d3m.metadata.hyperparams.Bounded",
"default": 0,
"structural_type": "int",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"lower": 0,
"upper": null,
"lower_inclusive": true,
"upper_inclusive": false
},
"percent": {
"type": "d3m.metadata.hyperparams.Bounded",
"default": 1.0,
"structural_type": "float",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"lower": 0,
"upper": 1,
"lower_inclusive": true,
"upper_inclusive": true
}
}
},
"bootstrap": {
"type": "d3m.metadata.hyperparams.UniformBool",
"default": true,
"structural_type": "bool",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "Whether samples are drawn with replacement."
},
"bootstrap_features": {
"type": "d3m.metadata.hyperparams.UniformBool",
"default": false,
"structural_type": "bool",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "Whether features are drawn with replacement."
},
"oob_score": {
"type": "d3m.metadata.hyperparams.UniformBool",
"default": false,
"structural_type": "bool",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "Whether to use out-of-bag samples to estimate the generalization error."
},
"warm_start": {
"type": "d3m.metadata.hyperparams.UniformBool",
"default": false,
"structural_type": "bool",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "When set to True, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new ensemble. .. versionadded:: 0.17 *warm_start* constructor parameter."
},
"n_jobs": {
"type": "d3m.metadata.hyperparams.Union",
"default": 1,
"structural_type": "int",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter"
],
"description": "The number of jobs to run in parallel for both `fit` and `predict`. If -1, then the number of jobs is set to the number of cores.",
"configuration": {
"limit": {
"type": "d3m.metadata.hyperparams.Bounded",
"default": 1,
"structural_type": "int",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"lower": 1,
"upper": null,
"lower_inclusive": true,
"upper_inclusive": false
},
"all_cores": {
"type": "d3m.metadata.hyperparams.Constant",
"default": -1,
"structural_type": "int",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
]
}
}
},
"use_input_columns": {
"type": "d3m.metadata.hyperparams.Set",
"default": [],
"structural_type": "typing.Sequence[int]",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/ControlParameter"
],
"description": "A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.",
"elements": {
"type": "d3m.metadata.hyperparams.Hyperparameter",
"default": -1,
"structural_type": "int",
"semantic_types": []
},
"is_configuration": false,
"min_size": 0
},
"use_output_columns": {
"type": "d3m.metadata.hyperparams.Set",
"default": [],
"structural_type": "typing.Sequence[int]",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/ControlParameter"
],
"description": "A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.",
"elements": {
"type": "d3m.metadata.hyperparams.Hyperparameter",
"default": -1,
"structural_type": "int",
"semantic_types": []
},
"is_configuration": false,
"min_size": 0
},
"exclude_input_columns": {
"type": "d3m.metadata.hyperparams.Set",
"default": [],
"structural_type": "typing.Sequence[int]",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/ControlParameter"
],
"description": "A set of column indices to not use as training inputs. Applicable only if \"use_input_columns\" is not provided.",
"elements": {
"type": "d3m.metadata.hyperparams.Hyperparameter",
"default": -1,
"structural_type": "int",
"semantic_types": []
},
"is_configuration": false,
"min_size": 0
},
"exclude_output_columns": {
"type": "d3m.metadata.hyperparams.Set",
"default": [],
"structural_type": "typing.Sequence[int]",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/ControlParameter"
],
"description": "A set of column indices to not use as training target. Applicable only if \"use_output_columns\" is not provided.",
"elements": {
"type": "d3m.metadata.hyperparams.Hyperparameter",
"default": -1,
"structural_type": "int",
"semantic_types": []
},
"is_configuration": false,
"min_size": 0
},
"return_result": {
"type": "d3m.metadata.hyperparams.Enumeration",
"default": "new",
"structural_type": "str",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/ControlParameter"
],
"description": "Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
"values": [
"append",
"replace",
"new"
]
},
"use_semantic_types": {
"type": "d3m.metadata.hyperparams.UniformBool",
"default": false,
"structural_type": "bool",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/ControlParameter"
],
"description": "Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe"
},
"add_index_columns": {
"type": "d3m.metadata.hyperparams.UniformBool",
"default": false,
"structural_type": "bool",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/ControlParameter"
],
"description": "Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\"."
},
"error_on_no_input": {
"type": "d3m.metadata.hyperparams.UniformBool",
"default": true,
"structural_type": "bool",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/ControlParameter"
],
"description": "Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False."
}
},
"arguments": {
"hyperparams": {
"type": "sklearn_wrap.SKBaggingClassifier.Hyperparams",
"kind": "RUNTIME"
},
"random_seed": {
"type": "int",
"kind": "RUNTIME",
"default": 0
},
"docker_containers": {
"type": "typing.Union[NoneType, typing.Dict[str, d3m.primitive_interfaces.base.DockerContainer]]",
"kind": "RUNTIME",
"default": null
},
"timeout": {
"type": "typing.Union[NoneType, float]",
"kind": "RUNTIME",
"default": null
},
"iterations": {
"type": "typing.Union[NoneType, int]",
"kind": "RUNTIME",
"default": null
},
"produce_methods": {
"type": "typing.Sequence[str]",
"kind": "RUNTIME"
},
"inputs": {
"type": "d3m.container.pandas.DataFrame",
"kind": "PIPELINE"
},
"outputs": {
"type": "d3m.container.pandas.DataFrame",
"kind": "PIPELINE"
},
"params": {
"type": "sklearn_wrap.SKBaggingClassifier.Params",
"kind": "RUNTIME"
}
},
"class_methods": {
"can_accept": {
"arguments": {
"method_name": {
"type": "str"
},
"arguments": {
"type": "typing.Dict[str, typing.Union[d3m.metadata.base.Metadata, type]]"
},
"hyperparams": {
"type": "sklearn_wrap.SKBaggingClassifier.Hyperparams"
}
},
"returns": "typing.Union[NoneType, d3m.metadata.base.DataMetadata]",
"description": "Returns a metadata object describing the output of a call of ``method_name`` method under\n``hyperparams`` with primitive arguments ``arguments``, if such arguments can be accepted by the method.\nOtherwise it returns ``None`` or raises an exception.\n\nDefault implementation checks structural types of ``arguments`` expected arguments' types\nand ignores ``hyperparams``.\n\nBy (re)implementing this method, a primitive can fine-tune which arguments it accepts\nfor its methods which goes beyond just structural type checking. For example, a primitive might\noperate only on images, so it can accept numpy arrays, but only those with semantic type\ncorresponding to an image. Or it might check dimensions of an array to assure it operates\non square matrix.\n\nPrimitive arguments are a superset of method arguments. This method receives primitive arguments and\nnot just method arguments so that it is possible to implement it without a state between calls\nto ``can_accept`` for multiple methods. For example, a call to ``fit`` could during normal execution\ninfluences what a later ``produce`` call outputs. But during ``can_accept`` call we can directly have\naccess to arguments which would have been given to ``fit`` to produce metadata of the ``produce`` call.\n\nNot all primitive arguments have to be provided, only those used by ``fit``, ``set_training_data``,\nand produce methods, and those used by the ``method_name`` method itself.\n\nParameters\n----------\nmethod_name : str\n Name of the method which would be called.\narguments : Dict[str, Union[Metadata, type]]\n A mapping between argument names and their metadata objects (for pipeline arguments) or types (for other).\nhyperparams : Hyperparams\n Hyper-parameters under which the method would be called during regular primitive execution.\n\nReturns\n-------\nDataMetadata\n Metadata object of the method call result, or ``None`` if arguments are not accepted\n by the method."
}
},
"instance_methods": {
"__init__": {
"kind": "OTHER",
"arguments": [
"hyperparams",
"random_seed",
"docker_containers"
],
"returns": "NoneType"
},
"fit": {
"kind": "OTHER",
"arguments": [
"timeout",
"iterations"
],
"returns": "d3m.primitive_interfaces.base.CallResult[NoneType]",
"description": "Fits primitive using inputs and outputs (if any) using currently set training data.\n\nThe returned value should be a ``CallResult`` object with ``value`` set to ``None``.\n\nIf ``fit`` has already been called in the past on different training data,\nthis method fits it **again from scratch** using currently set training data.\n\nOn the other hand, caller can call ``fit`` multiple times on the same training data\nto continue fitting.\n\nIf ``fit`` fully fits using provided training data, there is no point in making further\ncalls to this method with same training data, and in fact further calls can be noops,\nor a primitive can decide to refit from scratch.\n\nIn the case fitting can continue with same training data (even if it is maybe not reasonable,\nbecause the internal metric primitive is using looks like fitting will be degrading), if ``fit``\nis called again (without setting training data), the primitive has to continue fitting.\n\nCaller can provide ``timeout`` information to guide the length of the fitting process.\nIdeally, a primitive should adapt its fitting process to try to do the best fitting possible\ninside the time allocated. If this is not possible and the primitive reaches the timeout\nbefore fitting, it should raise a ``TimeoutError`` exception to signal that fitting was\nunsuccessful in the given time. The state of the primitive after the exception should be\nas the method call has never happened and primitive should continue to operate normally.\nThe purpose of ``timeout`` is to give opportunity to a primitive to cleanly manage\nits state instead of interrupting execution from outside. Maintaining stable internal state\nshould have precedence over respecting the ``timeout`` (caller can terminate the misbehaving\nprimitive from outside anyway). If a longer ``timeout`` would produce different fitting,\nthen ``CallResult``'s ``has_finished`` should be set to ``False``.\n\nSome primitives have internal fitting iterations (for example, epochs). For those, caller\ncan provide how many of primitive's internal iterations should a primitive do before returning.\nPrimitives should make iterations as small as reasonable. If ``iterations`` is ``None``,\nthen there is no limit on how many iterations the primitive should do and primitive should\nchoose the best amount of iterations on its own (potentially controlled through\nhyper-parameters). If ``iterations`` is a number, a primitive has to do those number of\niterations (even if not reasonable), if possible. ``timeout`` should still be respected\nand potentially less iterations can be done because of that. Primitives with internal\niterations should make ``CallResult`` contain correct values.\n\nFor primitives which do not have internal iterations, any value of ``iterations``\nmeans that they should fit fully, respecting only ``timeout``.\n\nParameters\n----------\ntimeout : float\n A maximum time this primitive should be fitting during this method call, in seconds.\niterations : int\n How many of internal iterations should the primitive do.\n\nReturns\n-------\nCallResult[None]\n A ``CallResult`` with ``None`` value."
},
"fit_multi_produce": {
"kind": "OTHER",
"arguments": [
"produce_methods",
"inputs",