Commit 61c59a5b authored by Mitar's avatar Mitar

Merge branch 'ISI' into 'master'

ISI_v5.8

See merge request !102
parents 4efe9b35 c61462ad
{
"id": "dsbox-featurizer-image-resnet50",
"id": "datamart-augmentation",
"version": "1.5.0",
"name": "DSBox Image Featurizer RestNet50",
"description": "Image Feature Generation using pretrained deep neural network Inception V3\nNote that the input image format for this model is different than for the VGG16 and ResNet models\n(299x299 instead of 224x224)\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.\n\nParameters\n----------\n_layer_index : int, default: 0, domain: range(11)\n Layer of the network to use to generate features. Smaller\n indices are closer to the output layers of the network.\n\n_resize_data : Boolean, default: True, domain: {True, False}\n If True resize images to 299x299.",
"python_path": "d3m.primitives.feature_extraction.resnet50_image_feature.DSBOX",
"primitive_family": "FEATURE_EXTRACTION",
"name": "Datamart Augmentation",
"python_path": "d3m.primitives.data_augmentation.datamart_augmentation.DSBOX",
"description": "A primitive that takes a list of datamart dataset and choose 1 or a few best dataframe and perform join, return an accessible d3m.dataframe for further processing\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"primitive_family": "DATA_AUGMENTATION",
"algorithm_types": [
"FEEDFORWARD_NEURAL_NETWORK"
"APPROXIMATE_DATA_AUGMENTATION"
],
"keywords": [
"image",
"featurization",
"resnet50"
"data augmentation",
"datamart",
"join"
],
"source": {
"name": "ISI",
"contact": "mailto:[email protected]",
"contact": "kyao:[email protected]",
"uris": [
"https://github.com/usc-isi-i2/dsbox-primitives"
]
......@@ -23,68 +23,62 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/[email protected]#egg=dsbox-primitives"
},
{
"type": "FILE",
"key": "resnet50_weights_tf_dim_ordering_tf_kernels.h5",
"file_uri": "https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5",
"file_digest": "bdc6c9f787f9f51dffd50d895f86e469cc0eb8ba95fd61f0801b1a264acb4819"
"package_uri": "git+https://github.com/usc-isi-i2/[email protected]#egg=dsbox-primitives"
}
],
"precondition": [],
"hyperparms_to_tune": [],
"pure_primitive": false,
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
"original_python_path": "dsbox.datapreprocessing.featurizer.image.net_image_feature.ResNet50ImageFeature",
"original_python_path": "dsbox.datapreprocessing.cleaner.datamart_augment.DatamartAugmentation",
"primitive_code": {
"class_type_arguments": {
"Inputs": "d3m.container.list.List",
"Outputs": "d3m.container.pandas.DataFrame",
"Hyperparams": "dsbox.datapreprocessing.featurizer.image.net_image_feature.ResNet50Hyperparams",
"Inputs": "d3m.container.dataset.Dataset",
"Outputs": "d3m.container.dataset.Dataset",
"Hyperparams": "dsbox.datapreprocessing.cleaner.datamart_augment.DatamartAugmentationHyperparams",
"Params": "NoneType"
},
"interfaces_version": "2019.5.8",
"interfaces": [
"featurization.FeaturizationTransformerPrimitiveBase",
"transformer.TransformerPrimitiveBase",
"base.PrimitiveBase"
],
"hyperparams": {
"layer_index": {
"type": "d3m.metadata.hyperparams.UniformInt",
"default": 0,
"structural_type": "int",
"url": {
"type": "d3m.metadata.hyperparams.Hyperparameter",
"default": "https://isi-datamart.edu",
"structural_type": "str",
"semantic_types": [
"http://schema.org/Integer",
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "Specify the layer of the neural network to use for features. Lower numbered layers correspond to higher-level abstract features. The number of features by layer index are [2048, 100352, 25088, 25088, 100352, 25088, 25088, 100352, 25088, 25088, 200704].",
"lower": 0,
"upper": 11,
"lower_inclusive": true,
"upper_inclusive": false
"description": "url indicates which datamart resource to use"
},
"generate_metadata": {
"type": "d3m.metadata.hyperparams.UniformBool",
"default": false,
"structural_type": "bool",
"search_result": {
"type": "d3m.metadata.hyperparams.Hyperparameter",
"default": [],
"structural_type": "list",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "The list of serialized search result config"
},
"maximum_augment_column_length": {
"type": "d3m.metadata.hyperparams.UniformInt",
"default": 300,
"structural_type": "int",
"semantic_types": [
"http://schema.org/Boolean",
"https://metadata.datadrivendiscovery.org/types/ControlParameter"
],
"description": "A control parameter to set whether to generate metada after the feature extraction. It will be very slow if the columns length is very large. For the default condition, it will turn off to accelerate the program running."
"description": "The maximum extra column number on augmented dataFrame",
"lower": 0,
"upper": 1000,
"lower_inclusive": true,
"upper_inclusive": false
}
},
"arguments": {
"hyperparams": {
"type": "dsbox.datapreprocessing.featurizer.image.net_image_feature.ResNet50Hyperparams",
"type": "dsbox.datapreprocessing.cleaner.datamart_augment.DatamartAugmentationHyperparams",
"kind": "RUNTIME"
},
"volumes": {
"type": "typing.Union[NoneType, typing.Dict[str, str]]",
"kind": "RUNTIME",
"default": null
},
"timeout": {
"type": "typing.Union[NoneType, float]",
"kind": "RUNTIME",
......@@ -100,7 +94,7 @@
"kind": "RUNTIME"
},
"inputs": {
"type": "d3m.container.list.List",
"type": "d3m.container.dataset.Dataset",
"kind": "PIPELINE"
},
"params": {
......@@ -118,7 +112,7 @@
"type": "typing.Dict[str, typing.Union[d3m.metadata.base.Metadata, type]]"
},
"hyperparams": {
"type": "dsbox.datapreprocessing.featurizer.image.net_image_feature.ResNet50Hyperparams"
"type": "dsbox.datapreprocessing.cleaner.datamart_augment.DatamartAugmentationHyperparams"
}
},
"returns": "typing.Union[NoneType, d3m.metadata.base.DataMetadata]",
......@@ -129,8 +123,7 @@
"__init__": {
"kind": "OTHER",
"arguments": [
"hyperparams",
"volumes"
"hyperparams"
],
"returns": "NoneType"
},
......@@ -178,10 +171,10 @@
"timeout",
"iterations"
],
"returns": "d3m.primitive_interfaces.base.CallResult[d3m.container.pandas.DataFrame]",
"returns": "d3m.primitive_interfaces.base.CallResult[d3m.container.dataset.Dataset]",
"singleton": false,
"inputs_across_samples": [],
"description": "Apply neural network-based feature extraction to image_tensor\n\nParameters\n----------\ninputs : Inputs\n The inputs of shape [num_inputs, ...].\ntimeout : float\n A maximum time this primitive should take to produce outputs during this method call, in seconds.\niterations : int\n How many of internal iterations should the primitive do.\n\nReturns\n-------\nCallResult[Outputs]\n The outputs of shape [num_inputs, ...] wrapped inside ``CallResult``."
"description": "Produce primitive's best choice of the output for each of the inputs.\n\nThe output value should be wrapped inside ``CallResult`` object before returning.\n\nIn many cases producing an output is a quick operation in comparison with ``fit``, but not\nall cases are like that. For example, a primitive can start a potentially long optimization\nprocess to compute outputs. ``timeout`` and ``iterations`` can serve as a way for a caller\nto guide the length of this process.\n\nIdeally, a primitive should adapt its call to try to produce the best outputs possible\ninside the time allocated. If this is not possible and the primitive reaches the timeout\nbefore producing outputs, it should raise a ``TimeoutError`` exception to signal that the\ncall was unsuccessful in the given time. The state of the primitive after the exception\nshould be as the method call has never happened and primitive should continue to operate\nnormally. The purpose of ``timeout`` is to give opportunity to a primitive to cleanly\nmanage its state instead of interrupting execution from outside. Maintaining stable internal\nstate should have precedence over respecting the ``timeout`` (caller can terminate the\nmisbehaving primitive from outside anyway). If a longer ``timeout`` would produce\ndifferent outputs, then ``CallResult``'s ``has_finished`` should be set to ``False``.\n\nSome primitives have internal iterations (for example, optimization iterations).\nFor those, caller can provide how many of primitive's internal iterations\nshould a primitive do before returning outputs. Primitives should make iterations as\nsmall as reasonable. If ``iterations`` is ``None``, then there is no limit on\nhow many iterations the primitive should do and primitive should choose the best amount\nof iterations on its own (potentially controlled through hyper-parameters).\nIf ``iterations`` is a number, a primitive has to do those number of iterations,\nif possible. ``timeout`` should still be respected and potentially less iterations\ncan be done because of that. Primitives with internal iterations should make\n``CallResult`` contain correct values.\n\nFor primitives which do not have internal iterations, any value of ``iterations``\nmeans that they should run fully, respecting only ``timeout``.\n\nIf primitive should have been fitted before calling this method, but it has not been,\nprimitive should raise a ``PrimitiveNotFittedError`` exception.\n\nParameters\n----------\ninputs : Inputs\n The inputs of shape [num_inputs, ...].\ntimeout : float\n A maximum time this primitive should take to produce outputs during this method call, in seconds.\niterations : int\n How many of internal iterations should the primitive do.\n\nReturns\n-------\nCallResult[Outputs]\n The outputs of shape [num_inputs, ...] wrapped inside ``CallResult``."
},
"set_params": {
"kind": "OTHER",
......@@ -210,6 +203,6 @@
"temporary_directory": "typing.Union[NoneType, str]"
}
},
"structural_type": "dsbox.datapreprocessing.featurizer.image.net_image_feature.ResNet50ImageFeature",
"digest": "e703a32663c9e5a6e6cf6d230da22df11f8534edb32082f28625a864de00d986"
"structural_type": "dsbox.datapreprocessing.cleaner.datamart_augment.DatamartAugmentation",
"digest": "a81449a1eef6c0d837a6260cbfae796cd0dc0e8e65bf2666a5b30a90e400f3bc"
}
{
"id": "dsbox-featurizer-image-vgg16",
"id": "datamart-download",
"version": "1.5.0",
"name": "DSBox Image Featurizer VGG16",
"description": "Image Feature Generation using pretrained deep neural network VGG16.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.\n\nParameters\n----------\nlayer_index : int, default: 0, domain: range(5)\n Layer of the network to use to generate features. Smaller\n indices are closer to the output layers of the network.\n\nresize_data : Boolean, default: True, domain: {True, False}\n If True resize images to 224 by 224.",
"python_path": "d3m.primitives.feature_extraction.vgg16_image_feature.DSBOX",
"primitive_family": "FEATURE_EXTRACTION",
"name": "Datamart Download",
"python_path": "d3m.primitives.data_augmentation.datamart_download.DSBOX",
"description": "A primitive that takes a list of datamart dataset and choose 1 or a few best dataframe and perform join, return an accessible d3m.dataframe for further processing\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"primitive_family": "DATA_AUGMENTATION",
"algorithm_types": [
"FEEDFORWARD_NEURAL_NETWORK"
"APPROXIMATE_DATA_AUGMENTATION"
],
"keywords": [
"image",
"featurization",
"vgg16"
"datamart",
"download"
],
"source": {
"name": "ISI",
"contact": "mailto:[email protected]",
"contact": "kyao:[email protected]",
"uris": [
"https://github.com/usc-isi-i2/dsbox-primitives"
]
......@@ -23,68 +22,62 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/[email protected]#egg=dsbox-primitives"
},
{
"type": "FILE",
"key": "vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5",
"file_uri": "https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5",
"file_digest": "bfe5187d0a272bed55ba430631598124cff8e880b98d38c9e56c8d66032abdc1"
"package_uri": "git+https://github.com/usc-isi-i2/[email protected]#egg=dsbox-primitives"
}
],
"precondition": [],
"hyperparms_to_tune": [],
"pure_primitive": false,
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
"original_python_path": "dsbox.datapreprocessing.featurizer.image.net_image_feature.Vgg16ImageFeature",
"original_python_path": "dsbox.datapreprocessing.cleaner.datamart_download.DatamartDownload",
"primitive_code": {
"class_type_arguments": {
"Inputs": "d3m.container.list.List",
"Outputs": "d3m.container.pandas.DataFrame",
"Hyperparams": "dsbox.datapreprocessing.featurizer.image.net_image_feature.Vgg16Hyperparams",
"Inputs": "d3m.container.dataset.Dataset",
"Outputs": "d3m.container.dataset.Dataset",
"Hyperparams": "dsbox.datapreprocessing.cleaner.datamart_download.DatamartDownloadHyperparams",
"Params": "NoneType"
},
"interfaces_version": "2019.5.8",
"interfaces": [
"featurization.FeaturizationTransformerPrimitiveBase",
"transformer.TransformerPrimitiveBase",
"base.PrimitiveBase"
],
"hyperparams": {
"layer_index": {
"type": "d3m.metadata.hyperparams.UniformInt",
"default": 0,
"structural_type": "int",
"url": {
"type": "d3m.metadata.hyperparams.Hyperparameter",
"default": "https://isi-datamart.edu",
"structural_type": "str",
"semantic_types": [
"http://schema.org/Integer",
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "Specify the layer of the neural network to use for features. Lower numbered layers correspond to higher-level abstract features. The number of features by layer index are [25088, 100352, 200704, 401408]",
"lower": 0,
"upper": 4,
"lower_inclusive": true,
"upper_inclusive": false
"description": "url indicates which datamart resource to use"
},
"generate_metadata": {
"type": "d3m.metadata.hyperparams.UniformBool",
"default": false,
"structural_type": "bool",
"search_result": {
"type": "d3m.metadata.hyperparams.Hyperparameter",
"default": {},
"structural_type": "dict",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "The list of serialized search result config"
},
"return_format": {
"type": "d3m.metadata.hyperparams.Enumeration",
"default": "ds",
"structural_type": "str",
"semantic_types": [
"http://schema.org/Boolean",
"https://metadata.datadrivendiscovery.org/types/ControlParameter"
],
"description": "A control parameter to set whether to generate metada after the feature extraction. It will be very slow if the columns length is very large. For the default condition, it will turn off to accelerate the program running."
"description": "the return format, ds for dataset, df for dataframe",
"values": [
"ds",
"df"
]
}
},
"arguments": {
"hyperparams": {
"type": "dsbox.datapreprocessing.featurizer.image.net_image_feature.Vgg16Hyperparams",
"type": "dsbox.datapreprocessing.cleaner.datamart_download.DatamartDownloadHyperparams",
"kind": "RUNTIME"
},
"volumes": {
"type": "typing.Union[NoneType, typing.Dict[str, str]]",
"kind": "RUNTIME",
"default": null
},
"timeout": {
"type": "typing.Union[NoneType, float]",
"kind": "RUNTIME",
......@@ -100,7 +93,7 @@
"kind": "RUNTIME"
},
"inputs": {
"type": "d3m.container.list.List",
"type": "d3m.container.dataset.Dataset",
"kind": "PIPELINE"
},
"params": {
......@@ -118,7 +111,7 @@
"type": "typing.Dict[str, typing.Union[d3m.metadata.base.Metadata, type]]"
},
"hyperparams": {
"type": "dsbox.datapreprocessing.featurizer.image.net_image_feature.Vgg16Hyperparams"
"type": "dsbox.datapreprocessing.cleaner.datamart_download.DatamartDownloadHyperparams"
}
},
"returns": "typing.Union[NoneType, d3m.metadata.base.DataMetadata]",
......@@ -129,8 +122,7 @@
"__init__": {
"kind": "OTHER",
"arguments": [
"hyperparams",
"volumes"
"hyperparams"
],
"returns": "NoneType"
},
......@@ -178,10 +170,10 @@
"timeout",
"iterations"
],
"returns": "d3m.primitive_interfaces.base.CallResult[d3m.container.pandas.DataFrame]",
"returns": "d3m.primitive_interfaces.base.CallResult[d3m.container.dataset.Dataset]",
"singleton": false,
"inputs_across_samples": [],
"description": "Apply neural network-based feature extraction to image_tensor\n\nParameters\n----------\ninputs : Inputs\n The inputs of shape [num_inputs, ...].\ntimeout : float\n A maximum time this primitive should take to produce outputs during this method call, in seconds.\niterations : int\n How many of internal iterations should the primitive do.\n\nReturns\n-------\nCallResult[Outputs]\n The outputs of shape [num_inputs, ...] wrapped inside ``CallResult``."
"description": "Produce primitive's best choice of the output for each of the inputs.\n\nThe output value should be wrapped inside ``CallResult`` object before returning.\n\nIn many cases producing an output is a quick operation in comparison with ``fit``, but not\nall cases are like that. For example, a primitive can start a potentially long optimization\nprocess to compute outputs. ``timeout`` and ``iterations`` can serve as a way for a caller\nto guide the length of this process.\n\nIdeally, a primitive should adapt its call to try to produce the best outputs possible\ninside the time allocated. If this is not possible and the primitive reaches the timeout\nbefore producing outputs, it should raise a ``TimeoutError`` exception to signal that the\ncall was unsuccessful in the given time. The state of the primitive after the exception\nshould be as the method call has never happened and primitive should continue to operate\nnormally. The purpose of ``timeout`` is to give opportunity to a primitive to cleanly\nmanage its state instead of interrupting execution from outside. Maintaining stable internal\nstate should have precedence over respecting the ``timeout`` (caller can terminate the\nmisbehaving primitive from outside anyway). If a longer ``timeout`` would produce\ndifferent outputs, then ``CallResult``'s ``has_finished`` should be set to ``False``.\n\nSome primitives have internal iterations (for example, optimization iterations).\nFor those, caller can provide how many of primitive's internal iterations\nshould a primitive do before returning outputs. Primitives should make iterations as\nsmall as reasonable. If ``iterations`` is ``None``, then there is no limit on\nhow many iterations the primitive should do and primitive should choose the best amount\nof iterations on its own (potentially controlled through hyper-parameters).\nIf ``iterations`` is a number, a primitive has to do those number of iterations,\nif possible. ``timeout`` should still be respected and potentially less iterations\ncan be done because of that. Primitives with internal iterations should make\n``CallResult`` contain correct values.\n\nFor primitives which do not have internal iterations, any value of ``iterations``\nmeans that they should run fully, respecting only ``timeout``.\n\nIf primitive should have been fitted before calling this method, but it has not been,\nprimitive should raise a ``PrimitiveNotFittedError`` exception.\n\nParameters\n----------\ninputs : Inputs\n The inputs of shape [num_inputs, ...].\ntimeout : float\n A maximum time this primitive should take to produce outputs during this method call, in seconds.\niterations : int\n How many of internal iterations should the primitive do.\n\nReturns\n-------\nCallResult[Outputs]\n The outputs of shape [num_inputs, ...] wrapped inside ``CallResult``."
},
"set_params": {
"kind": "OTHER",
......@@ -210,6 +202,6 @@
"temporary_directory": "typing.Union[NoneType, str]"
}
},
"structural_type": "dsbox.datapreprocessing.featurizer.image.net_image_feature.Vgg16ImageFeature",
"digest": "0b85f0221f89790990fc165ec419a82de42fd5e5babb350cac26d7da9e9ac2d2"
"structural_type": "dsbox.datapreprocessing.cleaner.datamart_download.DatamartDownload",
"digest": "b95b642d5a94ff09a1b2b830f935e38e6dd1b4b1db940cfaf2e05968382a85b0"
}
{
"id": "d2bd4f3b-fd43-47c2-8ab7-f9fce691e2b2",
"id": "wikidata-wikifier",
"version": "1.5.0",
"name": "DSBox Group Up by Timeseries Primitive",
"description": "Impute the missing value by greedy search of the combinations of standalone simple imputation method.\n\nParameters:\n----------\nverbose: bool\n Control the verbosity\n\nAttributes:\n----------\nimputation_strategies: list of string,\n each is a standalone simple imputation method\n\nbest_imputation: dict. key: column name; value: trained imputation method (parameters)\n which is one of the imputation_strategies\n\nmodel: a sklearn machine learning class\n The machine learning model that will be used to evaluate the imputation strategies\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.data_transformation.group_up_by_timeseries.DSBOX",
"primitive_family": "DATA_TRANSFORMATION",
"name": "wikidata wikifier",
"python_path": "d3m.primitives.data_augmentation.wikifier.DSBOX",
"description": "A primitive that takes a list of datamart dataset and choose 1 or a few best dataframe and perform join, return an accessible d3m.dataframe for further processing\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"primitive_family": "DATA_AUGMENTATION",
"algorithm_types": [
"NUMERICAL_METHOD"
"APPROXIMATE_DATA_AUGMENTATION"
],
"keywords": [
"data augmentation",
"wikidata",
"wikifier"
],
"source": {
"name": "ISI",
"contact": "mailto:[email protected]",
"contact": "kyao:[email protected]",
"uris": [
"https://github.com/usc-isi-i2/dsbox-primitives"
]
},
"keywords": [
"Transform",
"Timeseries",
"Aggregate"
],
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/[email protected]c3e3514698d60a182764294e10d3af89e423144e#egg=dsbox-primitives"
"package_uri": "git+https://github.com/usc-isi-i2/[email protected]49d0a28f864711a5e53f28fbb8c4fcc84d545796#egg=dsbox-primitives"
}
],
"precondition": [
"NO_CATEGORICAL_VALUES"
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
"original_python_path": "dsbox.datapreprocessing.featurizer.timeseries.group_up_ts.GroupUpByTimeSeries",
"original_python_path": "dsbox.datapreprocessing.cleaner.wikifier.Wikifier",
"primitive_code": {
"class_type_arguments": {
"Inputs": "d3m.container.pandas.DataFrame",
"Outputs": "d3m.container.list.List",
"Hyperparams": "dsbox.datapreprocessing.featurizer.timeseries.group_up_ts.GroupUpHyperparameter",
"Inputs": "d3m.container.dataset.Dataset",
"Outputs": "d3m.container.dataset.Dataset",
"Hyperparams": "dsbox.datapreprocessing.cleaner.wikifier.WikifierHyperparams",
"Params": "NoneType"
},
"interfaces_version": "2019.5.8",
......@@ -44,15 +41,6 @@
"base.PrimitiveBase"
],
"hyperparams": {
"verbose": {
"type": "d3m.metadata.hyperparams.UniformBool",
"default": false,
"structural_type": "bool",
"semantic_types": [
"http://schema.org/Boolean",
"https://metadata.datadrivendiscovery.org/types/ControlParameter"
]
},
"use_columns": {
"type": "d3m.metadata.hyperparams.Set",
"default": [],
......@@ -60,7 +48,7 @@
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/ControlParameter"
],
"description": "A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
"description": "A set of column indices to force primitive to operate on. If any specified column does not match any semantic type, it is skipped.",
"elements": {
"type": "d3m.metadata.hyperparams.Hyperparameter",
"default": -1,
......@@ -70,33 +58,36 @@
"is_configuration": false,
"min_size": 0
},
"return_result": {
"type": "d3m.metadata.hyperparams.Enumeration",
"default": "replace",
"structural_type": "str",
"exclude_columns": {
"type": "d3m.metadata.hyperparams.Set",
"default": [],
"structural_type": "typing.Sequence[int]",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/ControlParameter"
],
"description": "Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
"values": [
"append",
"replace",
"new"
]
"description": "A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
"elements": {
"type": "d3m.metadata.hyperparams.Hyperparameter",
"default": -1,
"structural_type": "int",
"semantic_types": []
},
"is_configuration": false,
"min_size": 0
},
"add_index_columns": {
"type": "d3m.metadata.hyperparams.UniformBool",
"default": true,
"structural_type": "bool",
"specific_q_nodes": {
"type": "d3m.metadata.hyperparams.Hyperparameter",
"default": [],
"structural_type": "list",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/ControlParameter"
],
"description": "Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\"."
"description": "specified Q nodes used for searching, if not given, will try to find one"
}
},
"arguments": {
"hyperparams": {
"type": "dsbox.datapreprocessing.featurizer.timeseries.group_up_ts.GroupUpHyperparameter",
"type": "dsbox.datapreprocessing.cleaner.wikifier.WikifierHyperparams",
"kind": "RUNTIME"
},
"timeout": {
......@@ -114,7 +105,7 @@
"kind": "RUNTIME"
},
"inputs": {
"type": "d3m.container.pandas.DataFrame",
"type": "d3m.container.dataset.Dataset",
"kind": "PIPELINE"
},
"params": {
......@@ -132,7 +123,7 @@
"type": "typing.Dict[str, typing.Union[d3m.metadata.base.Metadata, type]]"
},
"hyperparams": {
"type": "dsbox.datapreprocessing.featurizer.timeseries.group_up_ts.GroupUpHyperparameter"
"type": "dsbox.datapreprocessing.cleaner.wikifier.WikifierHyperparams"
}
},
"returns": "typing.Union[NoneType, d3m.metadata.base.DataMetadata]",
......@@ -191,10 +182,10 @@
"timeout",
"iterations"
],
"returns": "d3m.primitive_interfaces.base.CallResult[d3m.container.list.List]",
"returns": "d3m.primitive_interfaces.base.CallResult[d3m.container.dataset.Dataset]",
"singleton": false,
"inputs_across_samples": [],
"description": "precond: run fit() before\n\nParameters:\n----------\ndata: pandas dataframe\n\nParameters\n----------\ninputs : Inputs\n The inputs of shape [num_inputs, ...].\ntimeout : float\n A maximum time this primitive should take to produce outputs during this method call, in seconds.\niterations : int\n How many of internal iterations should the primitive do.\n\nReturns\n-------\nCallResult[Outputs]\n The outputs of shape [num_inputs, ...] wrapped inside ``CallResult``."
"description": "Produce primitive's best choice of the output for each of the inputs.\n\nThe output value should be wrapped inside ``CallResult`` object before returning.\n\nIn many cases producing an output is a quick operation in comparison with ``fit``, but not\nall cases are like that. For example, a primitive can start a potentially long optimization\nprocess to compute outputs. ``timeout`` and ``iterations`` can serve as a way for a caller\nto guide the length of this process.\n\nIdeally, a primitive should adapt its call to try to produce the best outputs possible\ninside the time allocated. If this is not possible and the primitive reaches the timeout\nbefore producing outputs, it should raise a ``TimeoutError`` exception to signal that the\ncall was unsuccessful in the given time. The state of the primitive after the exception\nshould be as the method call has never happened and primitive should continue to operate\nnormally. The purpose of ``timeout`` is to give opportunity to a primitive to cleanly\nmanage its state instead of interrupting execution from outside. Maintaining stable internal\nstate should have precedence over respecting the ``timeout`` (caller can terminate the\nmisbehaving primitive from outside anyway). If a longer ``timeout`` would produce\ndifferent outputs, then ``CallResult``'s ``has_finished`` should be set to ``False``.\n\nSome primitives have internal iterations (for example, optimization iterations).\nFor those, caller can provide how many of primitive's internal iterations\nshould a primitive do before returning outputs. Primitives should make iterations as\nsmall as reasonable. If ``iterations`` is ``None``, then there is no limit on\nhow many iterations the primitive should do and primitive should choose the best amount\nof iterations on its own (potentially controlled through hyper-parameters).\nIf ``iterations`` is a number, a primitive has to do those number of iterations,\nif possible. ``timeout`` should still be respected and potentially less iterations\ncan be done because of that. Primitives with internal iterations should make\n``CallResult`` contain correct values.\n\nFor primitives which do not have internal iterations, any value of ``iterations``\nmeans that they should run fully, respecting only ``timeout``.\n\nIf primitive should have been fitted before calling this method, but it has not been,\nprimitive should raise a ``PrimitiveNotFittedError`` exception.\n\nParameters\n----------\ninputs : Inputs\n The inputs of shape [num_inputs, ...].\ntimeout : float\n A maximum time this primitive should take to produce outputs during this method call, in seconds.\niterations : int\n How many of internal iterations should the primitive do.\n\nReturns\n-------\nCallResult[Outputs]\n The outputs of shape [num_inputs, ...] wrapped inside ``CallResult``."
},
"set_params": {
"kind": "OTHER",
......@@ -223,6 +214,6 @@
"temporary_directory": "typing.Union[NoneType, str]"
}
},
"structural_type": "dsbox.datapreprocessing.featurizer.timeseries.group_up_ts.GroupUpByTimeSeries",
"digest": "c34132ea56b88c4bc8d626aed2867b19dc757373332c8255fcc9b0dfe7ec73da"
"structural_type": "dsbox.datapreprocessing.cleaner.wikifier.Wikifier",
"digest": "0fe81ce95d9eafb794404c552127c88a7a582d86bef2268b1ebb94956441c311"
}
......@@ -25,7 +25,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/[email protected]c3e3514698d60a182764294e10d3af89e423144e#egg=dsbox-primitives"
"package_uri": "git+https://github.com/usc-isi-i2/[email protected]49d0a28f864711a5e53f28fbb8c4fcc84d545796#egg=dsbox-primitives"
}
],
"location_uris": [],
......@@ -298,5 +298,5 @@
}
},
"structural_type": "dsbox.datapreprocessing.cleaner.cleaning_featurizer.CleaningFeaturizer",
"digest": "f88527f1c5b945e55f70823cbb3534a7380724e469da3322a29ccaaaeef693ab"
"digest": "c45eac29f78ce9fd1bf7cf227a13de6c2a07d18af50b7db2ac19d257d9685d83"
}
......@@ -21,7 +21,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/[email protected]c3e3514698d60a182764294e10d3af89e423144e#egg=dsbox-primitives"
"package_uri": "git+https://github.com/usc-isi-i2/[email protected]49d0a28f864711a5e53f28fbb8c4fcc84d545796#egg=dsbox-primitives"
}
],
"location_uris": [],
......@@ -176,5 +176,5 @@
}
},
"structural_type": "dsbox.datapreprocessing.cleaner.column_fold.FoldColumns",
"digest": "bf6a1b593bb0bad0ad17aa2e44cc4c81c327c39cd861e09be84972ff5b69fb34"
"digest": "6e949aa99896bfe87490ac3005535e609ee66b44792e8b41359487c1b7fcc69d"
}
......@@ -22,7 +22,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/[email protected]c3e3514698d60a182764294e10d3af89e423144e#egg=dsbox-primitives"
"package_uri": "git+https://github.com/usc-isi-i2/[email protected]49d0a28f864711a5e53f28fbb8c4fcc84d545796#egg=dsbox-primitives"
}
],
"precondition": [
......@@ -248,5 +248,5 @@
}
},
"structural_type": "dsbox.datapreprocessing.cleaner.labler.Labler",
"digest": "1baa3fdbc2b22e4264c31b24a407bf2eab4793e4a948df4dea64bfc5c00d6d7a"
"digest": "7089105a6916a7656994c3c2fe9fcf5b1cfb268eac0007ea2bbaa414c1bf1a78"
}
......@@ -22,7 +22,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/[email protected]c3e3514698d60a182764294e10d3af89e423144e#egg=dsbox-primitives"
"package_uri": "git+https://github.com/usc-isi-i2/[email protected]49d0a28f864711a5e53f28fbb8c4fcc84d545796#egg=dsbox-primitives"
}
],
"precondition": [],
......@@ -247,5 +247,5 @@
}
},
"structural_type": "dsbox.datapreprocessing.featurizer.image.dataframe_to_tensor.DataFrameToTensor",
"digest": "24401cfe75fda8d95837410ed34bdac3ce41e2ecbe30c194f6892f88a4e1ebe6"
"digest": "3657b61f59f31bd96b8f09a285aec90e7c31d78f8b742c29f738f7b6c1af8da0"
}
......@@ -21,7 +21,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/[email protected]c3e3514698d60a182764294e10d3af89e423144e#egg=dsbox-primitives"
"package_uri": "git+https://github.com/usc-isi-i2/[email protected]49d0a28f864711a5e53f28fbb8c4fcc84d545796#egg=dsbox-primitives"
}
],
"precondition": [],
......@@ -171,5 +171,5 @@
}
},
"structural_type": "dsbox.datapreprocessing.featurizer.pass.do_nothing.DoNothing",
"digest": "2c979cd218c129878c2c7106cb4dce54c9d779613f5ac0dbac511ef1cfea8dc5"
"digest": "36ae2c3f21d46065ab3c59d5c212ea4f04a4c5477478faf5202bbfe65f9efc73"
}
......@@ -21,7 +21,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/[email protected]c3e3514698d60a182764294e10d3af89e423144e#egg=dsbox-primitives"
"package_uri": "git+https://github.com/usc-isi-i2/[email protected]49d0a28f864711a5e53f28fbb8c4fcc84d545796#egg=dsbox-primitives"
}
],