Commit b495c82a authored by Ali Soltani Tehrani, committed by Sujen

Migrate/michigan

parent b21e6615
{
"id": "21a7b9d6-771c-4cd4-a80c-11e4e8352d61",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-06-11T16:17:12.582014Z",
"inputs": [
{
"name": "inputs"
}
],
"outputs": [
{
"data": "steps.6.produce",
"name": "output"
}
],
"steps": [
{
"type": "PRIMITIVE",
"primitive": {
"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
"name": "Extract a DataFrame from a Dataset",
"digest": "0d46a2c5bc374e305682dc4f1c322518c07638153a8365034a513ea46960802b"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "inputs.0"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7",
"version": "0.5.0",
"python_path": "d3m.primitives.data_transformation.column_parser.DataFrameCommon",
"name": "Parses strings into their types",
"digest": "312cacc014497dd674e34765f6eb54430e594c591e760da0383c87844753d2ce"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.0.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"version": "0.2.0",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon",
"name": "Extracts columns by semantic type",
"digest": "297a4943484bcd532650d5727e23b3d11ca702688d7b64dfb5df8bf1282eaa47"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.1.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"semantic_types": {
"type": "VALUE",
"data": [
"https://metadata.datadrivendiscovery.org/types/Attribute"
]
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "34f71b2e-17bb-488d-a2ba-b60b8c305539",
"version": "0.1.0",
"python_path": "d3m.primitives.data_transformation.dataframe_to_ndarray.Common",
"name": "DataFrame to ndarray converter",
"digest": "71862085ea87f4f68abc9f49b4fa661b1ee70a4f3a97f1c1bb4343e2d27dbd37"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.2.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "6d94cfb0-4225-4446-b5b1-afd8803f2bf5",
"version": "0.0.5",
"python_path": "d3m.primitives.clustering.ekss.Umich",
"name": "EKSS",
"digest": "6ad3d67b76243a2152ca89b657be8e4691fced466ee92b8e4a66a463766c0b82"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.3.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"n_clusters": {
"type": "VALUE",
"data": 100
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "f5241b2e-64f7-44ad-9675-df3d08066437",
"version": "0.1.0",
"python_path": "d3m.primitives.data_transformation.ndarray_to_dataframe.Common",
"name": "ndarray to Dataframe converter",
"digest": "801aa292c7b8ceb0a7dba20618aa7949b289551d8cd314e5de14190084200fc9"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.4.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "8d38b340-f83f-4877-baaa-162f8e551736",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.construct_predictions.DataFrameCommon",
"name": "Construct pipeline predictions output",
"digest": "cfb2d595652c4ae0d24e67d4cb8e4916c9f3c2753eaccc2935263d054b3682fa"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.5.produce"
},
"reference": {
"type": "CONTAINER",
"data": "steps.0.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
}
],
"digest": "93d3014abc2dcbd429fc42e9fa527867886c48231c05b28426e5522f09b60232"
}
\ No newline at end of file
{
"problem": "1491_one_hundred_plants_margin_clust_problem",
"full_inputs": [
"1491_one_hundred_plants_margin_clust_dataset"
],
"train_inputs": [
"1491_one_hundred_plants_margin_clust_dataset_TRAIN"
],
"test_inputs": [
"1491_one_hundred_plants_margin_clust_dataset_TEST"
],
"score_inputs": [
"1491_one_hundred_plants_margin_clust_dataset_SCORE"
]
}
\ No newline at end of file
{
"id": "6d94cfb0-4225-4446-b5b1-afd8803f2bf5",
"version": "0.0.5",
"name": "EKSS",
"description": "Abstract base class for generic types.\n\nA generic type is typically declared by inheriting from\nthis class parameterized with one or more type variables.\nFor example, a generic mapping type might be defined as::\n\n class Mapping(Generic[KT, VT]):\n def __getitem__(self, key: KT) -> VT:\n ...\n # Etc.\n\nThis class can then be used as follows::\n\n def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:\n try:\n return mapping[key]\n except KeyError:\n return default\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"keywords": [
"clustering",
"k-subspaces",
"subspace",
"ensemble"
],
"source": {
"name": "Michigan",
"contact": "mailto:[email protected]",
"uris": [
"https://github.com/dvdmjohnson/d3m_michigan_primitives/blob/master/spider/cluster/ekss/ekss.py",
"https://github.com/dvdmjohnson/d3m_michigan_primitives"
],
"citation": "@article{DBLP:journals/corr/abs-1709-04744, author = {John Lipor and David Hong and Dejiao Zhang and Laura Balzano}, title = {Subspace Clustering using Ensembles of {\textdollar}K{\textdollar}-Subspaces}, journal = {CoRR}, volume = {abs/1709.04744}, year = {2017}, url = {http://arxiv.org/abs/1709.04744}, archivePrefix = {arXiv}, eprint = {1709.04744}, timestamp = {Thu, 05 Oct 2017 09:43:01 +0200}, biburl = {https://dblp.org/rec/bib/journals/corr/abs-1709-04744}, bibsource = {dblp computer science bibliography, https://dblp.org}\n}"
},
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/dvdmjohnson/[email protected]f0a6#egg=spider"
},
{
"type": "UBUNTU",
"package": "ffmpeg",
"version": "7:2.8.11-0ubuntu0.16.04.1"
}
],
"python_path": "d3m.primitives.clustering.ekss.Umich",
"hyperparams_to_tune": [
"n_clusters",
"dim_subspaces"
],
"algorithm_types": [
"SUBSPACE_CLUSTERING",
"ENSEMBLE_LEARNING"
],
"primitive_family": "CLUSTERING",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
"original_python_path": "spider.cluster.ekss.ekss.EKSS",
"primitive_code": {
"class_type_arguments": {
"Inputs": "d3m.container.numpy.ndarray",
"Outputs": "d3m.container.numpy.ndarray",
"Params": "NoneType",
"Hyperparams": "spider.cluster.ekss.ekss.EKSSHyperparams",
"DistanceMatrixOutput": "d3m.container.numpy.ndarray"
},
"interfaces_version": "2019.6.7",
"interfaces": [
"clustering.ClusteringDistanceMatrixMixin",
"clustering.ClusteringTransformerPrimitiveBase",
"transformer.TransformerPrimitiveBase",
"base.PrimitiveBase"
],
"hyperparams": {
"n_clusters": {
"type": "d3m.metadata.hyperparams.Bounded",
"default": 2,
"structural_type": "int",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/ControlParameter"
],
"description": "number of clusters to learn",
"lower": 2,
"upper": null,
"lower_inclusive": true,
"upper_inclusive": false
},
"dim_subspaces": {
"type": "d3m.metadata.hyperparams.Bounded",
"default": 2,
"structural_type": "int",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "dimensionality of learned subspaces",
"lower": 1,
"upper": 50,
"lower_inclusive": true,
"upper_inclusive": true
},
"n_base": {
"type": "d3m.metadata.hyperparams.Bounded",
"default": 100,
"structural_type": "int",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter",
"https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter"
],
"description": "number of 'base' KSS clusterings to use in the ensemble - larger values generally yield better results but longer computation time",
"lower": 10,
"upper": 1000,
"lower_inclusive": true,
"upper_inclusive": true
},
"thresh": {
"type": "d3m.metadata.hyperparams.Union",
"default": 5,
"structural_type": "int",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "if >=1, only the top <thresh> values from each column/row of the affinity matrix are used in spectral clustering",
"configuration": {
"enum": {
"type": "d3m.metadata.hyperparams.Enumeration",
"default": -1,
"structural_type": "int",
"semantic_types": [],
"values": [
-1
]
},
"bounded": {
"type": "d3m.metadata.hyperparams.Bounded",
"default": 5,
"structural_type": "int",
"semantic_types": [],
"lower": 1,
"upper": 10000,
"lower_inclusive": true,
"upper_inclusive": true
}
}
}
},
"arguments": {
"hyperparams": {
"type": "spider.cluster.ekss.ekss.EKSSHyperparams",
"kind": "RUNTIME"
},
"random_seed": {
"type": "int",
"kind": "RUNTIME",
"default": 0
},
"docker_containers": {
"type": "typing.Union[NoneType, typing.Dict[str, d3m.primitive_interfaces.base.DockerContainer]]",
"kind": "RUNTIME",
"default": null
},
"timeout": {
"type": "typing.Union[NoneType, float]",
"kind": "RUNTIME",
"default": null
},
"iterations": {
"type": "typing.Union[NoneType, int]",
"kind": "RUNTIME",
"default": null
},
"produce_methods": {
"type": "typing.Sequence[str]",
"kind": "RUNTIME"
},
"inputs": {
"type": "d3m.container.numpy.ndarray",
"kind": "PIPELINE"
},
"params": {
"type": "NoneType",
"kind": "RUNTIME"
}
},
"class_methods": {
"can_accept": {
"arguments": {
"method_name": {
"type": "str"
},
"arguments": {
"type": "typing.Dict[str, typing.Union[d3m.metadata.base.Metadata, type]]"
},
"hyperparams": {
"type": "spider.cluster.ekss.ekss.EKSSHyperparams"
}
},
"returns": "typing.Union[NoneType, d3m.metadata.base.DataMetadata]",
"description": "Returns a metadata object describing the output of a call of ``method_name`` method under\n``hyperparams`` with primitive arguments ``arguments``, if such arguments can be accepted by the method.\nOtherwise it returns ``None`` or raises an exception.\n\nDefault implementation checks structural types of ``arguments`` expected arguments' types\nand ignores ``hyperparams``.\n\nBy (re)implementing this method, a primitive can fine-tune which arguments it accepts\nfor its methods which goes beyond just structural type checking. For example, a primitive might\noperate only on images, so it can accept numpy arrays, but only those with semantic type\ncorresponding to an image. Or it might check dimensions of an array to assure it operates\non square matrix.\n\nPrimitive arguments are a superset of method arguments. This method receives primitive arguments and\nnot just method arguments so that it is possible to implement it without a state between calls\nto ``can_accept`` for multiple methods. For example, a call to ``fit`` could during normal execution\ninfluences what a later ``produce`` call outputs. But during ``can_accept`` call we can directly have\naccess to arguments which would have been given to ``fit`` to produce metadata of the ``produce`` call.\n\nNot all primitive arguments have to be provided, only those used by ``fit``, ``set_training_data``,\nand produce methods, and those used by the ``method_name`` method itself.\n\nParameters\n----------\nmethod_name : str\n Name of the method which would be called.\narguments : Dict[str, Union[Metadata, type]]\n A mapping between argument names and their metadata objects (for pipeline arguments) or types (for other).\nhyperparams : Hyperparams\n Hyper-parameters under which the method would be called during regular primitive execution.\n\nReturns\n-------\nDataMetadata\n Metadata object of the method call result, or ``None`` if arguments are not accepted\n by the method."
}
},
"instance_methods": {
"__init__": {
"kind": "OTHER",
"arguments": [
"hyperparams",
"random_seed",
"docker_containers"
],
"returns": "NoneType"
},
"fit": {
"kind": "OTHER",
"arguments": [
"timeout",
"iterations"
],
"returns": "d3m.primitive_interfaces.base.CallResult[NoneType]",
"description": "A noop.\n\nParameters\n----------\ntimeout : float\n A maximum time this primitive should be fitting during this method call, in seconds.\niterations : int\n How many of internal iterations should the primitive do.\n\nReturns\n-------\nCallResult[None]\n A ``CallResult`` with ``None`` value."
},
"fit_multi_produce": {
"kind": "OTHER",
"arguments": [
"produce_methods",
"inputs",
"timeout",
"iterations"
],
"returns": "d3m.primitive_interfaces.base.MultiCallResult",
"description": "A method calling ``fit`` and after that multiple produce methods at once.\n\nParameters\n----------\nproduce_methods : Sequence[str]\n A list of names of produce methods to call.\ninputs : Inputs\n The inputs given to all produce methods.\ntimeout : float\n A maximum time this primitive should take to both fit the primitive and produce outputs\n for all produce methods listed in ``produce_methods`` argument, in seconds.\niterations : int\n How many of internal iterations should the primitive do for both fitting and producing\n outputs of all produce methods.\n\nReturns\n-------\nMultiCallResult\n A dict of values for each produce method wrapped inside ``MultiCallResult``."
},
"get_params": {
"kind": "OTHER",
"arguments": [],
"returns": "NoneType",
"description": "A noop.\n\nReturns\n-------\nParams\n An instance of parameters."
},
"multi_produce": {
"kind": "OTHER",
"arguments": [
"produce_methods",
"inputs",
"timeout",
"iterations"
],
"returns": "d3m.primitive_interfaces.base.MultiCallResult",
"description": "A method calling multiple produce methods at once.\n\nWhen a primitive has multiple produce methods it is common that they might compute the\nsame internal results for same inputs but return different representations of those results.\nIf caller is interested in multiple of those representations, calling multiple produce\nmethods might lead to recomputing same internal results multiple times. To address this,\nthis method allows primitive author to implement an optimized version which computes\ninternal results only once for multiple calls of produce methods, but return those different\nrepresentations.\n\nIf any additional method arguments are added to primitive's produce method(s), they have\nto be added to this method as well. This method should accept an union of all arguments\naccepted by primitive's produce method(s) and then use them accordingly when computing\nresults.\n\nThe default implementation of this method just calls all produce methods listed in\n``produce_methods`` in order and is potentially inefficient.\n\nIf primitive should have been fitted before calling this method, but it has not been,\nprimitive should raise a ``PrimitiveNotFittedError`` exception.\n\nParameters\n----------\nproduce_methods : Sequence[str]\n A list of names of produce methods to call.\ninputs : Inputs\n The inputs given to all produce methods.\ntimeout : float\n A maximum time this primitive should take to produce outputs for all produce methods\n listed in ``produce_methods`` argument, in seconds.\niterations : int\n How many of internal iterations should the primitive do.\n\nReturns\n-------\nMultiCallResult\n A dict of values for each produce method wrapped inside ``MultiCallResult``."
},
"produce": {
"kind": "PRODUCE",
"arguments": [
"inputs",
"iterations"
],
"returns": "d3m.primitive_interfaces.base.CallResult[d3m.container.numpy.ndarray]",
"singleton": false,
"inputs_across_samples": [],
"description": "``produce`` method should return a membership map.\n\nA data structure that for each input sample tells to which cluster that sample was assigned to. So ``Outputs``\nshould have the same number of samples than ``Inputs``, and the value at each output sample should represent\na cluster. Consider representing it with just a simple numeric identifier.\n\nIf an implementation of this method computes clusters based on the whole set of input samples,\nuse ``inputs_across_samples`` decorator to mark ``inputs`` as being computed across samples.\n\nParameters\n----------\ninputs : Inputs\n The inputs of shape [num_inputs, ...].\ntimeout : float\n A maximum time this primitive should take to produce outputs during this method call, in seconds.\niterations : int\n How many of internal iterations should the primitive do.\n\nReturns\n-------\nCallResult[Outputs]\n The outputs of shape [num_inputs, 1] wrapped inside ``CallResult`` for a simple numeric\n cluster identifier."
},
"produce_distance_matrix": {
"kind": "PRODUCE",
"arguments": [
"inputs",
"iterations"
],
"returns": "d3m.primitive_interfaces.base.CallResult[d3m.container.numpy.ndarray]",
"singleton": false,
"inputs_across_samples": [],
"description": "Returns the affinity matrix generated from the ensemble of KSS clustering results.\n\nParameters\n----------\ninputs : Inputs\n The inputs of shape [num_inputs, ...].\ntimeout : float\n A maximum time this primitive should take to produce outputs during this method call, in seconds.\niterations : int\n How many of internal iterations should the primitive do.\n\nReturns\n-------\nCallResult[DistanceMatrixOutput]\n The distance matrix of shape [num_inputs, num_inputs, ...] wrapped inside ``CallResult``, where (i, j) element\n of the matrix represent a distance between i-th and j-th sample in the inputs."
},
"set_params": {
"kind": "OTHER",
"arguments": [
"params"
],
"returns": "NoneType",
"description": "A noop.\n\nParameters\n----------\nparams : Params\n An instance of parameters."
},
"set_training_data": {
"kind": "OTHER",
"arguments": [
"inputs"
],
"returns": "NoneType",
"description": "A noop.\n\nParameters\n----------"
}
},
"class_attributes": {
"logger": "logging.Logger",
"metadata": "d3m.metadata.base.PrimitiveMetadata"
},
"instance_attributes": {
"hyperparams": "d3m.metadata.hyperparams.Hyperparams",
"random_seed": "int",
"docker_containers": "typing.Dict[str, d3m.primitive_interfaces.base.DockerContainer]",
"volumes": "typing.Dict[str, str]",
"temporary_directory": "typing.Union[NoneType, str]"
}
},
"structural_type": "spider.cluster.ekss.ekss.EKSS",
"digest": "6ad3d67b76243a2152ca89b657be8e4691fced466ee92b8e4a66a463766c0b82"
}
{
"id": "cf6ab6b1-c80a-4fc3-ba29-938471c790b7",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-06-11T16:17:10.114227Z",
"inputs": [
{
"name": "inputs"
}
],
"outputs": [
{
"data": "steps.6.produce",
"name": "output"
}
],
"steps": [
{
"type": "PRIMITIVE",
"primitive": {
"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
"name": "Extract a DataFrame from a Dataset",
"digest": "0d46a2c5bc374e305682dc4f1c322518c07638153a8365034a513ea46960802b"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "inputs.0"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7",
"version": "0.5.0",
"python_path": "d3m.primitives.data_transformation.column_parser.DataFrameCommon",
"name": "Parses strings into their types",
"digest": "312cacc014497dd674e34765f6eb54430e594c591e760da0383c87844753d2ce"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.0.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"version": "0.2.0",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon",
"name": "Extracts columns by semantic type",
"digest": "297a4943484bcd532650d5727e23b3d11ca702688d7b64dfb5df8bf1282eaa47"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.1.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"semantic_types": {
"type": "VALUE",
"data": [
"https://metadata.datadrivendiscovery.org/types/Attribute"
]
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "34f71b2e-17bb-488d-a2ba-b60b8c305539",
"version": "0.1.0",
"python_path": "d3m.primitives.data_transformation.dataframe_to_ndarray.Common",
"name": "DataFrame to ndarray converter",
"digest": "71862085ea87f4f68abc9f49b4fa661b1ee70a4f3a97f1c1bb4343e2d27dbd37"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.2.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "044e5c71-7507-4f58-a139-bc5481179d62",
"version": "0.0.5",
"python_path": "d3m.primitives.clustering.kss.Umich",
"name": "KSS",
"digest": "df5fe3439f92ea9dad01666dcf78c0c6057bebf5e181256e3a686d333e4eb8ac"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.3.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"n_clusters": {
"type": "VALUE",
"data": 100
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "f5241b2e-64f7-44ad-9675-df3d08066437",
"version": "0.1.0",
"python_path": "d3m.primitives.data_transformation.ndarray_to_dataframe.Common",
"name": "ndarray to Dataframe converter",
"digest": "801aa292c7b8ceb0a7dba20618aa7949b289551d8cd314e5de14190084200fc9"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.4.produce"
}