Commit c94ea7df authored by Muxin Liang ⛹🏿, committed by Mitar

ISI primitive

parent 360979c0
......@@ -3,7 +3,7 @@
"version": "1.4.4",
"name": "DSBox Cleaning Featurizer",
"description": "A base class for primitives which have to be fitted before they can start\nproducing (useful) outputs from inputs, but they are fitted only on input data.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.data_cleaning.CleaningFeaturizer.DSBOX",
"python_path": "d3m.primitives.data_cleaning.cleaning_featurizer.DSBOX",
"primitive_family": "DATA_CLEANING",
"algorithm_types": [
"DATA_CONVERSION"
......@@ -25,7 +25,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@61280ffddf5a40f42f1472f35f6e6df0c88c1129#egg=dsbox-datacleaning"
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@de38b3767de94d10ae8ff696f82af87dce14959f#egg=dsbox-datacleaning"
}
],
"location_uris": [],
......@@ -295,5 +295,5 @@
}
},
"structural_type": "dsbox.datapreprocessing.cleaner.cleaning_featurizer.CleaningFeaturizer",
"digest": "26e6ec13f7b97b7955f9a1220ba5a11699216234fb883846ba4102e53606fcd2"
"digest": "16db320ccb2f1ae77fde7be3cb91a22c416e4e2e24e1563d0eb60c7c45091a09"
}
......@@ -3,7 +3,7 @@
"version": "1.4.4",
"name": "DSBox Fold Columns",
"description": "A base class for primitives which have to be fitted before they can start\nproducing (useful) outputs from inputs, but they are fitted only on input data.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.data_cleaning.FoldColumns.DSBOX",
"python_path": "d3m.primitives.data_cleaning.column_fold.DSBOX",
"primitive_family": "DATA_CLEANING",
"algorithm_types": [
"DATA_CONVERSION"
......@@ -21,7 +21,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@61280ffddf5a40f42f1472f35f6e6df0c88c1129#egg=dsbox-datacleaning"
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@de38b3767de94d10ae8ff696f82af87dce14959f#egg=dsbox-datacleaning"
}
],
"location_uris": [],
......@@ -176,5 +176,5 @@
}
},
"structural_type": "dsbox.datapreprocessing.cleaner.column_fold.FoldColumns",
"digest": "b0dd73d18812c61afc0d87a88b242cecee2c74492dc79cf9f7b777d2607341be"
"digest": "497d2f682cfbe12f4e00a9f027be641f19e2395f4668507ae194233069918e33"
}
......@@ -3,7 +3,7 @@
"version": "1.4.4",
"name": "DSBox feature labeler",
"description": "A primitive which encode all categorical values into integers. This primitive can\nhandle values not seen during training.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.data_cleaning.Labeler.DSBOX",
"python_path": "d3m.primitives.data_cleaning.labeler.DSBOX",
"primitive_family": "DATA_CLEANING",
"algorithm_types": [
"DATA_NORMALIZATION"
......@@ -22,7 +22,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@61280ffddf5a40f42f1472f35f6e6df0c88c1129#egg=dsbox-datacleaning"
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@de38b3767de94d10ae8ff696f82af87dce14959f#egg=dsbox-datacleaning"
}
],
"precondition": [
......@@ -248,5 +248,5 @@
}
},
"structural_type": "dsbox.datapreprocessing.cleaner.labler.Labler",
"digest": "a60d69b0ba3b4fdf8c71982c9ebcc8286d7f732d1a60297f4c1323f358cfcf4c"
"digest": "6b630571ee95379e0991e869d683bb01b890e546e0a991c8b011d45bf857e4fb"
}
......@@ -3,7 +3,7 @@
"version": "1.4.4",
"name": "ISI DSBox Data Encoder",
"description": "An one-hot encoder, which\n1. n_limit: max number of distinct values to one-hot encode,\n remaining values with fewer occurence are put in [colname]_other_ column.\n\n2. feed in data by set_training_data, then apply fit() function to tune the encoder.\n\n3. produce(): input data would be encoded and return.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.data_preprocessing.Encoder.DSBOX",
"python_path": "d3m.primitives.data_preprocessing.encoder.DSBOX",
"primitive_family": "DATA_PREPROCESSING",
"algorithm_types": [
"ENCODE_ONE_HOT"
......@@ -22,7 +22,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@61280ffddf5a40f42f1472f35f6e6df0c88c1129#egg=dsbox-datacleaning"
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@de38b3767de94d10ae8ff696f82af87dce14959f#egg=dsbox-datacleaning"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -257,5 +257,5 @@
}
},
"structural_type": "dsbox.datapreprocessing.cleaner.encoder.Encoder",
"digest": "b9563bc4c9391c954b7a88efc0b18ef31e0d7d49f20031929675af2068e001d5"
"digest": "c130d57dc07dcff128f04b6ec0b7c48679b1ebb08de19728a2cbdbb00ce53a53"
}
......@@ -3,7 +3,7 @@
"version": "1.4.4",
"name": "DSBox ensemble voting",
"description": "A primitive which generate single prediction result for one index if there is many\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.data_preprocessing.EnsembleVoting.DSBOX",
"python_path": "d3m.primitives.data_preprocessing.ensemble_voting.DSBOX",
"primitive_family": "DATA_PREPROCESSING",
"algorithm_types": [
"ENSEMBLE_LEARNING"
......@@ -22,7 +22,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@61280ffddf5a40f42f1472f35f6e6df0c88c1129#egg=dsbox-datacleaning"
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@de38b3767de94d10ae8ff696f82af87dce14959f#egg=dsbox-datacleaning"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -188,5 +188,5 @@
}
},
"structural_type": "dsbox.datapostprocessing.ensemble_voting.EnsembleVoting",
"digest": "a374b8d1569836938f9f12ae0d19b4a6a77338a8d5b5846f542f916139bcee12"
"digest": "1d0020877f7a8422751eb33d63cb06b2d8463b7414823093f79f0bbf49ce73fc"
}
......@@ -3,7 +3,7 @@
"version": "1.4.4",
"name": "DSBox Greedy Imputer",
"description": "Impute the missing value by greedy search of the combinations of standalone simple imputation method.\n\nParameters:\n----------\nverbose: bool\n Control the verbosity\n\nAttributes:\n----------\nimputation_strategies: list of string,\n each is a standalone simple imputation method\n\nbest_imputation: dict. key: column name; value: trained imputation method (parameters)\n which is one of the imputation_strategies\n\nmodel: a sklearn machine learning class\n The machine learning model that will be used to evaluate the imputation strategies\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.data_preprocessing.GreedyImputation.DSBOX",
"python_path": "d3m.primitives.data_preprocessing.greedy_imputation.DSBOX",
"primitive_family": "DATA_PREPROCESSING",
"algorithm_types": [
"IMPUTATION"
......@@ -23,7 +23,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@61280ffddf5a40f42f1472f35f6e6df0c88c1129#egg=dsbox-datacleaning"
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@de38b3767de94d10ae8ff696f82af87dce14959f#egg=dsbox-datacleaning"
}
],
"location_uris": [],
......@@ -263,5 +263,5 @@
}
},
"structural_type": "dsbox.datapreprocessing.cleaner.greedy.GreedyImputation",
"digest": "014d48a9a0b0644aecf0d032979820a866cf05b43bd6616ef816800d5647f02a"
"digest": "a69facc6a868eae4056872877449916a3cd21bf1425735129489f8c800bffb8a"
}
......@@ -3,7 +3,7 @@
"version": "1.4.4",
"name": "DSBox horizontal concat",
"description": "A primitive which concat a list of dataframe to a single dataframe horizontally,\nand it will also set metatdata for prediction,\nwe assume that inputs has same length\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.data_preprocessing.HorizontalConcat.DSBOX",
"python_path": "d3m.primitives.data_preprocessing.horizontal_concat.DSBOX",
"primitive_family": "DATA_PREPROCESSING",
"algorithm_types": [
"DATA_CONVERSION"
......@@ -22,7 +22,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@61280ffddf5a40f42f1472f35f6e6df0c88c1129#egg=dsbox-datacleaning"
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@de38b3767de94d10ae8ff696f82af87dce14959f#egg=dsbox-datacleaning"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -220,5 +220,5 @@
}
},
"structural_type": "dsbox.datapostprocessing.horizontal_concat.HorizontalConcat",
"digest": "2209b6bee82592874af05286e5b066099cb5062bf8e8ad244c88b541fa43e481"
"digest": "14fb9c428d2ebf9593c6a28319f461d296f0856413eecaf2c0cc942f1ba8c85f"
}
......@@ -3,7 +3,7 @@
"version": "1.4.4",
"name": "DSBox Iterative Regression Imputer",
"description": "Impute the missing value by iteratively regress using other attributes.\n It will fit and fill the missing value in the training set, and store the learned models.\n In the `produce` phase, it will use the learned models to iteratively regress on the\n testing data again, and return the imputed testing data.\n\nParameters:\n----------\nverbose: bool\n Control the verbosity\n\nAttributes:\n----------\nbest_imputation: dict. key: column name; value: trained imputation method (parameters)\n could be sklearn regression model, or \"mean\" (which means the regression failed)\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.data_preprocessing.IterativeRegressionImputation.DSBOX",
"python_path": "d3m.primitives.data_preprocessing.iterative_regression_imputation.DSBOX",
"primitive_family": "DATA_PREPROCESSING",
"algorithm_types": [
"IMPUTATION"
......@@ -22,7 +22,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@61280ffddf5a40f42f1472f35f6e6df0c88c1129#egg=dsbox-datacleaning"
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@de38b3767de94d10ae8ff696f82af87dce14959f#egg=dsbox-datacleaning"
}
],
"location_uris": [],
......@@ -260,5 +260,5 @@
}
},
"structural_type": "dsbox.datapreprocessing.cleaner.iterative_regression.IterativeRegressionImputation",
"digest": "4bf68666ab36d9faeb594985b7f7514b08618217bd570d55d5504329fa5e31cd"
"digest": "fcb7e54c51f0dda089f661438ea4cf10c3daa3a4d17b87f312a3e01e02acb8e4"
}
......@@ -3,7 +3,7 @@
"version": "1.4.4",
"name": "DSBox Mean Imputer",
"description": "Impute missing values using the `mean` value of the attribute.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.data_preprocessing.MeanImputation.DSBOX",
"python_path": "d3m.primitives.data_preprocessing.mean_imputation.DSBOX",
"primitive_family": "DATA_PREPROCESSING",
"algorithm_types": [
"IMPUTATION"
......@@ -23,7 +23,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@61280ffddf5a40f42f1472f35f6e6df0c88c1129#egg=dsbox-datacleaning"
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@de38b3767de94d10ae8ff696f82af87dce14959f#egg=dsbox-datacleaning"
}
],
"location_uris": [],
......@@ -259,5 +259,5 @@
}
},
"structural_type": "dsbox.datapreprocessing.cleaner.mean.MeanImputation",
"digest": "40774821c19ef2d7864c689552a1070ccf778fb6d9912b29c310a8e000a1c33e"
"digest": "1639076031a91509d977cf82cbcbfe53a726d32e36230e964200004c646050ab"
}
......@@ -3,7 +3,7 @@
"version": "1.4.4",
"name": "DSBox Splitter",
"description": "A primitive that could be used before processing the dataset.\nIf the size of the dataset(or dataframe) is smaller than threshold, it will do nothing but pass through the original dataset\nIf the size if larger than the threshold, it will reduce the amount of column or row or both by splitting the dataset/dataframe.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.data_preprocessing.Splitter.DSBOX",
"python_path": "d3m.primitives.data_preprocessing.splitter.DSBOX",
"primitive_family": "DATA_PREPROCESSING",
"algorithm_types": [
"DATA_SPLITTING"
......@@ -22,7 +22,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@61280ffddf5a40f42f1472f35f6e6df0c88c1129#egg=dsbox-datacleaning"
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@de38b3767de94d10ae8ff696f82af87dce14959f#egg=dsbox-datacleaning"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -235,5 +235,5 @@
}
},
"structural_type": "dsbox.datapreprocessing.cleaner.splitter.Splitter",
"digest": "f699ebd9f24800101f3c8144f81e6f4e8b13cd5d441d51b5b7acc9be6c92beb7"
"digest": "1fc9512888bb2e2fe42cf9100702618d4353de90bc8a8a27bf16c383f26e6a8a"
}
......@@ -3,7 +3,7 @@
"version": "1.4.4",
"name": "DSBox Unary Data Encoder",
"description": "A primitive which converts the numerical attributes to multi-column attributes.\nEach new column value would be 1 if the original value is larger than this column's name value\nOtherwise the new column value would be 0\nThis encoder only operate when the amount of the numerical data is less than 12, otherwise the column would keep unchanged.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.data_preprocessing.UnaryEncoder.DSBOX",
"python_path": "d3m.primitives.data_preprocessing.unary_encoder.DSBOX",
"primitive_family": "DATA_PREPROCESSING",
"algorithm_types": [
"ENCODE_ONE_HOT"
......@@ -22,7 +22,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@61280ffddf5a40f42f1472f35f6e6df0c88c1129#egg=dsbox-datacleaning"
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@de38b3767de94d10ae8ff696f82af87dce14959f#egg=dsbox-datacleaning"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -258,5 +258,5 @@
}
},
"structural_type": "dsbox.datapreprocessing.cleaner.unary_encoder.UnaryEncoder",
"digest": "667ae162880c0f459662d49281e03ff442409a8a1ff3f7f6c7fdcdb0f38457f0"
"digest": "5aa38e87ab2bb0d6f9790613ca7d7a6c3ffb26c76721a1adf78fd21309dbf936"
}
......@@ -3,7 +3,7 @@
"version": "1.4.4",
"name": "DSBox unfold",
"description": "A primitive which concat a list of dataframe to a single dataframe vertically\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.data_preprocessing.Unfold.DSBOX",
"python_path": "d3m.primitives.data_preprocessing.unfold.DSBOX",
"primitive_family": "DATA_PREPROCESSING",
"algorithm_types": [
"DATA_CONVERSION"
......@@ -21,7 +21,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@61280ffddf5a40f42f1472f35f6e6df0c88c1129#egg=dsbox-datacleaning"
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@de38b3767de94d10ae8ff696f82af87dce14959f#egg=dsbox-datacleaning"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -198,5 +198,5 @@
}
},
"structural_type": "dsbox.datapostprocessing.unfold.Unfold",
"digest": "9824fd76896fc084bebe6273f3431dbc3301bdac4c28de3b9ebafeaaf3724687"
"digest": "b4393739d42de418c76150b2c7043d2f6cd165f5d7609136decee5850330f20b"
}
......@@ -3,7 +3,7 @@
"version": "1.4.4",
"name": "DSBox vertically concat",
"description": "A primitive which concat a list of dataframe to a single dataframe vertically\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.data_preprocessing.VerticalConcat.DSBOX",
"python_path": "d3m.primitives.data_preprocessing.vertical_concat.DSBOX",
"primitive_family": "DATA_PREPROCESSING",
"algorithm_types": [
"DATA_CONVERSION"
......@@ -22,7 +22,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@61280ffddf5a40f42f1472f35f6e6df0c88c1129#egg=dsbox-datacleaning"
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@de38b3767de94d10ae8ff696f82af87dce14959f#egg=dsbox-datacleaning"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -199,5 +199,5 @@
}
},
"structural_type": "dsbox.datapostprocessing.vertical_concat.VerticalConcat",
"digest": "8de0704d9618e5287a30a78ac36191f0fd87024c45a1028e6bf8a291cb94e9b2"
"digest": "c06ca2bb422717bbc2017f764d11802b47afa71d0fdd7e1fba5d197afd5a2bd8"
}
......@@ -3,7 +3,7 @@
"version": "1.4.4",
"name": "ISI DSBox To Numeric DataFrame",
"desription": "Convert to purely numeric DataFrame",
"python_path": "d3m.primitives.data_transformation.ToNumeric.DSBOX",
"python_path": "d3m.primitives.data_transformation.to_numeric.DSBOX",
"source": {
"name": "ISI",
"contact": "kyao:kyao@isi.edu",
......@@ -18,7 +18,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@61280ffddf5a40f42f1472f35f6e6df0c88c1129#egg=dsbox-datacleaning"
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@de38b3767de94d10ae8ff696f82af87dce14959f#egg=dsbox-datacleaning"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -236,5 +236,5 @@
},
"structural_type": "dsbox.datapreprocessing.cleaner.to_numeric.ToNumeric",
"description": "A primitive which returns a DataFrame that is purely numeric. Numeric semantic type\ncolumns are converted to numerical structual type columns. Columns that are not numeric\nsemantic type are droped. Missing values are encoded as NaN. Many SKLearn primitives\nrequire purely numeric DataFrame as input. It useful to run this primitive after\nrunning d3m.primitives.data_preprocessing.Encoder.DSBOX to encode categorical columns\nand d3m.primitives.dsbox.CorexText to encode text columns. This primitve preserves the\nD3M index column.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"digest": "29e50713fa59bf17028dd45e141a5fdf17e4cb43b7f606d48d2821ca4fe4568c"
"digest": "2f26717d223121d6441fca135447063023e047468181f279a1fd4754ad249b3c"
}
......@@ -3,7 +3,7 @@
"version": "1.4.4",
"name": "DSBox denormalize",
"description": "A primitive which converts a dataset with multiple tabular resources into a dataset with only one tabular resource,\nbased on known relations between tabular resources. Any resource which can be joined is joined, and other resources\nare discarded.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.normalization.Denormalize.DSBOX",
"python_path": "d3m.primitives.normalization.denormalize.DSBOX",
"primitive_family": "NORMALIZATION",
"algorithm_types": [
"DATA_NORMALIZATION"
......@@ -22,7 +22,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@61280ffddf5a40f42f1472f35f6e6df0c88c1129#egg=dsbox-datacleaning"
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@de38b3767de94d10ae8ff696f82af87dce14959f#egg=dsbox-datacleaning"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -223,5 +223,5 @@
}
},
"structural_type": "dsbox.datapreprocessing.cleaner.denormalize.Denormalize",
"digest": "852d097272bbe022e7308ac623171943bd799de5ae6862c4c29e8d7696465ad2"
"digest": "ceae22a4a884fca9e584002a2bc9306ed15514ba30591ed6a8c2440bb63c47e9"
}
......@@ -3,7 +3,7 @@
"version": "1.4.4",
"name": "DSBox feature scaler",
"description": "A primitive which scales all the Integer & Float variables in the Dataframe.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.normalization.IQRScaler.DSBOX",
"python_path": "d3m.primitives.normalization.iqr_scaler.DSBOX",
"primitive_family": "NORMALIZATION",
"algorithm_types": [
"DATA_NORMALIZATION"
......@@ -22,7 +22,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@61280ffddf5a40f42f1472f35f6e6df0c88c1129#egg=dsbox-datacleaning"
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@de38b3767de94d10ae8ff696f82af87dce14959f#egg=dsbox-datacleaning"
}
],
"precondition": [
......@@ -301,5 +301,5 @@
}
},
"structural_type": "dsbox.datapreprocessing.cleaner.IQRScaler.IQRScaler",
"digest": "2ac1a14e784cfaf283868390baa45f3262a05200b0f5f4a5e3102b2767c528b2"
"digest": "7e28f834700c43bd95a4c004d3f040203126b150cf697050935e9eed3432f627"
}
......@@ -3,7 +3,7 @@
"version": "1.4.4",
"name": "DSBox Profiler",
"description": "data profiler moduel. Now only supports csv data.\n\nParameters:\n----------\n_punctuation_outlier_weight: a integer\n the coefficient used in outlier detection for punctuation. default is 3\n\n_numerical_outlier_weight\n\n_token_delimiter: a string\n delimiter that used to seperate tokens, default is blank space \" \".\n\n_detect_language: boolean\n true: do detect language; false: not detect language\n\n_topk: a integer\n\n_verbose: boolean\n control the _verbose\n\nAttributes:\n----------\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.schema_discovery.Profiler.DSBOX",
"python_path": "d3m.primitives.schema_discovery.profiler.DSBOX",
"primitive_family": "SCHEMA_DISCOVERY",
"algorithm_types": [
"DATA_PROFILING"
......@@ -21,7 +21,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@61280ffddf5a40f42f1472f35f6e6df0c88c1129#egg=dsbox-datacleaning"
"package_uri": "git+https://github.com/usc-isi-i2/dsbox-cleaning@de38b3767de94d10ae8ff696f82af87dce14959f#egg=dsbox-datacleaning"
}
],
"precondition": [],
......@@ -253,5 +253,5 @@
}
},
"structural_type": "dsbox.datapreprocessing.cleaner.data_profile.Profiler",
"digest": "f424823b3b1961d269e9b48de0c0d8ad398304342c236859dce225215b4dea11"
"digest": "e76efdd8b0a800889e2cd7abd44f6ee07379c893ce460c69fb5abbe6d3d34ec1"
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment