Commit dea3b508 authored by Chris Bethune's avatar Chris Bethune Committed by Mitar

updates to text encoder, bert classification + run files

adds bert classification run
parent d10c7ab1
{
"id": "9e5386a6-7dbf-42e0-8f8b-ba9c9861e950",
"id": "efa53036-a206-4e74-a8fe-488bc9334ccf",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-11-21T19:05:46.030644Z",
"created": "2019-11-26T02:03:45.154805Z",
"inputs": [
{
"name": "inputs"
......@@ -173,6 +173,10 @@
"doc_col_1": {
"type": "VALUE",
"data": 3
},
"batch_size": {
"type": "VALUE",
"data": 16
}
}
},
......@@ -201,5 +205,5 @@
]
}
],
"digest": "05daff3a48b285e98cd2d3c3fb3f41a5b1dd25021ace0f05955d758f02d29256"
"digest": "872b74f1202af7884c42f9565ef22a05c88792c0153b2cf8bb88be4b655bd1f4"
}
......@@ -14,7 +14,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/uncharted-distil/[email protected]5032d27bd93eb21726e26ecc39c6eeba2f557ae1#egg=distil-primitives"
"package_uri": "git+https://github.com/uncharted-distil/[email protected]e0e37bc1aeb905388ffe7f601efabdb925825188#egg=distil-primitives"
},
{
"type": "FILE",
......@@ -238,5 +238,5 @@
},
"structural_type": "distil.primitives.bert_classification.BertPairClassificationPrimitive",
"description": "Uses a pre-trained pytorch BERT model to predict a label of 0 or 1 for a pair of documents, given training samples\nof document pairs labelled 0/1. Takes a datrame of documents and a dataframe of labels as inputs, and returns\na dataframe containing the predictions as a result.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"digest": "e22bb57e28e024793eba392bce5e3b468d1b7abcbba9fbbd9d5c0e7174950cd8"
"digest": "b48b883f4298b81b5e3912bb0d325666066b1f5875f1261e19a68c3a2308ae3c"
}
{
"id": "b03763e1-f689-40b4-b832-6a45644b691a",
"id": "1ae6124b-44ce-45c4-b9f2-6ecbd565aff9",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-11-21T19:05:45.392447Z",
"created": "2019-11-26T02:03:44.513307Z",
"inputs": [
{
"name": "inputs"
......@@ -243,5 +243,5 @@
}
}
],
"digest": "cd674e3b3cd2f4d2c66b3ba41f6aca1d7935870a324995a31a9ce5969fab0aca"
"digest": "ef9fd101968089319b0a3c4081075e2788e87cfb5692b227fec4755677ddb755"
}
......@@ -14,7 +14,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/uncharted-distil/[email protected]5032d27bd93eb21726e26ecc39c6eeba2f557ae1#egg=distil-primitives"
"package_uri": "git+https://github.com/uncharted-distil/[email protected]e0e37bc1aeb905388ffe7f601efabdb925825188#egg=distil-primitives"
}
],
"algorithm_types": [
......@@ -199,5 +199,5 @@
},
"structural_type": "distil.primitives.text_classifier.TextClassifierPrimitive",
"description": "This primitive takes a dataframe containing input texts, performs TFIDF on this text, and then builds a classifier using\nthese features.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"digest": "4e619b607295485e2d28323758e76b40d5bb32fd85ff0511fc3eecce03702d01"
"digest": "999226f42c74590a28d88b8e7acf2ddaf4eeaf6e3c129e77880de3642ae03683"
}
{
"id": "67601566-6aa6-4037-bfd7-beeaeda4e796",
"id": "25df1b0b-027e-449a-862c-f1fce7f9ce5c",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-11-21T19:05:45.907786Z",
"created": "2019-11-26T02:03:45.020311Z",
"inputs": [
{
"name": "inputs"
......@@ -332,5 +332,5 @@
]
}
],
"digest": "4e4ad10b9d6efca45605e9d37be8946997dad4d9597c067fb49746a19dd7392b"
"digest": "b5d53007817e104b9bce3b17a0c734aeb3ea8360752742859ca3ba4d4199a942"
}
......@@ -14,7 +14,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/uncharted-distil/[email protected]5032d27bd93eb21726e26ecc39c6eeba2f557ae1#egg=distil-primitives"
"package_uri": "git+https://github.com/uncharted-distil/[email protected]e0e37bc1aeb905388ffe7f601efabdb925825188#egg=distil-primitives"
}
],
"algorithm_types": [
......@@ -196,5 +196,5 @@
},
"structural_type": "distil.primitives.k_means.KMeansPrimitive",
"description": "A wrapper for scikit learn k-means that takes in a dataframe as input and returns a dataframe of (d3mIndex, cluster numbers) tuples as its\noutput. It will ignore columns with a string structural type.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"digest": "c0877e2a921fb5ce547e66db9db8bd1f4a22a5e7402e6aec86a721d55658264d"
"digest": "4d4e6d8c1b33331afb0f51980944c8f886f7c3a2a842643f69af02ec3bb4afc7"
}
{
"id": "cc4dc481-4056-4026-a379-c80953ad72f6",
"id": "7fd6a2cd-bdf8-49ae-a842-1e8d5efb9a6a",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-11-21T19:07:48.445261Z",
"created": "2019-11-26T02:03:45.993161Z",
"inputs": [
{
"name": "inputs"
......@@ -171,5 +171,5 @@
]
}
],
"digest": "59f1d57b32902fb31c5ff23c8cd0bcb84f9c5239e588bd1595f8417d1633c4d2"
"digest": "8cf8f50fb753bff7907823754f7ba55a8f753dfa824d367c45d360b4bf06cb78"
}
......@@ -14,7 +14,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/uncharted-distil/[email protected]5032d27bd93eb21726e26ecc39c6eeba2f557ae1#egg=distil-primitives"
"package_uri": "git+https://github.com/uncharted-distil/[email protected]e0e37bc1aeb905388ffe7f601efabdb925825188#egg=distil-primitives"
}
],
"algorithm_types": [
......@@ -193,5 +193,5 @@
},
"structural_type": "distil.primitives.collaborative_filtering.CollaborativeFilteringPrimitive",
"description": "A collaborative filtering primitive based on pytorch. Will use available GPU resources, or run in a CPU mode at a significant\nperformance penalty. Takes a dataframe containing user IDs, item IDs, and ratings as training input, and produces a dataframe\ncontaining rating predictions as output. The primitive encodes labels internally.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"digest": "30d20d515076625c94f88c6efa441933394a31b4ee39d9a208949a457a361f31"
"digest": "c16dd01076d35c54b2bd216c05b9cc7445caf50523c0b100a4919920dcd5d299"
}
{
"id": "0aa6c604-95fb-4d59-b099-15bb48b926ae",
"id": "0a334da5-4413-4bb0-9454-4af9de7af863",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-11-21T19:05:45.322453Z",
"created": "2019-11-26T02:03:44.436072Z",
"inputs": [
{
"name": "inputs"
......@@ -68,5 +68,5 @@
}
}
],
"digest": "93c1c23591553b1e24b7d30003f8503b62e14311158617b47ef6d19ab93f1b8c"
"digest": "04c506642a19f88afcbe0efa76cfb783b35baa7c5e491165db104b4743cc8489"
}
......@@ -14,7 +14,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/uncharted-distil/[email protected]5032d27bd93eb21726e26ecc39c6eeba2f557ae1#egg=distil-primitives"
"package_uri": "git+https://github.com/uncharted-distil/[email protected]e0e37bc1aeb905388ffe7f601efabdb925825188#egg=distil-primitives"
}
],
"algorithm_types": [
......@@ -174,5 +174,5 @@
},
"structural_type": "distil.primitives.community_detection.DistilCommunityDetectionPrimitive",
"description": "A primitive that wraps a null model handling of community detection.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"digest": "0bc1c02dab9790f1ac2e81af6af733a139cb712865acd88d5ea2ed9298fb1bb2"
"digest": "79fe8880279ea0f220fe4047358a6bff0893f4859fa902ebc17c3f543a530a22"
}
{
"id": "ac757cde-ad27-4d43-94b6-3bebf1468d33",
"id": "650d7784-14c0-42e5-977e-59a3525d4c08",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-11-21T19:05:45.143011Z",
"created": "2019-11-26T02:03:44.170853Z",
"inputs": [
{
"name": "inputs"
......@@ -172,5 +172,5 @@
]
}
],
"digest": "04d3f5a8d58f544a5ff897665ca38906d4466d816f58d7ea34d20c8ffa47dde5"
"digest": "db9433a3508e6ac8aa95dc2ae5ef1c4ae2ea8b0f0f74a30ae31808059886b02a"
}
......@@ -14,7 +14,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/uncharted-distil/[email protected]5032d27bd93eb21726e26ecc39c6eeba2f557ae1#egg=distil-primitives"
"package_uri": "git+https://github.com/uncharted-distil/[email protected]e0e37bc1aeb905388ffe7f601efabdb925825188#egg=distil-primitives"
}
],
"algorithm_types": [
......@@ -206,5 +206,5 @@
},
"structural_type": "distil.primitives.audio_loader.AudioDatasetLoaderPrimitive",
"description": "A primitive which reads columns referencing audio files.\n\nEach column which has ``https://metadata.datadrivendiscovery.org/types/FileName`` semantic type\nand a valid media type (``audio/aiff``, ``audio/flac``, ``audio/ogg``, ``audio/wav``, ``audio/mpeg``)\nhas every filename read into an audio represented as a numpy array. By default the resulting column\nwith read arrays is appended to existing columns.\n\nThe shape of numpy arrays is S x C. S is the number of samples, C is the number of\nchannels in an audio (e.g., C = 1 for mono, C = 2 for stereo). dtype is float32.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"digest": "85120bc4af21b562872c4b23c9fc7a9120e636b1b56ba0c2c496569cddbdb2f1"
"digest": "b194e9ad133b215f4d74f28318073cc0b011d108a3f022fdf500376b65ede887"
}
{
"id": "79931c54-138a-4c5e-a74e-26e85ff40733",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-11-21T19:17:12.390224Z",
"inputs": [
{
"name": "inputs"
}
],
"outputs": [
{
"data": "steps.0.produce",
"name": "output"
}
],
"steps": [
{
"type": "PRIMITIVE",
"primitive": {
"id": "1c4aed23-f3d3-4e6b-9710-009a9bc9b694",
"version": "0.1.0",
"python_path": "d3m.primitives.data_preprocessing.data_cleaning.DistilTimeSeriesFormatter",
"name": "Time series formatter"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "inputs.0"
}
},
"outputs": [
{
"id": "produce"
}
]
}
],
"digest": "dd87f26c8d56971d13b0ee77c263fa2677dbd131585bea80fbf9f99701e64947"
}
{"id": "6262be44-e3b2-4770-acf9-6d21c0b348e8", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2019-11-22T21:57:39.439450Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.6.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "1c4aed23-f3d3-4e6b-9710-009a9bc9b694", "version": "0.1.0", "python_path": "d3m.primitives.data_preprocessing.data_cleaning.DistilTimeSeriesFormatter", "name": "Time series formatter", "digest": "68c1298e5d64a2604de1838072086e9d344851ec2c9d8cec3de8c02a24143827"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common", "name": "Extract a DataFrame from a Dataset", "digest": "bddea02d001c6633722c14643ec2a065fb4a977354ddbdf74282d076da77e530"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common", "name": "Extract a DataFrame from a Dataset", "digest": "bddea02d001c6633722c14643ec2a065fb4a977354ddbdf74282d076da77e530"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7", "version": "0.5.0", "python_path": "d3m.primitives.data_transformation.column_parser.Common", "name": "Parses strings into their types", "digest": "c162d57bc73b6f30a0d600af31136b5028fe0efcd852efc15b9ad2826a2f391f"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"parse_semantic_types": {"type": "VALUE", "data": ["http://schema.org/Boolean", "http://schema.org/Integer", "http://schema.org/Float", "https://metadata.datadrivendiscovery.org/types/FloatVector"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common", "name": "Extracts columns by semantic type", "digest": "465edb8a31169e414dc8ad5b37c8c0bc07625aae9dc72fbfdc385ea10ed6d36f"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.3.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Target", "https://metadata.datadrivendiscovery.org/types/TrueTarget", "https://metadata.datadrivendiscovery.org/types/SuggestedTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "2d6d3223-1b3c-49cc-9ddd-50f571818268", "version": "1.0.3", "python_path": "d3m.primitives.time_series_classification.k_neighbors.Kanine", "name": "kanine", "digest": "42caa333de0255dca6233e02d0d5152a4d0211824a66eaaacd6c447b10a8fbe6"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}, "outputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.construct_predictions.Common", "name": "Construct pipeline predictions output", "digest": "5144bad4fea16168f6667c991e137067721dd0573c68ab1bf172ead9e2c82869"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.5.produce"}, "reference": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "02b11d0a833660462127ef58a934c9ccc9b439cb1c1edce29b296f7d45b14df6"}
\ No newline at end of file
......@@ -18,7 +18,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/uncharted-distil/[email protected]5032d27bd93eb21726e26ecc39c6eeba2f557ae1#egg=distil-primitives"
"package_uri": "git+https://github.com/uncharted-distil/[email protected]e0e37bc1aeb905388ffe7f601efabdb925825188#egg=distil-primitives"
}
],
"algorithm_types": [
......@@ -202,5 +202,5 @@
},
"structural_type": "distil.primitives.timeseries_formatter.TimeSeriesFormatterPrimitive",
"description": "Reads the time series files from a given column in an input dataset resource into a new M x N data resource,\nwhere each value in timeseries occupies one of M rows. Each row has N columns, representing the union of\nthe fields found in the timeseries files and in the main data resource. The loading process assumes that\neach series file has an identical set of timestamps. The `GroupingKey` semantic type will be added to the\ncolumn that contains the file names, and the time column will be marked with the `Time` semantic type.\n\nExample output::\n\n filename | time | value | label |\n -------------------------------------------------\n f1.csv | 0 | 0.1 | alpha |\n f1.csv | 1 | 0.12 | alpha |\n f1.csv | 2 | 0.13 | alpha |\n f2.csv | 0 | 0.72 | bravo |\n f2.csv | 1 | 0.77 | bravo |\n f2.csv | 2 | 0.67 | bravo |\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"digest": "9a45355a593d1d6f6c003a0c1afc864705d297867b113a217056cf70f49e432f"
"digest": "85705909bd0980946a9822b25e6e3611e3c5849415638882eb31ac1eed5e2790"
}
{
"id": "aa7e978d-c816-4b4a-816a-4a164ea027f7",
"id": "25df1b0b-027e-449a-862c-f1fce7f9ce5c",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-11-21T19:07:48.591926Z",
"created": "2019-11-26T02:03:45.020311Z",
"inputs": [
{
"name": "inputs"
......@@ -9,7 +9,7 @@
],
"outputs": [
{
"data": "steps.14.produce",
"data": "steps.11.produce",
"name": "output"
}
],
......@@ -57,6 +57,7 @@
"parse_semantic_types": {
"type": "VALUE",
"data": [
"http://schema.org/Boolean",
"http://schema.org/Integer",
"http://schema.org/Float",
"https://metadata.datadrivendiscovery.org/types/FloatVector"
......@@ -92,35 +93,6 @@
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
"name": "Extracts columns by semantic type"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.1.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"semantic_types": {
"type": "VALUE",
"data": [
"https://metadata.datadrivendiscovery.org/types/Target",
"https://metadata.datadrivendiscovery.org/types/TrueTarget"
]
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
......@@ -141,26 +113,6 @@
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "67f53b00-f936-4bb4-873e-4698c4aaa37f",
"version": "0.1.0",
"python_path": "d3m.primitives.data_transformation.list_encoder.DistilListEncoder",
"name": "List encoder"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.2.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
......@@ -172,7 +124,7 @@
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.5.produce"
"data": "steps.3.produce"
}
},
"outputs": [
......@@ -192,31 +144,7 @@
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.6.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "09f252eb-215d-4e0b-9a60-fcd967f5e708",
"version": "0.2.0",
"python_path": "d3m.primitives.data_transformation.encoder.DistilTextEncoder",
"name": "Text encoder"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.7.produce"
},
"outputs": {
"type": "CONTAINER",
"data": "steps.3.produce"
"data": "steps.4.produce"
}
},
"outputs": [
......@@ -236,7 +164,7 @@
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.8.produce"
"data": "steps.5.produce"
}
},
"outputs": [
......@@ -262,7 +190,7 @@
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.9.produce"
"data": "steps.6.produce"
}
},
"outputs": [
......@@ -288,7 +216,7 @@
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.10.produce"
"data": "steps.7.produce"
}
},
"outputs": [
......@@ -326,7 +254,7 @@
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.11.produce"
"data": "steps.8.produce"
}
},
"outputs": [
......@@ -352,19 +280,15 @@
{
"type": "PRIMITIVE",
"primitive": {
"id": "e0ad06ce-b484-46b0-a478-c567e1ea7e02",
"version": "0.2.0",
"python_path": "d3m.primitives.learner.random_forest.DistilEnsembleForest",
"name": "EnsembleForest"
"id": "3b09024e-a83b-418c-8ff4-cf3d30a9609e",
"version": "0.1.0",
"python_path": "d3m.primitives.clustering.k_means.DistilKMeans",
"name": "K means"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.12.produce"
},
"outputs": {
"type": "CONTAINER",
"data": "steps.3.produce"
"data": "steps.9.produce"
}
},
"outputs": [
......@@ -373,9 +297,13 @@
}
],
"hyperparams": {
"metric": {
"n_clusters": {
"type": "VALUE",
"data": "f1Macro"
"data": 100
},
"cluster_col_name": {
"type": "VALUE",
"data": "Class"
}
}
},
......@@ -390,7 +318,7 @@
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.13.produce"
"data": "steps.10.produce"
},
"reference": {
"type": "CONTAINER",
......@@ -404,5 +332,5 @@
]
}
],
"digest": "5f0f19ad64c84ff3a7fab807dfe6927ce1f524661ec50174a86927e171a0f018"
"digest": "b5d53007817e104b9bce3b17a0c734aeb3ea8360752742859ca3ba4d4199a942"
}
......@@ -14,7 +14,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/uncharted-distil/[email protected]5032d27bd93eb21726e26ecc39c6eeba2f557ae1#egg=distil-primitives"
"package_uri": "git+https://github.com/uncharted-distil/[email protected]e0e37bc1aeb905388ffe7f601efabdb925825188#egg=distil-primitives"
}
],
"algorithm_types": [
......@@ -194,5 +194,5 @@
},
"structural_type": "distil.primitives.enrich_dates.EnrichDatesPrimitive",
"description": "Enriches dates by converting to seconds from a base time and computing Z scores. The results\nare appended to the existing dataset, and the original column is left in place for additional\ndownstream processing.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"digest": "55d02c06b221f3b431430904dbc6203abaf3d3fc51c9139fbbdb5db41d5cebdd"
"digest": "9bbe7087602c7ac0ec4b5ea93c7ae15a8dca34b01d8d85e2b85ef2874c3f796d"
}
......@@ -14,7 +14,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/uncharted-distil/[email protected]5032d27bd93eb21726e26ecc39c6eeba2f557ae1#egg=distil-primitives"
"package_uri": "git+https://github.com/uncharted-distil/[email protected]e0e37bc1aeb905388ffe7f601efabdb925825188#egg=distil-primitives"
}
],
"algorithm_types": [
......@@ -194,5 +194,5 @@
},
"structural_type": "distil.primitives.replace_singletons.ReplaceSingletonsPrimitive",
"description": "Replaces category members with a count of one with a shared singleton token value. Currently applies to columns\nwith semantic type Categorical, Ordinal or DateTime.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"digest": "7bfcd995ee366b1a166e819e5be3919fdea45065fce68e061d99364695f42a9b"
"digest": "ca67f40cddf5c5202aa9d80f2e717448f6e049a3385273a938802afc42357b23"
}
......@@ -14,7 +14,7 @@
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/uncharted-distil/[email protected]5032d27bd93eb21726e26ecc39c6eeba2f557ae1#egg=distil-primitives"
"package_uri": "git+https://github.com/uncharted-distil/[email protected]e0e37bc1aeb905388ffe7f601efabdb925825188#egg=distil-primitives"
}
],
"algorithm_types": [
......@@ -187,5 +187,5 @@
},
"structural_type": "distil.primitives.binary_encoder.BinaryEncoderPrimitive",
"description": "Performs a binary encoding of categorical columns that are above a caller specified cardinality. The source columns will be replaced by the\nencoding columns. Some information is lost in comparison to a one-hot encoding, but the number of dimensions used is reduced.\nCategorical columns currently include those with the semantic type Categorical, Ordinal or DateTime.\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"digest": "74c22ee6e849f094aa6238abeceaf8409db90ccb3bba1a6ed96f9d4d9c79dc0d"
"digest": "caa2b367b057509d774421687c1776d18727453729f677e2205b9133d36565fe"
}
{
"id": "aa7e978d-c816-4b4a-816a-4a164ea027f7",
"id": "35402e50-52f3-487d-89eb-22649f65b41f",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2019-11-21T19:07:48.591926Z",
"created": "2019-11-26T02:03:46.152906Z",
"inputs": [
{
"name": "inputs"
......@@ -404,5 +404,5 @@
]