Commit d70f591b authored by Mitar
Browse files

Merge branch 'master' into 'master'

Master

See merge request !175
parents d9f98b9a 11467d25
Pipeline #114048831 failed with stages
in 40 minutes and 18 seconds
{
"id": "896c3187-5e91-48c9-9b14-51a25fc35390",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2020-01-31T05:05:13.892079Z",
"inputs": [
{
"name": "inputs"
}
],
"outputs": [
{
"data": "steps.6.produce",
"name": "output predictions"
}
],
"steps": [
{
"type": "PRIMITIVE",
"primitive": {
"id": "f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e",
"version": "0.2.0",
"python_path": "d3m.primitives.data_transformation.denormalize.Common",
"name": "Denormalize datasets",
"digest": "6c1cc604302d7418145f405efe5ae124755cab6da2b66cb53240b74e2b140a1e"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "inputs.0"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65",
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
"name": "Extract a DataFrame from a Dataset",
"digest": "990784f527a78250fcab70af9714314490e91f9a5916eb38834d74e8c38f435b"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.0.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "19d45344-12e7-48fe-9eff-3be26525c0b0",
"version": "0.1.0",
"python_path": "d3m.primitives.feature_extraction.bag_of_characters.UBC",
"name": "Bag of characters feature extraction",
"digest": "6b8e3fd625d76c5d77fdddbd3bad4d36caf91c476a54b8c8ef5e1ef4324a1ec5"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.1.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7",
"version": "0.6.0",
"python_path": "d3m.primitives.data_transformation.column_parser.Common",
"name": "Parses strings into their types",
"digest": "96e020725140c0a67033e5e340c555514b0a5432179254c38813a4e85687528d"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.2.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"version": "0.4.0",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
"name": "Extracts columns by semantic type",
"digest": "591d3ee40b266bda04fa345aeea7b56d81db182fb144cde164e02be019a05f10"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.3.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"semantic_types": {
"type": "VALUE",
"data": [
"https://metadata.datadrivendiscovery.org/types/Attribute"
]
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"version": "0.4.0",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
"name": "Extracts columns by semantic type",
"digest": "591d3ee40b266bda04fa345aeea7b56d81db182fb144cde164e02be019a05f10"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.1.produce"
}
},
"outputs": [
{
"id": "produce"
}
],
"hyperparams": {
"semantic_types": {
"type": "VALUE",
"data": [
"https://metadata.datadrivendiscovery.org/types/TrueTarget"
]
}
}
},
{
"type": "PRIMITIVE",
"primitive": {
"id": "e20d003d-6a9f-35b0-b4b5-20e42b30282a",
"version": "2019.11.13",
"python_path": "d3m.primitives.classification.decision_tree.SKlearn",
"name": "sklearn.tree.tree.DecisionTreeClassifier",
"digest": "1e227dfcd7fb9677352bcf17ca6b76352eb2ae830b61a5181a3020cc07e98b85"
},
"arguments": {
"inputs": {
"type": "CONTAINER",
"data": "steps.4.produce"
},
"outputs": {
"type": "CONTAINER",
"data": "steps.5.produce"
}
},
"outputs": [
{
"id": "produce"
}
]
}
],
"digest": "0fd7389b1c09cafb21b54cc0cf9ddeca36450e613257e4a946e444ad78232500"
}
\ No newline at end of file
{
"id": "19d45344-12e7-48fe-9eff-3be26525c0b0",
"version": "0.1.0",
"name": "Bag of characters feature extraction",
"description": "A primitive for extracting features describing the distribution of characters in a column.\nIt computes the count of all 96 ASCII-printable characters (i.e., digits, letters,\nand punctuation characters, but not whitespace) within each value of a column.\nThen it aggregates these counts with 10 statistical functions (i.e., any, all, mean,\nvariance, min, max, median, sum, kurtosis, skewness), resulting in 960 features.\nCitation: https://arxiv.org/pdf/1905.10688.pdf\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.feature_extraction.bag_of_characters.UBC",
"primitive_family": "FEATURE_EXTRACTION",
"algorithm_types": [
"VECTORIZATION"
],
"source": {
"name": "UBC",
"contact": "mailto:tonyjos@ubc.cs.ca",
"uris": [
"https://github.com/tonyjo/ubc_primitives.git"
]
},
"keywords": [
"bag of characters",
"NLP",
"character features"
],
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/tonyjo/ubc_primitives.git@25280d477af3fb3b6ac5350da6e2c92954119a79#egg=ubc_primitives"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
"original_python_path": "primitives.boc.bag_of_characters.BagOfCharacters",
"primitive_code": {
"class_type_arguments": {
"Inputs": "d3m.container.pandas.DataFrame",
"Outputs": "d3m.container.pandas.DataFrame",
"Hyperparams": "primitives.boc.bag_of_characters.Hyperparams",
"Params": "NoneType"
},
"interfaces_version": "2020.1.9",
"interfaces": [
"transformer.TransformerPrimitiveBase",
"base.PrimitiveBase"
],
"hyperparams": {
"n_samples": {
"type": "d3m.metadata.hyperparams.Constant",
"default": 1000,
"structural_type": "int",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/ControlParameter"
],
"description": "Max number of samples/words to select"
}
},
"arguments": {
"hyperparams": {
"type": "primitives.boc.bag_of_characters.Hyperparams",
"kind": "RUNTIME"
},
"volumes": {
"type": "typing.Union[NoneType, typing.Dict[str, str]]",
"kind": "RUNTIME",
"default": null
},
"timeout": {
"type": "typing.Union[NoneType, float]",
"kind": "RUNTIME",
"default": null
},
"iterations": {
"type": "typing.Union[NoneType, int]",
"kind": "RUNTIME",
"default": null
},
"produce_methods": {
"type": "typing.Sequence[str]",
"kind": "RUNTIME"
},
"inputs": {
"type": "d3m.container.pandas.DataFrame",
"kind": "PIPELINE"
},
"params": {
"type": "NoneType",
"kind": "RUNTIME"
}
},
"class_methods": {},
"instance_methods": {
"__init__": {
"kind": "OTHER",
"arguments": [
"hyperparams",
"volumes"
],
"returns": "typing.Any"
},
"fit": {
"kind": "OTHER",
"arguments": [
"timeout",
"iterations"
],
"returns": "d3m.primitive_interfaces.base.CallResult[NoneType]",
"description": "A noop.\n\nParameters\n----------\ntimeout : float\n A maximum time this primitive should be fitting during this method call, in seconds.\niterations : int\n How many of internal iterations should the primitive do.\n\nReturns\n-------\nCallResult[None]\n A ``CallResult`` with ``None`` value."
},
"fit_multi_produce": {
"kind": "OTHER",
"arguments": [
"produce_methods",
"inputs",
"timeout",
"iterations"
],
"returns": "d3m.primitive_interfaces.base.MultiCallResult",
"description": "A method calling ``fit`` and after that multiple produce methods at once.\n\nParameters\n----------\nproduce_methods : Sequence[str]\n A list of names of produce methods to call.\ninputs : Inputs\n The inputs given to all produce methods.\ntimeout : float\n A maximum time this primitive should take to both fit the primitive and produce outputs\n for all produce methods listed in ``produce_methods`` argument, in seconds.\niterations : int\n How many of internal iterations should the primitive do for both fitting and producing\n outputs of all produce methods.\n\nReturns\n-------\nMultiCallResult\n A dict of values for each produce method wrapped inside ``MultiCallResult``."
},
"get_params": {
"kind": "OTHER",
"arguments": [],
"returns": "NoneType",
"description": "A noop.\n\nReturns\n-------\nParams\n An instance of parameters."
},
"multi_produce": {
"kind": "OTHER",
"arguments": [
"produce_methods",
"inputs",
"timeout",
"iterations"
],
"returns": "d3m.primitive_interfaces.base.MultiCallResult",
"description": "A method calling multiple produce methods at once.\n\nWhen a primitive has multiple produce methods it is common that they might compute the\nsame internal results for same inputs but return different representations of those results.\nIf caller is interested in multiple of those representations, calling multiple produce\nmethods might lead to recomputing same internal results multiple times. To address this,\nthis method allows primitive author to implement an optimized version which computes\ninternal results only once for multiple calls of produce methods, but return those different\nrepresentations.\n\nIf any additional method arguments are added to primitive's produce method(s), they have\nto be added to this method as well. This method should accept an union of all arguments\naccepted by primitive's produce method(s) and then use them accordingly when computing\nresults.\n\nThe default implementation of this method just calls all produce methods listed in\n``produce_methods`` in order and is potentially inefficient.\n\nIf primitive should have been fitted before calling this method, but it has not been,\nprimitive should raise a ``PrimitiveNotFittedError`` exception.\n\nParameters\n----------\nproduce_methods : Sequence[str]\n A list of names of produce methods to call.\ninputs : Inputs\n The inputs given to all produce methods.\ntimeout : float\n A maximum time this primitive should take to produce outputs for all produce methods\n listed in ``produce_methods`` argument, in seconds.\niterations : int\n How many of internal iterations should the primitive do.\n\nReturns\n-------\nMultiCallResult\n A dict of values for each produce method wrapped inside ``MultiCallResult``."
},
"produce": {
"kind": "PRODUCE",
"arguments": [
"inputs",
"timeout",
"iterations"
],
"returns": "d3m.primitive_interfaces.base.CallResult[d3m.container.pandas.DataFrame]",
"singleton": false,
"inputs_across_samples": [],
"description": "Inputs: pandas DataFrame\nReturns: Output pandas DataFrame with 960 features.\n\nParameters\n----------\ninputs : Inputs\n The inputs of shape [num_inputs, ...].\ntimeout : float\n A maximum time this primitive should take to produce outputs during this method call, in seconds.\niterations : int\n How many of internal iterations should the primitive do.\n\nReturns\n-------\nCallResult[Outputs]\n The outputs of shape [num_inputs, ...] wrapped inside ``CallResult``."
},
"set_params": {
"kind": "OTHER",
"arguments": [
"params"
],
"returns": "NoneType",
"description": "A noop.\n\nParameters\n----------\nparams : Params\n An instance of parameters."
},
"set_training_data": {
"kind": "OTHER",
"arguments": [],
"returns": "NoneType",
"description": "A noop.\n\nParameters\n----------"
}
},
"class_attributes": {
"logger": "logging.Logger",
"metadata": "d3m.metadata.base.PrimitiveMetadata"
},
"instance_attributes": {
"hyperparams": "d3m.metadata.hyperparams.Hyperparams",
"random_seed": "int",
"docker_containers": "typing.Dict[str, d3m.primitive_interfaces.base.DockerContainer]",
"volumes": "typing.Dict[str, str]",
"temporary_directory": "typing.Union[NoneType, str]"
}
},
"structural_type": "primitives.boc.bag_of_characters.BagOfCharacters",
"digest": "6b8e3fd625d76c5d77fdddbd3bad4d36caf91c476a54b8c8ef5e1ef4324a1ec5"
}
{
"id": "94f832f8-3e8c-4de8-bb92-479721dd45e5",
"id": "8b3db6ca-5607-4852-921b-1548cb483ee9",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
"created": "2020-01-24T05:28:28.392873Z",
"created": "2020-01-31T04:43:40.161838Z",
"inputs": [
{
"name": "inputs"
......@@ -21,7 +21,7 @@
"version": "0.3.0",
"python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
"name": "Extract a DataFrame from a Dataset",
"digest": "e796f76607089407dcd9b646eaccb244f06520e759112fd21846993f88fcd208"
"digest": "990784f527a78250fcab70af9714314490e91f9a5916eb38834d74e8c38f435b"
},
"arguments": {
"inputs": {
......@@ -38,11 +38,11 @@
{
"type": "PRIMITIVE",
"primitive": {
"id": "6f6ffb72-96cf-4cfe-9754-e2302eb5c927",
"id": "3d1876f2-cfbd-40a8-a6ec-b6a21efaa28d",
"version": "0.1.0",
"python_path": "d3m.primitives.data_transformation.semantic_type.UBC",
"python_path": "d3m.primitives.schema_discovery.profiler.UBC",
"name": "UBC semantic type",
"digest": "9bb05bd74ad658483fc7f291e0cf0a59af47de4731c74bc4240ec645f5453c74"
"digest": "15214f9416b5c7077da0391f5cfd034042fb19ce8b19d3d4458442a5379805d4"
},
"arguments": {
"inputs": {
......@@ -63,7 +63,7 @@
"version": "0.6.0",
"python_path": "d3m.primitives.data_transformation.column_parser.Common",
"name": "Parses strings into their types",
"digest": "c0be7426515ed38d05b03f3c08cb25db1d21e955f6d2d919ead8b5cb842bf2da"
"digest": "96e020725140c0a67033e5e340c555514b0a5432179254c38813a4e85687528d"
},
"arguments": {
"inputs": {
......@@ -81,10 +81,10 @@
"type": "PRIMITIVE",
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"version": "0.3.0",
"version": "0.4.0",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
"name": "Extracts columns by semantic type",
"digest": "de7c7baf994304d768ac9ab1ebcc8e4c5aa05c3abca0349fede0dc3604388f43"
"digest": "591d3ee40b266bda04fa345aeea7b56d81db182fb144cde164e02be019a05f10"
},
"arguments": {
"inputs": {
......@@ -110,10 +110,10 @@
"type": "PRIMITIVE",
"primitive": {
"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1",
"version": "0.3.0",
"version": "0.4.0",
"python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
"name": "Extracts columns by semantic type",
"digest": "de7c7baf994304d768ac9ab1ebcc8e4c5aa05c3abca0349fede0dc3604388f43"
"digest": "591d3ee40b266bda04fa345aeea7b56d81db182fb144cde164e02be019a05f10"
},
"arguments": {
"inputs": {
......@@ -139,10 +139,10 @@
"type": "PRIMITIVE",
"primitive": {
"id": "d016df89-de62-3c53-87ed-c06bb6a23cde",
"version": "2019.6.7",
"version": "2019.11.13",
"python_path": "d3m.primitives.data_cleaning.imputer.SKlearn",
"name": "sklearn.impute.SimpleImputer",
"digest": "adc79e644eec35eb9d616be755a5de83b27f66e42b04f6508a9ceb82d99cc739"
"digest": "1fbe6321949de3f9bd1d93c6900cd5c3b3ee4b1a01506b89c69a776a9d27bf7a"
},
"arguments": {
"inputs": {
......@@ -160,10 +160,10 @@
"type": "PRIMITIVE",
"primitive": {
"id": "e20d003d-6a9f-35b0-b4b5-20e42b30282a",
"version": "2019.6.7",
"version": "2019.11.13",
"python_path": "d3m.primitives.classification.decision_tree.SKlearn",
"name": "sklearn.tree.tree.DecisionTreeClassifier",
"digest": "f84f7e68434e476d5db1041f14fb867d6c44204271147d4e8f9fcf3a4638c9da"
"digest": "1e227dfcd7fb9677352bcf17ca6b76352eb2ae830b61a5181a3020cc07e98b85"
},
"arguments": {
"inputs": {
......@@ -182,5 +182,5 @@
]
}
],
"digest": "492742ad1e428efda234b39ffdccd5e9d7de1a36ef586733f98de74c7a8ef013"
"digest": "6e005e91ce0c3bc000d77de907ecde1bd441a3817efcfda4dbd620163d3fe014"
}
\ No newline at end of file
{
"id": "6f6ffb72-96cf-4cfe-9754-e2302eb5c927",
"id": "3d1876f2-cfbd-40a8-a6ec-b6a21efaa28d",
"version": "0.1.0",
"name": "UBC semantic type",
"description": "A primitive for detecting the semantic type of input column data.\n--> Currently Supported: 78 Semantic Types\n --------------------------------------------------------------------------\n |Address | Code | Education | Notes | Requirement |\n |Affiliate | Collection | Elevation | Operator | Result |\n |Age | Company | File size | Organisation | Service |\n |Affiliation | Command | Family | Order | Sales |\n |Album | Component | Format | Origin | Sex |\n |Area | Continent | Gender | Owner | Species |\n |Artist | Country | Genre | Person | State |\n |Birth date | County | Grades | Plays | Status |\n |Birth place | Creator | Industry | Position | Symbol |\n |Brand | Credit | ISBN | Product | Team |\n |Capacity | Currency | Jockey | Publisher | Team name |\n |Category | Day | Language | Range | Type |\n |City | Depth | Location | Rank | Weight |\n |Class | Description | Manufacturer| Ranking | Year |\n |Classification | Director | Name | Region | |\n |Club | Duration | Nationality | Religion | |\n --------------------------------------------------------------------------\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"python_path": "d3m.primitives.data_transformation.semantic_type.UBC",
"primitive_family": "DATA_TRANSFORMATION",
"python_path": "d3m.primitives.schema_discovery.profiler.UBC",
"primitive_family": "SCHEMA_DISCOVERY",
"algorithm_types": [
"DATA_CONVERSION"
],
......@@ -17,12 +17,43 @@
},
"keywords": [
"semantic type inference",
"data type detection"
"data type detection",
"data profiler"
],
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/tonyjo/ubc_primitives.git@384f36feeed3bd416a299b77e64a1efc2fc3aee0#egg=ubc_primitives"
"package_uri": "git+https://github.com/tonyjo/ubc_primitives.git@ef6fe1db6d956338d82b4b43c825f2d997f387f2#egg=ubc_primitives"
},
{
"type": "FILE",
"key": "sherlock_weights.h5",
"file_uri": "https://dl.dropboxusercontent.com/s/8g14nif72mp44o7/sherlock_weights.h5?dl=1",
"file_digest": "4b121359def9f155c4e80728c9320a51b46c56b98c0e9949d3406ff6ba56dc14"
},
{
"type": "FILE",
"key": "sherlock_model.json",
"file_uri": "https://dl.dropboxusercontent.com/s/2bb9n3g1b982r04/sherlock_model.json?dl=1",
"file_digest": "a12efdb386256a27f234eb475550cbb3ad4820bd5a5a085f6da4cdd36797897f"
},
{
"type": "FILE",
"key": "classes_sherlock.npy",
"file_uri": "https://dl.dropboxusercontent.com/s/k7mjisbfmffw4l4/classes_sherlock.npy?dl=1",
"file_digest": "0bb18ba9dd97e124c8956f0abb1e8ff3a5aeabe619a3c38852d85ea0ec876c4a"
},
{
"type": "FILE",
"key": "glove.6B.50d.txt",
"file_uri": "https://dl.dropboxusercontent.com/s/8x197jze94d82qu/glove.6B.50d.txt?dl=1",
"file_digest": "d8f717f8dd4b545cb7f418ef9f3d0c3e6e68a6f48b97d32f8b7aae40cb31f96f"
},
{
"type": "TGZ",
"key": "par_vec_trained_400",
"file_uri": "https://dl.dropboxusercontent.com/s/yn7n6eso6382ey9/par_vec_trained_400.tar.gz?dl=1",
"file_digest": "8e7dc7f5876d764761a3093f6ddd315f295a3a6c8578efa078ad27baf08b2569"
}
],
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
......@@ -204,5 +235,5 @@
}
},
"structural_type": "primitives.smi.semantic_type.SemanticTypeInfer",
"digest": "50f163afe4d6724aa35df7f90ca4520b8e4633ebcc1300bfacf2a62fa773358f"
"digest": "586968f9df3bd6dbe655c786c8e9a3bcf65ab82ad34f4a44a7d2cd3ccffcabce"
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment