Commit ba3ceb4e authored by jgleason

add gator primitives and pipelines

parent 4a3c4eb7
{"id": "7b69a529-d64f-4c7e-92ce-b0c9d2ed0135", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-16T20:56:56.631366Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.7.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e", "version": "0.2.0", "python_path": "d3m.primitives.data_transformation.denormalize.Common", "name": "Denormalize datasets", "digest": "5ac405757790f53ed8bfdf782ea5805c3d115dca1df1d1479c6478c6d3038340"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "268315c1-7549-4aee-a4cc-28921cba74c0", "version": "0.1.0", "python_path": "d3m.primitives.data_preprocessing.dataset_sample.Common", "name": "Dataset sampling primitive", "digest": "a05a01f5795c8630391f75455300a22eb7a7798b620d257d325b795240e4548a"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"sample_size": {"type": "VALUE", "data": {"case": "absolute", "value": 10000}}}}, {"type": "PRIMITIVE", "primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common", "name": "Extract a DataFrame from a Dataset", "digest": "422744651afd5995d029a227a1dd7b1696038816b7eb9601f37d661757812aee"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"dataframe_resource": {"type": "VALUE", "data": "learningData"}}}, {"type": "PRIMITIVE", "primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7", "version": "0.6.0", "python_path": "d3m.primitives.data_transformation.column_parser.Common", "name": "Parses strings into their types", "digest": "f1215fe3351a2e8bf495f9ed5ed50e88b30d78d4d7ebd9e13bb544e63a10994b"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"parse_semantic_types": {"type": "VALUE", "data": ["http://schema.org/Boolean", "http://schema.org/Integer", "http://schema.org/Float", "https://metadata.datadrivendiscovery.org/types/FloatVector"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common", "name": "Extracts columns by semantic type", "digest": "30cceb9812b430d6550d54766b4f674b68b92531fc2ad63f56818ea002399c13"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.3.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common", "name": "Extracts columns by semantic type", "digest": "30cceb9812b430d6550d54766b4f674b68b92531fc2ad63f56818ea002399c13"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.3.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Target", "https://metadata.datadrivendiscovery.org/types/TrueTarget", "https://metadata.datadrivendiscovery.org/types/SuggestedTarget"]}}}, {"type": "PRIMITIVE", "primitive": 
{"id": "475c26dc-eb2e-43d3-acdb-159b80d9f099", "version": "1.0.2", "python_path": "d3m.primitives.digital_image_processing.convolutional_neural_net.Gator", "name": "gator", "digest": "7c229cbcac9998c56a8793e4d523ee20f554107c445bb5acb3e21a73f1e737e2"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}, "outputs": {"type": "CONTAINER", "data": "steps.5.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"unfreeze_proportions": {"type": "VALUE", "data": [0.5]}}}, {"type": "PRIMITIVE", "primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.construct_predictions.Common", "name": "Construct pipeline predictions output", "digest": "2994d9885ddeb72f5a47a8fb04e27fe3f684adebf7c5dc27cb56392475dbb29c"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.6.produce"}, "reference": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "3535ba4045eaf0a835abe57b79be110f39f48acc2a10295dfddb9a2cf79a2d52"}
\ No newline at end of file
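
For quick orientation, the pipeline file above chains eight primitives: denormalize, dataset sampling (10,000 rows), dataset-to-dataframe extraction, column parsing, attribute and target column extraction, the Gator convolutional neural network (with `unfreeze_proportions` set to `[0.5]`), and prediction construction. Below is a minimal sketch for inspecting that step wiring using only the Python standard library; the `gator_pipeline.json` filename is an assumption, and actually executing the pipeline additionally requires the d3m reference runtime and a D3M-formatted image dataset (e.g. `python3 -m d3m runtime fit-produce -p gator_pipeline.json ...` with the appropriate dataset and problem arguments).

```python
import json

# Load the committed pipeline description (filename assumed).
with open("gator_pipeline.json") as f:
    pipeline = json.load(f)

# Walk the steps and print the primitive chain together with the data
# reference each step consumes, mirroring the "steps.N.produce" wiring.
for i, step in enumerate(pipeline["steps"]):
    primitive = step["primitive"]["python_path"]
    inputs = step["arguments"]["inputs"]["data"]
    print(f"step {i}: {primitive} <- {inputs}")

# The pipeline's declared output comes from the final construct_predictions
# step (steps.7.produce), as listed in the "outputs" section.
print(pipeline["outputs"])
```
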
{
"id": "475c26dc-eb2e-43d3-acdb-159b80d9f099",
"version": "1.0.2",
"name": "gator",
"keywords": [
"Image Recognition",
"digital image processing",
"ImageNet",
"Convolutional Neural Network"
],
"source": {
"name": "Distil",
"contact": "mailto:jeffrey.gleason@yonder.co",
"uris": [
"https://github.com/NewKnowledge/gator"
]
},
"installation": [
{
"type": "PIP",
"package_uri": "git+https://github.com/NewKnowledge/gator.git@c826924a33ba2c533f1531dcd0fd76c8673ff274#egg=Gator"
},
{
"type": "FILE",
"key": "gator_weights",
"file_uri": "http://public.datadrivendiscovery.org/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5",
"file_digest": "9617109a16463f250180008f9818336b767bdf5164315e8cd5761a8c34caa62a"
}
],
"python_path": "d3m.primitives.digital_image_processing.convolutional_neural_net.Gator",
"algorithm_types": [
"CONVOLUTIONAL_NEURAL_NETWORK"
],
"primitive_family": "DIGITAL_IMAGE_PROCESSING",
"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json",
"original_python_path": "gator.gator.gator",
"primitive_code": {
"class_type_arguments": {
"Inputs": "d3m.container.pandas.DataFrame",
"Outputs": "d3m.container.pandas.DataFrame",
"Params": "gator.gator.Params",
"Hyperparams": "gator.gator.Hyperparams"
},
"interfaces_version": "2020.1.9",
"interfaces": [
"supervised_learning.SupervisedLearnerPrimitiveBase",
"base.PrimitiveBase"
],
"hyperparams": {
"pooling": {
"type": "d3m.metadata.hyperparams.Enumeration",
"default": "avg",
"structural_type": "str",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "whether to use average or max pooling to transform 4D ImageNet features to 2D output",
"values": [
"avg",
"max"
]
},
"dense_dim": {
"type": "d3m.metadata.hyperparams.UniformInt",
"default": 1024,
"structural_type": "int",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "dimension of classification head (1 single dense layer)",
"lower": 128,
"upper": 4096,
"lower_inclusive": true,
"upper_inclusive": true
},
"batch_size": {
"type": "d3m.metadata.hyperparams.UniformInt",
"default": 32,
"structural_type": "int",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "batch size",
"lower": 1,
"upper": 256,
"lower_inclusive": true,
"upper_inclusive": true
},
"top_layer_epochs": {
"type": "d3m.metadata.hyperparams.UniformInt",
"default": 100,
"structural_type": "int",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "how many epochs for which to finetune classification head (happens first)",
"lower": 1,
"upper": 9223372036854775807,
"lower_inclusive": true,
"upper_inclusive": false
},
"all_layer_epochs": {
"type": "d3m.metadata.hyperparams.UniformInt",
"default": 100,
"structural_type": "int",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "how many epochs for which to finetune entire model (happens second)",
"lower": 1,
"upper": 9223372036854775807,
"lower_inclusive": true,
"upper_inclusive": false
},
"unfreeze_proportions": {
"type": "d3m.metadata.hyperparams.Set",
"default": [],
"structural_type": "typing.Sequence[float]",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "list of proportions representing how much of the base ImageNet model one wants to\n unfreeze (later layers unfrozen) for another round of finetuning",
"elements": {
"type": "d3m.metadata.hyperparams.Hyperparameter",
"default": -1,
"structural_type": "float",
"semantic_types": []
},
"is_configuration": false,
"min_size": 0
},
"early_stopping_patience": {
"type": "d3m.metadata.hyperparams.UniformInt",
"default": 5,
"structural_type": "int",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "number of epochs to wait before invoking early stopping criterion. applied to all \n iterations of finetuning",
"lower": 0,
"upper": 9223372036854775807,
"lower_inclusive": true,
"upper_inclusive": false
},
"val_split": {
"type": "d3m.metadata.hyperparams.Uniform",
"default": 0.2,
"structural_type": "float",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "proportion of training records to set aside for validation. Ignored if iterations flag in `fit` method is not None",
"lower": 0.0,
"upper": 1.0,
"lower_inclusive": true,
"upper_inclusive": false
},
"include_class_weights": {
"type": "d3m.metadata.hyperparams.UniformBool",
"default": true,
"structural_type": "bool",
"semantic_types": [
"https://metadata.datadrivendiscovery.org/types/TuningParameter"
],
"description": "whether to include class weights in finetuning of ImageNet model"
}
},
"arguments": {
"hyperparams": {
"type": "gator.gator.Hyperparams",
"kind": "RUNTIME"
},
"random_seed": {
"type": "int",
"kind": "RUNTIME",
"default": 0
},
"volumes": {
"type": "typing.Union[NoneType, typing.Dict[str, str]]",
"kind": "RUNTIME",
"default": null
},
"timeout": {
"type": "typing.Union[NoneType, float]",
"kind": "RUNTIME",
"default": null
},
"iterations": {
"type": "typing.Union[NoneType, int]",
"kind": "RUNTIME",
"default": null
},
"produce_methods": {
"type": "typing.Sequence[str]",
"kind": "RUNTIME"
},
"inputs": {
"type": "d3m.container.pandas.DataFrame",
"kind": "PIPELINE"
},
"outputs": {
"type": "d3m.container.pandas.DataFrame",
"kind": "PIPELINE"
},
"params": {
"type": "gator.gator.Params",
"kind": "RUNTIME"
}
},
"class_methods": {},
"instance_methods": {
"__init__": {
"kind": "OTHER",
"arguments": [
"hyperparams",
"random_seed",
"volumes"
],
"returns": "NoneType"
},
"fit": {
"kind": "OTHER",
"arguments": [
"timeout",
"iterations"
],
"returns": "d3m.primitive_interfaces.base.CallResult[NoneType]",
"description": "Trains a single Inception model on all columns of image paths using dataframe's target column\n\nParameters\n----------\ntimeout : float\n A maximum time this primitive should be fitting during this method call, in seconds.\niterations : int\n How many of internal iterations should the primitive do.\n\nReturns\n-------\nCallResult[None]\n A ``CallResult`` with ``None`` value."
},
"fit_multi_produce": {
"kind": "OTHER",
"arguments": [
"produce_methods",
"inputs",
"outputs",
"timeout",
"iterations"
],
"returns": "d3m.primitive_interfaces.base.MultiCallResult",
"description": "A method calling ``fit`` and after that multiple produce methods at once.\n\nThis method allows primitive author to implement an optimized version of both fitting\nand producing a primitive on same data.\n\nIf any additional method arguments are added to primitive's ``set_training_data`` method\nor produce method(s), or removed from them, they have to be added to or removed from this\nmethod as well. This method should accept an union of all arguments accepted by primitive's\n``set_training_data`` method and produce method(s) and then use them accordingly when\ncomputing results.\n\nThe default implementation of this method just calls first ``set_training_data`` method,\n``fit`` method, and all produce methods listed in ``produce_methods`` in order and is\npotentially inefficient.\n\nParameters\n----------\nproduce_methods : Sequence[str]\n A list of names of produce methods to call.\ninputs : Inputs\n The inputs given to ``set_training_data`` and all produce methods.\noutputs : Outputs\n The outputs given to ``set_training_data``.\ntimeout : float\n A maximum time this primitive should take to both fit the primitive and produce outputs\n for all produce methods listed in ``produce_methods`` argument, in seconds.\niterations : int\n How many of internal iterations should the primitive do for both fitting and producing\n outputs of all produce methods.\n\nReturns\n-------\nMultiCallResult\n A dict of values for each produce method wrapped inside ``MultiCallResult``."
},
"get_params": {
"kind": "OTHER",
"arguments": [],
"returns": "gator.gator.Params",
"description": "Returns parameters of this primitive.\n\nParameters are all parameters of the primitive which can potentially change during a life-time of\na primitive. Parameters which cannot are passed through constructor.\n\nParameters should include all data which is necessary to create a new instance of this primitive\nbehaving exactly the same as this instance, when the new instance is created by passing the same\nparameters to the class constructor and calling ``set_params``.\n\nNo other arguments to the method are allowed (except for private arguments).\n\nReturns\n-------\nParams\n An instance of parameters."
},
"multi_produce": {
"kind": "OTHER",
"arguments": [
"produce_methods",
"inputs",
"timeout",
"iterations"
],
"returns": "d3m.primitive_interfaces.base.MultiCallResult",
"description": "A method calling multiple produce methods at once.\n\nWhen a primitive has multiple produce methods it is common that they might compute the\nsame internal results for same inputs but return different representations of those results.\nIf caller is interested in multiple of those representations, calling multiple produce\nmethods might lead to recomputing same internal results multiple times. To address this,\nthis method allows primitive author to implement an optimized version which computes\ninternal results only once for multiple calls of produce methods, but return those different\nrepresentations.\n\nIf any additional method arguments are added to primitive's produce method(s), they have\nto be added to this method as well. This method should accept an union of all arguments\naccepted by primitive's produce method(s) and then use them accordingly when computing\nresults.\n\nThe default implementation of this method just calls all produce methods listed in\n``produce_methods`` in order and is potentially inefficient.\n\nIf primitive should have been fitted before calling this method, but it has not been,\nprimitive should raise a ``PrimitiveNotFittedError`` exception.\n\nParameters\n----------\nproduce_methods : Sequence[str]\n A list of names of produce methods to call.\ninputs : Inputs\n The inputs given to all produce methods.\ntimeout : float\n A maximum time this primitive should take to produce outputs for all produce methods\n listed in ``produce_methods`` argument, in seconds.\niterations : int\n How many of internal iterations should the primitive do.\n\nReturns\n-------\nMultiCallResult\n A dict of values for each produce method wrapped inside ``MultiCallResult``."
},
"produce": {
"kind": "PRODUCE",
"arguments": [
"inputs",
"timeout",
"iterations"
],
"returns": "d3m.primitive_interfaces.base.CallResult[d3m.container.pandas.DataFrame]",
"singleton": false,
"inputs_across_samples": [],
"description": "Produce image object classification predictions\n\nParameters\n----------\ninputs : feature dataframe\n\nReturns\n-------\noutput : A dataframe with image labels/classifications/cluster assignments"
},
"set_params": {
"kind": "OTHER",
"arguments": [
"params"
],
"returns": "NoneType",
"description": "Sets parameters of this primitive.\n\nParameters are all parameters of the primitive which can potentially change during a life-time of\na primitive. Parameters which cannot are passed through constructor.\n\nNo other arguments to the method are allowed (except for private arguments).\n\nParameters\n----------\nparams : Params\n An instance of parameters."
},
"set_training_data": {
"kind": "OTHER",
"arguments": [
"inputs",
"outputs"
],
"returns": "NoneType",
"description": "Sets primitive's training data\n\nParameters\n----------\ninputs: feature dataframe\noutputs: labels from dataframe's target column"
}
},
"class_attributes": {
"logger": "logging.Logger",
"metadata": "d3m.metadata.base.PrimitiveMetadata"
},
"instance_attributes": {
"hyperparams": "d3m.metadata.hyperparams.Hyperparams",
"random_seed": "int",
"docker_containers": "typing.Dict[str, d3m.primitive_interfaces.base.DockerContainer]",
"volumes": "typing.Dict[str, str]",
"temporary_directory": "typing.Union[NoneType, str]"
},
"params": {}
},
"structural_type": "gator.gator.gator",
"description": "Produce image classification predictions by iteratively finetuning an Inception V3 model\ntrained on ImageNet (can have multiple columns of images, but assumption is that there is a\nsingle column of target labels, these labels are broadcast to all images by row)\n\nTraining inputs: 1) Feature dataframe, 2) Label dataframe\nOutputs: Dataframe with predictions\n\nAttributes\n----------\nmetadata : PrimitiveMetadata\n Primitive's metadata. Available as a class attribute.\nlogger : Logger\n Primitive's logger. Available as a class attribute.\nhyperparams : Hyperparams\n Hyperparams passed to the constructor.\nrandom_seed : int\n Random seed passed to the constructor.\ndocker_containers : Dict[str, DockerContainer]\n A dict mapping Docker image keys from primitive's metadata to (named) tuples containing\n container's address under which the container is accessible by the primitive, and a\n dict mapping exposed ports to ports on that address.\nvolumes : Dict[str, str]\n A dict mapping volume keys from primitive's metadata to file and directory paths\n where downloaded and extracted files are available to the primitive.\ntemporary_directory : str\n An absolute path to a temporary directory a primitive can use to store any files\n for the duration of the current pipeline run phase. Directory is automatically\n cleaned up after the current pipeline run phase finishes.",
"digest": "7c229cbcac9998c56a8793e4d523ee20f554107c445bb5acb3e21a73f1e737e2"
}
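
The `installation` section above pins both the package (PIP install from the NewKnowledge/gator repository) and a static weights file keyed as `gator_weights`, which the runtime exposes to the primitive through the `volumes` constructor argument. The sketch below shows, under stated assumptions, how the primitive could be instantiated and exercised directly against the interface declared in `instance_methods`; the weights path, image paths, and labels are hypothetical placeholders, and in practice the d3m reference runtime performs this wiring automatically when executing the pipeline above.

```python
from gator.gator import Hyperparams, gator as GatorPrimitive  # module path from "original_python_path"
from d3m import container

# Default hyperparameters with a couple of the documented tuning
# parameters overridden (the pipeline above sets unfreeze_proportions=[0.5]).
hp = Hyperparams.defaults().replace({
    "batch_size": 32,
    "unfreeze_proportions": [0.5],
})

# The FILE installation entry registers the ImageNet weights under the
# "gator_weights" volume key; this local path is hypothetical.
volumes = {"gator_weights": "/static/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5"}

primitive = GatorPrimitive(hyperparams=hp, random_seed=0, volumes=volumes)

# In a real run these frames come out of the dataframe/column-extraction
# steps of the pipeline above; here they are hypothetical stand-ins with a
# column of image paths and a column of target labels.
train_features = container.DataFrame({"image": ["img_0001.jpg", "img_0002.jpg"]}, generate_metadata=True)
train_labels = container.DataFrame({"label": ["cat", "dog"]}, generate_metadata=True)

# Interface calls exactly as declared in "instance_methods" above.
primitive.set_training_data(inputs=train_features, outputs=train_labels)
primitive.fit()
predictions = primitive.produce(inputs=train_features).value  # DataFrame of class predictions
```
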