sum.py 9.34 KB
Newer Older
Mitar's avatar
Mitar committed
1 2 3 4 5 6 7
import os.path
import pickle
import typing
from http import client

import numpy  # type: ignore

Mitar's avatar
Mitar committed
8 9
from d3m import container, utils
from d3m.metadata import hyperparams, base as metadata_base
Mitar's avatar
Mitar committed
10
from d3m.primitive_interfaces import base, transformer
Mitar's avatar
Mitar committed
11 12 13 14 15 16 17 18 19 20 21

from . import __author__, __version__

__all__ = ('SumPrimitive',)


# Key which identifies the summing Docker container: it is declared under this key
# in the primitive's installation metadata and it is the key under which the
# primitive looks the container up among the Docker containers provided to it.
DOCKER_KEY = 'summing'

# It is useful to define these names, so that you can reuse them both
# for class type arguments and method signatures.
# This is just an example of how to define a more complicated input type,
# which is in fact more restrictive than what the primitive can really handle.
# One could probably just use "typing.Container" in this case, if accepting
# a wide range of input types.
Inputs = typing.Union[container.ndarray, container.DataFrame, container.List[float], container.List[container.List[float]]]
# The primitive outputs a list of floats; "produce" wraps the single sum
# into a one-element list.
Outputs = container.List[float]


# The primitive is not configurable, so this hyper-parameters class defines no fields.
# The docstring alone is a sufficient class body.
class Hyperparams(hyperparams.Hyperparams):
    """
    No hyper-parameters for this primitive.
    """


37
class SumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
    # It is important to provide a docstring because this docstring is used as a description of
    # a primitive. Some callers might analyze it to determine the nature and purpose of a primitive.

    """
    A primitive which sums all the values on input into one number.
    """

    # This should contain only metadata which cannot be automatically determined from the code.
    metadata = metadata_base.PrimitiveMetadata({
        # Simply an UUID generated once and fixed forever. Generated using "uuid.uuid4()".
        'id': '9c00d42d-382d-4177-a0e7-082da88a29c8',
        'version': __version__,
        'name': "Sum Values",
        # Keywords do not have a controlled vocabulary. Authors can put here whatever they find suitable.
        'keywords': ['test primitive'],
        'source': {
            'name': __author__,
            'uris': [
                # Unstructured URIs. Link to file and link to repo in this case.
                'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/sum.py',
                'https://gitlab.com/datadrivendiscovery/tests-data.git',
            ],
        },
        # A list of dependencies in order. These can be Python packages, system packages, or Docker images.
        # Of course Python packages can also have their own dependencies, but sometimes it is necessary to
        # install a Python package first to be even able to run setup.py of another package. Or you have
        # a dependency which is not on PyPi.
        'installation': [{
            'type': metadata_base.PrimitiveInstallationType.PIP,
            'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format(
                git_commit=utils.current_git_commit(os.path.dirname(__file__)),
            ),
        }, {
            'type': metadata_base.PrimitiveInstallationType.DOCKER,
            # A key under which information about a running container will be provided to the primitive.
            'key': DOCKER_KEY,
            'image_name': 'registry.gitlab.com/datadrivendiscovery/tests-data/summing',
            # Instead of a label, an exact hash of the image is required. This assures reproducibility.
            # You can see digests using "docker images --digests".
            'image_digest': 'sha256:f75e21720e44cfa29d8a8e239b5746c715aa7cf99f9fde7916623fabc30d3364',
        }],
        # URIs at which one can obtain code for the primitive, if available.
        'location_uris': [
            'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/sum.py'.format(
                git_commit=utils.current_git_commit(os.path.dirname(__file__)),
            ),
        ],
        # The same path the primitive is registered with entry points in setup.py.
        'python_path': 'd3m.primitives.test.SumPrimitive',
        # Choose these from a controlled vocabulary in the schema. If anything is missing which would
        # best describe the primitive, make a merge request.
        'algorithm_types': [
            metadata_base.PrimitiveAlgorithmType.COMPUTER_ALGEBRA,
        ],
        'primitive_family': metadata_base.PrimitiveFamily.OPERATOR,
        # A metafeature about preconditions required for this primitive to operate well.
        'preconditions': [
            # Instead of strings you can also use available Python enumerations.
            metadata_base.PrimitivePrecondition.NO_MISSING_VALUES,
            metadata_base.PrimitivePrecondition.NO_CATEGORICAL_VALUES,
        ]
    })

    # Constructs the primitive and verifies that the Docker container it depends on
    # was provided under "DOCKER_KEY".
    #
    # Raises "ValueError" if "DOCKER_KEY" is not among the provided Docker containers.
    #
    # NOTE: Methods of the primitive interface are documented here with comments and not
    #       with docstrings, so that documentation inherited from the base class (if any)
    #       is not shadowed.
    def __init__(self, *, hyperparams: Hyperparams, docker_containers: typing.Dict[str, base.DockerContainer] = None) -> None:
        super().__init__(hyperparams=hyperparams, docker_containers=docker_containers)

        # We cannot check for expected ports here because during class construction, a mock value is passed which has empty ports dict.
        if DOCKER_KEY not in self.docker_containers:
            raise ValueError("Docker key '{docker_key}' missing among provided Docker containers.".format(docker_key=DOCKER_KEY))

    def _convert_value(self, value: typing.Any) -> typing.Union[numpy.ndarray, typing.List, typing.Any]:
        """
        Recursively converts container types into standard types before sending them to the server.

        A ``container.ndarray`` is returned as a plain ``numpy.ndarray`` view, a ``container.List``
        is returned as a standard list with every element converted in the same way, and any
        other value is returned unchanged.
        """

        # Server does not know about container types, just standard numpy arrays and lists.
        if isinstance(value, container.ndarray):
            return value.view(numpy.ndarray)
        elif isinstance(value, container.List):
            return [self._convert_value(v) for v in value]
        else:
            return value

    # Sums all values in "inputs" by sending them (pickled) in an HTTP POST request to the
    # service running inside the Docker container, and returns the sum as the only element
    # of the output list, wrapped into a "CallResult".
    #
    # "timeout" and "iterations" are accepted as part of the interface but are not used.
    #
    # Raises "ValueError" if the HTTP response status is not 200, or if the response body
    # cannot be parsed as a number.
    @base.singleton
    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
        # In the future, we should store here data in Arrow format into
        # Plasma store and just pass an ObjectId of data over HTTP.
        value = self._convert_value(inputs)
        data = pickle.dumps(value)

        # TODO: Retry if connection fails.
        #       This connection can sometimes fail because the service inside a Docker container
        #       is not yet ready, despite container itself already running. Primitive should retry
        #       a few times before aborting.

        docker_container = self.docker_containers[DOCKER_KEY]

        # Primitive knows the port the container is listening on.
        connection = client.HTTPConnection(docker_container.address, port=docker_container.ports['8000/tcp'])
        # The connection is always closed, also when the request fails or the response status
        # is invalid, so that sockets are not leaked across calls.
        try:
            # This simple primitive does not keep any state in the Docker container.
            # But if your primitive does have to associate requests with a primitive, consider
            # using Python's "id(self)" call to get an identifier of a primitive's instance.
            self.logger.debug("HTTP request: container=%(container)s", {'container': docker_container}, extra={'data': value})
            connection.request('POST', '/', data, {
                'Content-Type': 'multipart/form-data',
            })
            response = connection.getresponse()
            self.logger.debug("HTTP response: status=%(status)s", {'status': response.status}, extra={'response': response})

            if response.status != 200:
                raise ValueError("Invalid HTTP response status: {status}".format(status=response.status))

            # The body has to be read before the connection is closed.
            body = response.read()
        finally:
            connection.close()

        # Outputs are declared as a list of floats, so the response is parsed as a float.
        # Parsing it as an integer would fail for any sum which the server formats as
        # a floating-point number (e.g., "6.0").
        result = float(body)

        outputs = container.List[float]((result,), {
            'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
            'structural_type': container.List[float],
            'dimension': {
                'length': 1,
            },
        })
        outputs.metadata = outputs.metadata.update((metadata_base.ALL_ELEMENTS,), {
            'structural_type': float,
        })

        # Wrap it into default "CallResult" object: we are not doing any iterations.
        return base.CallResult(outputs)

    # Because numpy arrays and DataFrames do not contain shapes and dtype as part of their structural types,
    # we have to manually check those in metadata. In this case, just dtype which is stored as
    # "structural_type" on values themselves (and not the container or dimensions).
    #
    # Returns the metadata computed by the base class when "inputs" can be accepted (or when
    # "inputs" is not among "arguments"), and "None" when the base class rejects the arguments
    # or when the structural type of input values is missing or is not a numeric type.
    @classmethod
    def can_accept(cls, *, method_name: str, arguments: typing.Dict[str, typing.Union[metadata_base.Metadata, type]],
                   hyperparams: Hyperparams) -> typing.Optional[metadata_base.DataMetadata]:
        output_metadata = super().can_accept(method_name=method_name, arguments=arguments, hyperparams=hyperparams)

        # If structural types didn't match, don't bother.
        if output_metadata is None:
            return None

        if 'inputs' not in arguments:
            return output_metadata

        inputs_metadata = typing.cast(metadata_base.DataMetadata, arguments['inputs'])

        # Descend through all dimensions of inputs, one "ALL_ELEMENTS" selector segment per
        # dimension, until metadata without "dimension" is reached: that is metadata of
        # values themselves.
        dimension_index = 0
        while True:
            metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS,) * dimension_index)

            if 'dimension' not in metadata:
                break

            dimension_index += 1

        inputs_value_structural_type = metadata.get('structural_type', None)

        if inputs_value_structural_type is None:
            # TODO: Check if every element individually is a numeric type.
            return None

        # "issubclass" raises "TypeError" when its first argument is not a class, so a structural
        # type which is not a class is reported as not accepted instead.
        if not isinstance(inputs_value_structural_type, type):
            return None

        # Not a perfect way to check for a numeric type but will do for this example.
        # Otherwise check out "pandas.api.types.is_numeric_dtype".
        if not issubclass(inputs_value_structural_type, (float, int, numpy.number)):
            return None

        return output_metadata