sum.py 9.68 KB
Newer Older
Mitar's avatar
Mitar committed
1 2 3 4 5 6 7
import os.path
import pickle
import typing
from http import client

import numpy  # type: ignore

Mitar's avatar
Mitar committed
8 9
from d3m import container, utils
from d3m.metadata import hyperparams, base as metadata_base
Mitar's avatar
Mitar committed
10
from d3m.primitive_interfaces import base, transformer
Mitar's avatar
Mitar committed
11 12 13 14 15 16 17 18 19 20 21

from . import __author__, __version__

# Only the primitive class itself is part of the public API of this module.
__all__ = ('SumPrimitive',)


# Key identifying the Docker container this primitive talks to. The same value is used as the
# "key" of the Docker entry in the primitive's installation metadata and to look up the running
# container in the "docker_containers" dict given to the primitive's constructor.
DOCKER_KEY = 'summing'

# It is useful to define these names, so that you can reuse them both
# for class type arguments and method signatures.
# This is just an example of how to define a more complicated input type,
# which is in fact more restrictive than what the primitive can really handle.
# One could probably just use "typing.Union[typing.Container]" in this case, if accepting
# a wide range of input types.
Mitar's avatar
Mitar committed
25 26
# Container types the primitive declares it accepts as "inputs".
Inputs = typing.Union[container.ndarray, container.DataFrame, container.List]
# "produce" always returns a list; it holds the single number computed from the inputs.
Outputs = container.List
Mitar's avatar
Mitar committed
27 28 29 30 31 32 33 34 35 36


class Hyperparams(hyperparams.Hyperparams):
    """
    No hyper-parameters for this primitive.
    """
    # The docstring alone is a valid class body, so no explicit "pass" statement is needed.
    # The class exists only so that it can be used as the hyper-parameters type argument
    # of the primitive's base class and in its method signatures.


37
class SumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
    # It is important to provide a docstring because this docstring is used as a description of
    # a primitive. Some callers might analyze it to determine the nature and purpose of a primitive.

    """
    A primitive which sums all the values on input into one number.
    """

    # The primitive does not compute the result itself: "produce" pickles the inputs, sends them
    # with an HTTP POST request to the service running inside the Docker container registered
    # under DOCKER_KEY, and wraps the number returned by the service into the outputs list.
    #
    # Methods below are documented with comments instead of docstrings so that docstrings
    # visible at runtime stay exactly as they were.

    # This should contain only metadata which cannot be automatically determined from the code.
    metadata = metadata_base.PrimitiveMetadata({
        # Simply a UUID generated once and fixed forever. Generated using "uuid.uuid4()".
        'id': '9c00d42d-382d-4177-a0e7-082da88a29c8',
        'version': __version__,
        'name': "Sum Values",
        # Keywords do not have a controlled vocabulary. Authors can put here whatever they find suitable.
        'keywords': ['test primitive'],
        'source': {
            'name': __author__,
            'uris': [
                # Unstructured URIs. Link to file and link to repo in this case.
                'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/sum.py',
                'https://gitlab.com/datadrivendiscovery/tests-data.git',
            ],
        },
        # A list of dependencies in order. These can be Python packages, system packages, or Docker images.
        # Of course Python packages can also have their own dependencies, but sometimes it is necessary to
        # install a Python package first to be even able to run setup.py of another package. Or you have
        # a dependency which is not on PyPi.
        'installation': [{
            'type': metadata_base.PrimitiveInstallationType.PIP,
            'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format(
                git_commit=utils.current_git_commit(os.path.dirname(__file__)),
            ),
        }, {
            'type': metadata_base.PrimitiveInstallationType.DOCKER,
            # A key under which information about a running container will be provided to the primitive.
            'key': DOCKER_KEY,
            'image_name': 'registry.gitlab.com/datadrivendiscovery/tests-data/summing',
            # Instead of a label, an exact hash of the image is required. This assures reproducibility.
            # You can see digests using "docker images --digests".
            'image_digest': 'sha256:f75e21720e44cfa29d8a8e239b5746c715aa7cf99f9fde7916623fabc30d3364',
        }],
        # URIs at which one can obtain code for the primitive, if available.
        'location_uris': [
            'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/sum.py'.format(
                git_commit=utils.current_git_commit(os.path.dirname(__file__)),
            ),
        ],
        # The same path the primitive is registered with entry points in setup.py.
        'python_path': 'd3m.primitives.test.SumPrimitive',
        # Choose these from a controlled vocabulary in the schema. If anything is missing which would
        # best describe the primitive, make a merge request.
        'algorithm_types': [
            metadata_base.PrimitiveAlgorithmType.COMPUTER_ALGEBRA,
        ],
        'primitive_family': metadata_base.PrimitiveFamily.OPERATOR,
        # A metafeature about preconditions required for this primitive to operate well.
        'preconditions': [
            # Instead of strings you can also use available Python enumerations.
            metadata_base.PrimitivePrecondition.NO_MISSING_VALUES,
            metadata_base.PrimitivePrecondition.NO_CATEGORICAL_VALUES,
        ]
    })

    # Stores hyper-parameters and Docker containers through the base class and fails early,
    # with a "ValueError", if no container was provided under DOCKER_KEY.
    def __init__(self, *, hyperparams: Hyperparams, docker_containers: typing.Dict[str, base.DockerContainer] = None) -> None:
        super().__init__(hyperparams=hyperparams, docker_containers=docker_containers)

        # We cannot check for expected ports here because during class construction, a mock value
        # is passed which has an empty ports dict.
        if not self.docker_containers or DOCKER_KEY not in self.docker_containers:
            raise ValueError("Docker key '{docker_key}' missing among provided Docker containers.".format(docker_key=DOCKER_KEY))

    # Converts container types into the plain types the server understands. Container lists
    # are converted recursively; any other value is returned unchanged.
    def _convert_value(self, value: typing.Any) -> typing.Union[numpy.ndarray, typing.List, typing.Any]:
        # Server does not know about container types, just standard numpy arrays and lists.
        if isinstance(value, container.ndarray):
            return value.view(numpy.ndarray)
        elif isinstance(value, container.List):
            return [self._convert_value(v) for v in value]
        elif isinstance(value, container.DataFrame):
            # "Inputs" allows DataFrames too, so they are sent as a plain numpy array of their
            # values instead of being pickled as a container type.
            return value.values
        else:
            return value

    # Sends the (converted and pickled) inputs to the service in the Docker container and returns
    # its numeric response as a one-element list wrapped into "CallResult". Raises "ValueError"
    # if the service does not respond with HTTP status 200. "timeout" and "iterations" are not
    # used by this implementation.
    @base.singleton
    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
        # In the future, we should store here data in Arrow format into
        # Plasma store and just pass an ObjectId of data over HTTP.
        value = self._convert_value(inputs)
        data = pickle.dumps(value)

        # TODO: Retry if connection fails.
        #       This connection can sometimes fail because the service inside a Docker container
        #       is not yet ready, despite container itself already running. Primitive should retry
        #       a few times before aborting.

        docker_container = self.docker_containers[DOCKER_KEY]

        # Primitive knows the port the container is listening on.
        # Creating the connection object does not open a socket yet; that happens on "request".
        connection = client.HTTPConnection(docker_container.address, port=docker_container.ports['8000/tcp'])
        try:
            # This simple primitive does not keep any state in the Docker container.
            # But if your primitive does have to associate requests with a primitive, consider
            # using Python's "id(self)" call to get an identifier of a primitive's instance.
            self.logger.debug("HTTP request: container=%(container)s", {'container': docker_container}, extra={'data': value})
            connection.request('POST', '/', data, {
                'Content-Type': 'multipart/form-data',
            })
            response = connection.getresponse()
            self.logger.debug("HTTP response: status=%(status)s", {'status': response.status}, extra={'response': response})

            if response.status != 200:
                raise ValueError("Invalid HTTP response status: {status}".format(status=response.status))

            # The whole body has to be read before the connection is closed below.
            result = float(response.read())
        finally:
            # Always release the socket, also when the request fails or the response is invalid.
            connection.close()

        outputs = container.List((result,))

        # Outputs are different from inputs, so we do not reuse metadata from inputs but create
        # new metadata. We do this by clearing old metadata, which keeps the history and the link
        # to inputs metadata. "for_value" tells that this new metadata will be associated with
        # "outputs" and "source" tells which primitive generated this metadata.
        outputs.metadata = inputs.metadata.clear(for_value=outputs, source=self)

        # Wrap it into default "CallResult" object: we are not doing any iterations.
        return base.CallResult(outputs)

    # Because numpy arrays and DataFrames do not contain shapes and dtype as part of their structural types,
    # we have to manually check those in metadata. In this case, just dtype which is stored as
    # "structural_type" on values themselves (and not the container or dimensions).
    #
    # Returns what the base class returns, except that for the "inputs" argument of "produce"
    # "None" is returned when the structural type of the values is missing or is not numeric.
    @classmethod
    def can_accept(cls, *, method_name: str, arguments: typing.Dict[str, typing.Union[metadata_base.Metadata, type]],
                   hyperparams: Hyperparams) -> typing.Optional[metadata_base.DataMetadata]:
        output_metadata = super().can_accept(method_name=method_name, arguments=arguments, hyperparams=hyperparams)

        # If structural types didn't match, don't bother.
        if output_metadata is None:
            return None

        # The additional check applies only to the "inputs" argument of "produce".
        if method_name != 'produce' or 'inputs' not in arguments:
            return output_metadata

        inputs_metadata = typing.cast(metadata_base.DataMetadata, arguments['inputs'])

        # Try to get structural types defined for all elements: query one level deeper for as
        # long as the queried metadata still describes a dimension.
        dimension_index = 0
        while True:
            element_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS,) * dimension_index)

            if 'dimension' not in element_metadata:
                break

            dimension_index += 1

        inputs_value_structural_type = element_metadata.get('structural_type', None)

        if inputs_value_structural_type is None:
            # TODO: Check if every element individually is a numeric type.
            #       There was no structural type defined for all elements, but there should be one for each element.
            return None

        # "issubclass" raises "TypeError" for anything which is not a class, so such structural
        # types are treated as not numeric instead.
        if not isinstance(inputs_value_structural_type, type):
            return None

        # Not a perfect way to check for a numeric type but will do for this example.
        # Otherwise check out "pandas.api.types.is_numeric_dtype".
        if not issubclass(inputs_value_structural_type, (float, int, numpy.number)):
            return None

        return output_metadata