sum.py 9.29 KB
Newer Older
Mitar's avatar
Mitar committed
1 2 3 4 5 6 7
import os.path
import pickle
import typing
from http import client

import numpy  # type: ignore

Mitar's avatar
Mitar committed
8 9
from d3m import container, utils
from d3m.metadata import hyperparams, base as metadata_base
Mitar's avatar
Mitar committed
10
from d3m.primitive_interfaces import base, transformer
Mitar's avatar
Mitar committed
11 12 13 14 15 16 17 18 19 20 21

from . import __author__, __version__

# Public API of this module: only the primitive class is exported.
__all__ = ('SumPrimitive',)


# Key identifying the Docker container this primitive depends on. The same key is declared
# in the primitive's installation metadata and is used to look up the running container
# among the Docker containers passed to the primitive's constructor.
DOCKER_KEY = 'summing'

# It is useful to define these names, so that you can reuse them both
# for class type arguments and method signatures.
# This is just an example of how to define a more complicated input type,
# which is in fact more restrictive than what the primitive can really handle.
# One could probably just use "typing.Union[typing.Container]" in this case, if accepting
# a wide range of input types.
Mitar's avatar
Mitar committed
25 26
# Container types accepted as the "inputs" argument of "produce".
Inputs = typing.Union[container.ndarray, container.DataFrame, container.List]
# "produce" returns a list holding a single value: the computed sum.
Outputs = container.List
Mitar's avatar
Mitar committed
27 28 29 30 31 32 33 34 35 36


class Hyperparams(hyperparams.Hyperparams):
    """
    Empty hyper-parameters class: this primitive does not define any hyper-parameters.
    """


37
class SumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
    # It is important to provide a docstring because this docstring is used as a description of
    # a primitive. Some callers might analyze it to determine the nature and purpose of a primitive.

    """
    A primitive which sums all the values on input into one number.
    """

    # This should contain only metadata which cannot be automatically determined from the code.
    metadata = metadata_base.PrimitiveMetadata({
        # Simply an UUID generated once and fixed forever. Generated using "uuid.uuid4()".
        'id': '9c00d42d-382d-4177-a0e7-082da88a29c8',
        'version': __version__,
        'name': "Sum Values",
        # Keywords do not have a controlled vocabulary. Authors can put here whatever they find suitable.
        'keywords': ['test primitive'],
        'source': {
            'name': __author__,
            'contact': 'mailto:author@example.com',
            'uris': [
                # Unstructured URIs. Link to file and link to repo in this case.
                'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/sum.py',
                'https://gitlab.com/datadrivendiscovery/tests-data.git',
            ],
        },
        # A list of dependencies in order. These can be Python packages, system packages, or Docker images.
        # Of course Python packages can also have their own dependencies, but sometimes it is necessary to
        # install a Python package first to be even able to run setup.py of another package. Or you have
        # a dependency which is not on PyPi.
        'installation': [{
            'type': metadata_base.PrimitiveInstallationType.PIP,
            'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format(
                git_commit=utils.current_git_commit(os.path.dirname(__file__)),
            ),
        }, {
            'type': metadata_base.PrimitiveInstallationType.DOCKER,
            # A key under which information about a running container will be provided to the primitive.
            'key': DOCKER_KEY,
            'image_name': 'registry.gitlab.com/datadrivendiscovery/tests-data/summing',
            # Instead of a label, an exact hash of the image is required. This assures reproducibility.
            # You can see digests using "docker images --digests".
            'image_digest': 'sha256:f75e21720e44cfa29d8a8e239b5746c715aa7cf99f9fde7916623fabc30d3364',
        }],
        # URIs at which one can obtain code for the primitive, if available.
        'location_uris': [
            'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/sum.py'.format(
                git_commit=utils.current_git_commit(os.path.dirname(__file__)),
            ),
        ],
        # The same path the primitive is registered with entry points in setup.py.
        'python_path': 'd3m.primitives.operator.sum.Test',
        # Choose these from a controlled vocabulary in the schema. If anything is missing which would
        # best describe the primitive, make a merge request.
        'algorithm_types': [
            metadata_base.PrimitiveAlgorithmType.COMPUTER_ALGEBRA,
        ],
        'primitive_family': metadata_base.PrimitiveFamily.OPERATOR,
        # A metafeature about preconditions required for this primitive to operate well.
        'preconditions': [
            # Instead of strings you can also use available Python enumerations.
            metadata_base.PrimitivePrecondition.NO_MISSING_VALUES,
            metadata_base.PrimitivePrecondition.NO_CATEGORICAL_VALUES,
        ]
    })

    # Stores hyper-parameters and Docker containers through the base class and fails early,
    # with a "ValueError", when no container is registered under "DOCKER_KEY".
    def __init__(self, *, hyperparams: Hyperparams, docker_containers: typing.Dict[str, base.DockerContainer] = None) -> None:
        super().__init__(hyperparams=hyperparams, docker_containers=docker_containers)

        # We cannot check for expected ports here because during class construction, a mock value is passed which has empty ports dict.
        if not self.docker_containers or DOCKER_KEY not in self.docker_containers:
            raise ValueError("Docker key '{docker_key}' missing among provided Docker containers.".format(docker_key=DOCKER_KEY))

    def _convert_value(self, value: typing.Any) -> typing.Union[numpy.ndarray, typing.List, typing.Any]:
        """
        Recursively replaces container types with plain equivalents before serialization.

        A ``container.ndarray`` is returned as a plain ``numpy.ndarray`` view, a ``container.List``
        as a standard list with every element converted in turn, and any other value unchanged.
        """

        # Server does not know about container types, just standard numpy arrays and lists.
        if isinstance(value, container.ndarray):
            return value.view(numpy.ndarray)
        elif isinstance(value, container.List):
            return [self._convert_value(v) for v in value]
        else:
            return value

    # Pickles the (converted) inputs, POSTs them to the service running inside the Docker
    # container and wraps the number it responds with into a single-element output list.
    # "timeout" and "iterations" are accepted for interface compatibility but are not used here.
    # Raises "ValueError" if the service responds with a status other than 200.
    @base.singleton
    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
        # In the future, we should store here data in Arrow format into
        # Plasma store and just pass an ObjectId of data over HTTP.
        value = self._convert_value(inputs)
        data = pickle.dumps(value)

        # TODO: Retry if connection fails.
        #       This connection can sometimes fail because the service inside a Docker container
        #       is not yet ready, despite container itself already running. Primitive should retry
        #       a few times before aborting.

        # Not named "container" so that the "container" module used below is not shadowed.
        docker_container = self.docker_containers[DOCKER_KEY]

        # Primitive knows the port the container is listening on.
        connection = client.HTTPConnection(docker_container.address, port=docker_container.ports['8000/tcp'])
        # Always release the underlying socket, including when the request fails
        # or the response is rejected below.
        try:
            # This simple primitive does not keep any state in the Docker container.
            # But if your primitive does have to associate requests with a primitive, consider
            # using Python's "id(self)" call to get an identifier of a primitive's instance.
            self.logger.debug("HTTP request: container=%(container)s", {'container': docker_container}, extra={'data': value})
            connection.request('POST', '/', data, {
                'Content-Type': 'multipart/form-data',
            })
            response = connection.getresponse()
            self.logger.debug("HTTP response: status=%(status)s", {'status': response.status}, extra={'response': response})

            if response.status != 200:
                raise ValueError("Invalid HTTP response status: {status}".format(status=response.status))

            # The response body has to be read before the connection is closed.
            result = float(response.read())
        finally:
            connection.close()

        # Outputs are different from inputs, so we do not reuse metadata from inputs but generate new metadata.
        outputs = container.List((result,), generate_metadata=True)

        # Wrap it into default "CallResult" object: we are not doing any iterations.
        return base.CallResult(outputs)

    # Because numpy arrays and DataFrames do not contain shapes and dtype as part of their structural types,
    # we have to manually check those in metadata. In this case, just dtype which is stored as
    # "structural_type" on values themselves (and not the container or dimensions).
    # Returns "None" when the arguments cannot be accepted, otherwise the output metadata
    # computed by the base class.
    @classmethod
    def can_accept(cls, *, method_name: str, arguments: typing.Dict[str, typing.Union[metadata_base.Metadata, type]],
                   hyperparams: Hyperparams) -> typing.Optional[metadata_base.DataMetadata]:
        output_metadata = super().can_accept(method_name=method_name, arguments=arguments, hyperparams=hyperparams)

        # If structural types didn't match, don't bother.
        if output_metadata is None:
            return None

        # The additional check below applies only to the "inputs" argument of "produce".
        if method_name != 'produce':
            return output_metadata

        if 'inputs' not in arguments:
            return output_metadata

        inputs_metadata = typing.cast(metadata_base.DataMetadata, arguments['inputs'])

        # Try to get structural types defined for all elements.
        # Descend through dimensions until a level without "dimension" metadata is reached:
        # that level describes the values themselves.
        dimension_index = 0
        while True:
            dimension_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS,) * dimension_index)

            if 'dimension' not in dimension_metadata:
                break

            dimension_index += 1

        structural_type, exceptions = inputs_metadata.query_field_with_exceptions((metadata_base.ALL_ELEMENTS,) * dimension_index, 'structural_type')

        if structural_type is not metadata_base.NO_VALUE:
            if not utils.is_numeric(structural_type):
                return None

        # Structural types of elements which differ from the common one have to be numeric as well.
        for exception_structural_type in exceptions.values():
            if not utils.is_numeric(exception_structural_type):
                return None

        return output_metadata