sum.py 9.06 KB
Newer Older
Mitar's avatar
Mitar committed
1 2 3 4 5 6 7
import os.path
import pickle
import typing
from http import client

import numpy  # type: ignore

Mitar's avatar
Mitar committed
8 9
from d3m import container
from d3m.metadata import hyperparams, base as metadata_base, utils
Mitar's avatar
Mitar committed
10
from d3m.primitive_interfaces import base, transformer
Mitar's avatar
Mitar committed
11 12 13 14 15 16 17 18 19 20 21

from . import __author__, __version__

__all__ = ('SumPrimitive',)


DOCKER_KEY = 'summing'

# It is useful to define these names, so that you can reuse it both
# for class type arguments and method signatures.
# This is just an example of how to define a more complicated input type,
Mitar's avatar
Typo.  
Mitar committed
22
# which is in fact more restrictive than what the primitive can really handle.
Mitar's avatar
Mitar committed
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
# One could probably just use "typing.Container" in this case, if accepting
# a wide range of input types.
Inputs = typing.Union[container.ndarray, container.List[float], container.List[container.List[float]]]
Outputs = container.List[float]


class Hyperparams(hyperparams.Hyperparams):
    """
    No hyper-parameters for this primitive.
    """

    pass


class SumPrimitive(base.SingletonOutputMixin[Inputs, Outputs, None, Hyperparams], transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
Mitar's avatar
Mitar committed
38 39 40
    # It is important to provide a docstring because this docstring is used as a description of
    # a primitive. Some callers might analyze it to determine the nature and purpose of a primitive.

Mitar's avatar
Mitar committed
41 42 43 44 45
    """
    A primitive which sums all the values on input into one number.
    """

    # This should contain only metadata which cannot be automatically determined from the code.
Mitar's avatar
Mitar committed
46
    metadata = metadata_base.PrimitiveMetadata({
Mitar's avatar
Mitar committed
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
        # Simply an UUID generated once and fixed forever. Generated using "uuid.uuid4()".
        'id': '9c00d42d-382d-4177-a0e7-082da88a29c8',
        'version': __version__,
        'name': "Sum Values",
        # Keywords do not have a controlled vocabulary. Authors can put here whatever they find suitable.
        'keywords': ['test primitive'],
        'source': {
            'name': __author__,
            'uris': [
                # Unstructured URIs. Link to file and link to repo in this case.
                'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/sum.py',
                'https://gitlab.com/datadrivendiscovery/tests-data.git',
            ],
        },
        # A list of dependencies in order. These can be Python packages, system packages, or Docker images.
        # Of course Python packages can also have their own dependencies, but sometimes it is necessary to
        # install a Python package first to be even able to run setup.py of another package. Or you have
        # a dependency which is not on PyPi.
        'installation': [{
Mitar's avatar
Mitar committed
66
            'type': metadata_base.PrimitiveInstallationType.PIP,
Mitar's avatar
Mitar committed
67
            'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format(
Mitar's avatar
Mitar committed
68 69 70
                git_commit=utils.current_git_commit(os.path.dirname(__file__)),
            ),
        }, {
Mitar's avatar
Mitar committed
71
            'type': metadata_base.PrimitiveInstallationType.DOCKER,
Mitar's avatar
Mitar committed
72 73 74 75 76 77 78
            # A key under which information about a running container will be provided to the primitive.
            'key': DOCKER_KEY,
            'image_name': 'registry.gitlab.com/datadrivendiscovery/tests-data/summing',
            # Instead of a label, an exact hash of the image is required. This assures reproducibility.
            # You can see digests using "docker images --digests".
            'image_digest': 'sha256:07db5fef262c1172de5c1db5334944b2f58a679e4bb9ea6232234d71239deb64',
        }],
Mitar's avatar
Mitar committed
79 80 81 82 83 84
        # URIs at which one can obtain code for the primitive, if available.
        'location_uris': [
            'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/sum.py'.format(
                git_commit=utils.current_git_commit(os.path.dirname(__file__)),
            ),
        ],
Mitar's avatar
Mitar committed
85 86 87 88 89
        # The same path the primitive is registered with entry points in setup.py.
        'python_path': 'd3m.primitives.test.SumPrimitive',
        # Choose these from a controlled vocabulary in the schema. If anything is missing which would
        # best describe the primitive, make a merge request.
        'algorithm_types': [
Mitar's avatar
Mitar committed
90
            metadata_base.PrimitiveAlgorithmType.COMPUTER_ALGEBRA,
Mitar's avatar
Mitar committed
91
        ],
Mitar's avatar
Mitar committed
92
        'primitive_family': metadata_base.PrimitiveFamily.OPERATOR,
Mitar's avatar
Mitar committed
93 94 95
        # A metafeature about preconditions required for this primitive to operate well.
        'preconditions': [
            # Instead of strings you can also use available Python enumerations.
Mitar's avatar
Mitar committed
96 97
            metadata_base.PrimitivePrecondition.NO_MISSING_VALUES,
            metadata_base.PrimitivePrecondition.NO_CATEGORICAL_VALUES,
Mitar's avatar
Mitar committed
98 99 100 101 102 103 104 105 106
        ]
    })

    def __init__(self, *, hyperparams: Hyperparams, random_seed: int = 0, docker_containers: typing.Dict[str, str] = None) -> None:
        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)

        if DOCKER_KEY not in self.docker_containers:
            raise ValueError("Docker key '{docker_key}' missing among provided Docker containers.".format(docker_key=DOCKER_KEY))

Mitar's avatar
Mitar committed
107 108 109 110 111 112 113 114 115
    def _convert_value(self, value: typing.Any) -> typing.Union[numpy.ndarray, typing.List, typing.Any]:
        # Server does not know about container types, just standard numpy arrays and lists.
        if isinstance(value, container.ndarray):
            return value.view(numpy.ndarray)
        elif isinstance(value, container.List):
            return [self._convert_value(v) for v in value]
        else:
            return value

Mitar's avatar
Mitar committed
116 117 118
    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
        # In the future, we should store here data in Arrow format into
        # Plasma store and just pass an ObjectId of data over HTTP.
Mitar's avatar
Mitar committed
119 120
        value = self._convert_value(inputs)
        data = pickle.dumps(value)
Mitar's avatar
Mitar committed
121 122 123 124 125 126 127 128

        # TODO: Retry if connection fails.
        #       This connection can sometimes fail because the service inside a Docker container
        #       is not yet ready, despite container itself already running. Primitive should retry
        #       a few times before aborting.

        # Primitive knows the port the container is listening on.
        connection = client.HTTPConnection(self.docker_containers[DOCKER_KEY], port=8000)
Mitar's avatar
Mitar committed
129 130 131
        # This simple primitive does not keep any state in the Docker container.
        # But if your primitive does have to associate requests with a primitive, consider
        # using Python's "id(self)" call to get an identifier of a primitive's instance.
Mitar's avatar
Mitar committed
132
        self.logger.debug("HTTP request: host=%(host)s port=%(port)s", {'host': self.docker_containers[DOCKER_KEY], 'port': 8000}, extra={'data': value})
Mitar's avatar
Mitar committed
133 134 135 136
        connection.request('POST', '/', data, {
            'Content-Type': 'multipart/form-data',
        })
        response = connection.getresponse()
Mitar's avatar
Mitar committed
137
        self.logger.debug("HTTP response: status=%(status)s", {'status': response.status}, extra={'response': response})
Mitar's avatar
Mitar committed
138

Mitar's avatar
Mitar committed
139 140
        if response.status != 200:
            raise ValueError("Invalid HTTP response status: {status}".format(status=response.status))
Mitar's avatar
Mitar committed
141 142 143 144

        result = int(response.read())

        outputs = container.List[float]((result,), {
Mitar's avatar
Mitar committed
145
            'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
Mitar's avatar
Mitar committed
146 147 148 149 150
            'structural_type': container.List[float],
            'dimension': {
                'length': 1,
            },
        })
Mitar's avatar
Mitar committed
151
        outputs.metadata = outputs.metadata.update((metadata_base.ALL_ELEMENTS,), {
Mitar's avatar
Mitar committed
152 153 154 155 156 157 158 159 160 161
            'structural_type': float,
        })

        # Wrap it into default "CallResult" object: we are not doing any iterations.
        return base.CallResult(outputs)

    # Because numpy arrays do not contain shapes and dtype as part of their structural types,
    # we have to manually check those in metadata. In this case, just dtype which is stored as
    # "structural_type" on values themselves (and not the container or dimensions).
    @classmethod
Mitar's avatar
Mitar committed
162
    def can_accept(cls, *, method_name: str, arguments: typing.Dict[str, typing.Union[metadata_base.Metadata, type]]) -> typing.Optional[metadata_base.DataMetadata]:
Mitar's avatar
Mitar committed
163 164 165 166 167 168 169 170 171
        output_metadata = super().can_accept(method_name=method_name, arguments=arguments)

        # If structural types didn't match, don't bother.
        if output_metadata is None:
            return None

        if 'inputs' not in arguments:
            return output_metadata

Mitar's avatar
Mitar committed
172
        inputs_metadata = typing.cast(metadata_base.DataMetadata, arguments['inputs'])
Mitar's avatar
Mitar committed
173 174
        dimension_index = 0
        while True:
Mitar's avatar
Mitar committed
175
            metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS,) * dimension_index)
Mitar's avatar
Mitar committed
176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192

            if 'dimension' not in metadata:
                break

            dimension_index += 1

        inputs_value_structural_type = metadata.get('structural_type', None)

        if inputs_value_structural_type is None:
            return None

        # Not a perfect way to check for a numeric type but will do for this example.
        # Otherwise check out "pandas.api.types.is_numeric_dtype".
        if not issubclass(inputs_value_structural_type, (float, int, numpy.number)):
            return None

        return output_metadata