Skip to content

Catalog Entry Missing Warning Logs

Summary

When running a tap whose streams define their schemas with the Stream.schema syntax, nested objects within the schema definition cause WARNING-level log lines like the following to be emitted:

tap-ocs      | time=2021-05-19 11:15:14 name=tap-ocs level=WARNING message=Catalog entry missing for 'collections':'('properties', 'meta', 'properties', 'product')'. Using parent value of selected=True.

An example schema definition that causes these logs to appear is as follows:

class CollectionsStream(Stream):
    """Stream class for OCS Collections streams.

    Declares the stream's name, primary keys, partitions and schema.
    The schema is mostly flat, except for the ``meta`` property, which is an
    object holding two further nested objects: ``product`` and ``variant``.
    """

    name = "collections"
    # Composite key; both fields are declared as properties in the schema below.
    primary_keys = ["product_type", "id"]
    # No replication key is configured for this stream.
    replication_key = None
    # One entry per collection name.
    # NOTE(review): presumably the SDK runs the stream once per partition and
    # passes each dict in as context -- confirm against the Stream base class.
    partitions = [
        {"collection": "flower"},
        {"collection": "vapes"},
        {"collection": "extracts"},
        {"collection": "edibles"},
        {"collection": "topicals"},
        {"collection": "accessories"},
    ]

    # Schema built with the `th` typing helpers. `.to_dict()` is called at the
    # end, so `schema` holds the result of that call rather than the
    # PropertiesList object itself.
    schema = th.PropertiesList(
        # The only two properties declared with required=True.
        th.Property("title", th.StringType, required=True),
        th.Property("updated_at", th.DateTimeType, required=True),
        # `meta` is the only object-typed property; it nests two more objects.
        th.Property(
            "meta",
            th.ObjectType(
                # Nested two levels deep: meta -> product.
                th.Property(
                    "product",
                    th.ObjectType(
                        th.Property("brand", th.StringType),
                        th.Property("cbd_cont_per_cap", th.NumberType),
                        th.Property("cbd_cont_per_vol", th.NumberType),
                        th.Property("cbd_content_max", th.NumberType),
                        th.Property("cbd_content_min", th.NumberType),
                        th.Property("content_uom", th.StringType),
                        th.Property("plant_type", th.StringType),
                        th.Property("potency", th.StringType),
                        th.Property("thc_content_per_cap", th.NumberType),
                        th.Property("thc_cont_per_vol", th.NumberType),
                        th.Property("thc_content_max", th.NumberType),
                        th.Property("thc_content_min", th.NumberType),
                        th.Property("unit_suffix", th.StringType),
                    ),
                ),
                # Nested two levels deep: meta -> variant.
                th.Property(
                    "variant",
                    th.ObjectType(
                        th.Property("cbd_max_per", th.NumberType),
                        th.Property("cbd_min_per", th.NumberType),
                        th.Property("thc_max_per", th.NumberType),
                        th.Property("thc_min_per", th.NumberType),
                        th.Property("display_ppu", th.NumberType),
                        th.Property("display_uom", th.StringType),
                        th.Property("price_per_unit", th.NumberType),
                        th.Property("price_per_uom", th.NumberType),
                        th.Property("retail_pack_net_content", th.NumberType),
                        th.Property("retail_pack_number_of_items", th.IntegerType),
                    ),
                ),
            ),
        ),
        # Remaining properties are scalars or arrays of scalars.
        th.Property("handle", th.StringType),
        th.Property("vendor", th.StringType),
        th.Property("product_type", th.StringType),
        th.Property("id", th.IntegerType),
        th.Property("tags", th.ArrayType(th.StringType)),
        th.Property("option_names", th.ArrayType(th.StringType)),
        th.Property("variants_count", th.IntegerType),
        th.Property("variants_inventory_count", th.IntegerType),
        th.Property("variants_min_price", th.NumberType),
        th.Property("variants_max_price", th.NumberType),
        th.Property("product_image", th.StringType),
        th.Property("created_at", th.DateTimeType),
        th.Property("published_at", th.DateTimeType),
        th.Property("body_html_safe", th.StringType),
        th.Property("collections", th.ArrayType(th.StringType)),
        th.Property("collection_ids", th.ArrayType(th.IntegerType)),
        th.Property("sku", th.StringType),
        th.Property("barcode", th.StringType),
        th.Property("option1", th.StringType),
        th.Property("option2", th.StringType),
        th.Property("option3", th.StringType),
        th.Property("position", th.IntegerType),
        th.Property("requires_shipping", th.BooleanType),
        th.Property("taxable", th.BooleanType),
        th.Property("inventory_management", th.StringType),
        th.Property("inventory_policy", th.StringType),
        th.Property("inventory_quantity", th.IntegerType),
        th.Property("variant_title", th.StringType),
        th.Property("inventory_available", th.BooleanType),
        th.Property("price", th.NumberType),
        th.Property("compare_at_price", th.NumberType),
        th.Property("price_ratio", th.NumberType),
        th.Property("price_range", th.StringType),
        th.Property("grams", th.NumberType),
        th.Property("weight", th.StringType),
        th.Property("image", th.StringType),
        th.Property("objectID", th.StringType),
    ).to_dict()

Steps to reproduce

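Define a schema on a stream using the Stream.schema syntax, include at least one nested object property (an object-typed property inside another object, as in the example above), and run the tap.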

What is the current bug behavior?

See Summary.

What is the expected correct behavior?

I would expect that no WARNING logs would be emitted.

Relevant logs and/or screenshots

See Summary.

Possible fixes

N/A