Commit eea0e614 authored by Edgar R. Mondragón's avatar Edgar R. Mondragón 🚴
Browse files

Tap SDK Fix: Improve performance by reusing a single authenticator instance

parent 530d135b
......@@ -4,6 +4,8 @@ import requests
from pathlib import Path
from typing import Any, Dict, Optional, Union, List, Iterable
from memoization import cached
from singer_sdk.helpers.jsonpath import extract_jsonpath
from singer_sdk.streams import {{ cookiecutter.stream_type }}Stream
{%- if cookiecutter.auth_method == "API Key" %}
......@@ -38,6 +40,7 @@ class {{ cookiecutter.source_name }}Stream({{ cookiecutter.stream_type }}Stream)
{%- if cookiecutter.auth_method in ("OAuth2", "JWT") %}
@property
@cached
def authenticator(self) -> {{ cookiecutter.source_name }}Authenticator:
"""Return the stream's authenticator, cached so the same object is reused on later accesses."""
return {{ cookiecutter.source_name }}Authenticator.create_for_stream(self)
......
......@@ -4,10 +4,12 @@
# TODO: Delete this file or add custom authentication logic as needed.
{%- elif cookiecutter.auth_method == "OAuth2" %}
from singer_sdk.authenticators import OAuthAuthenticator
from singer_sdk.authenticators import OAuthAuthenticator, SingletonMeta
class {{ cookiecutter.source_name }}Authenticator(OAuthAuthenticator):
# The SingletonMeta metaclass makes your streams reuse the same authenticator instance.
# If this behaviour interferes with your use-case, you can remove the metaclass.
class {{ cookiecutter.source_name }}Authenticator(OAuthAuthenticator, metaclass=SingletonMeta):
"""Authenticator class for {{ cookiecutter.source_name }}."""
@property
......
......@@ -196,3 +196,39 @@ def test_sdk_standard_tap_tests():
for test in tests:
test()
```
### Make all streams reuse the same authenticator instance
```python
from singer_sdk.authenticators import OAuthAuthenticator, SingletonMeta
from singer_sdk.streams import RESTStream
class SingletonAuthenticator(OAuthAuthenticator, metaclass=SingletonMeta):
"""A singleton authenticator: one instance is shared by every stream that uses it."""
class SingletonAuthStream(RESTStream):
"""A stream with singleton authenticator."""
@property
def authenticator(self) -> SingletonAuthenticator:
"""Stream authenticator (the one shared SingletonAuthenticator instance)."""
return SingletonAuthenticator(stream=self)
```
### Make a stream reuse the same authenticator instance for all requests
```python
from memoization import cached
from singer_sdk.authenticators import APIAuthenticatorBase
from singer_sdk.streams import RESTStream
class CachedAuthStream(RESTStream):
"""A stream with a cached authenticator."""
@property
@cached
def authenticator(self) -> APIAuthenticatorBase:
"""Stream authenticator, cached so this stream reuses it for all requests."""
return APIAuthenticatorBase(stream=self)
```
......@@ -19,7 +19,28 @@ from singer_sdk.streams import Stream as RESTStreamBase
from singer import utils
class APIAuthenticatorBase(object):
class SingletonMeta(type):
    """A general purpose singleton metaclass.

    Each class created with this metaclass holds at most one instance. The
    first call to the class builds that instance; every later call returns it
    unchanged and ignores the arguments it was given.
    """

    def __init__(cls, name, bases, dic):
        """Init metaclass.

        The single instance is saved as an attribute of the class being
        created, so every class (subclasses included) starts with its own
        empty slot.
        """
        cls.__single_instance = None
        super().__init__(name, bases, dic)

    def __call__(cls, *args, **kwargs):
        """Create or reuse the singleton.

        Args:
            *args: Positional arguments forwarded to ``__init__`` on the first
                call only.
            **kwargs: Keyword arguments forwarded to ``__init__`` on the first
                call only.

        Returns:
            The one instance of ``cls``.
        """
        # Compare with None instead of relying on truthiness: an instance that
        # defines __bool__ or __len__ can be falsy, and would otherwise be
        # rebuilt on every call.
        if cls.__single_instance is not None:
            return cls.__single_instance

        single_obj = cls.__new__(cls)
        single_obj.__init__(*args, **kwargs)
        cls.__single_instance = single_obj
        return single_obj
class APIAuthenticatorBase:
"""Base class for offloading API auth."""
def __init__(self, stream: RESTStreamBase):
......
"""REST fixtures."""
from memoization.memoization import cached
import pytest
from singer_sdk.authenticators import APIAuthenticatorBase, SingletonMeta
from singer_sdk.streams import RESTStream
from singer_sdk.tap_base import Tap
class SingletonAuthenticator(APIAuthenticatorBase, metaclass=SingletonMeta):
"""A singleton authenticator.

Uses the SingletonMeta metaclass so instantiating it returns one shared object.
"""
class SimpleRESTStream(RESTStream):
"""A REST stream for testing."""
url_base = "https://example.com"
# Minimal schema: an object with no declared properties.
schema = {
"type": "object",
"properties": {},
}
@property
def authenticator(self) -> APIAuthenticatorBase:
"""Stream authenticator: a new APIAuthenticatorBase is built on every access."""
return APIAuthenticatorBase(stream=self)
class SingletonAuthStream(SimpleRESTStream):
"""A stream with singleton authenticator."""
@property
def authenticator(self) -> SingletonAuthenticator:
"""Stream authenticator, obtained by instantiating the singleton class."""
return SingletonAuthenticator(stream=self)
class NaiveAuthenticator(APIAuthenticatorBase):
"""A naive authenticator class: a plain subclass with no singleton metaclass."""
class CachedAuthStream(SimpleRESTStream):
"""A stream whose naive authenticator is cached on the property."""
@property
@cached
def authenticator(self) -> NaiveAuthenticator:
"""Stream authenticator, wrapped in `cached` so repeated accesses can reuse it."""
return NaiveAuthenticator(stream=self)
class SimpleTap(Tap):
    """Test tap exposing two streams for each authenticator flavour."""

    name = "tappy"

    def discover_streams(self):
        """Build the tap's streams in a fixed order.

        Returns:
            A list with two plain, two singleton-auth and two cached-auth
            streams, each constructed with this tap and its stream name.
        """
        stream_specs = (
            (SimpleRESTStream, "some_stream"),
            (SimpleRESTStream, "other_stream"),
            (SingletonAuthStream, "single_auth_stream"),
            (SingletonAuthStream, "reused_single_auth_stream"),
            (CachedAuthStream, "cached_auth_stream"),
            (CachedAuthStream, "other_cached_auth_stream"),
        )
        return [
            stream_cls(self, name=stream_name)
            for stream_cls, stream_name in stream_specs
        ]
@pytest.fixture
def rest_tap():
"""Create a RESTful tap instance.

Returns a SimpleTap constructed with no arguments.
"""
return SimpleTap()
"""Tests for authentication helpers."""
import pytest
from singer_sdk.streams import RESTStream
from singer_sdk.tap_base import Tap
@pytest.mark.parametrize(
"stream_name,other_stream_name,auth_reused",
[
(
"some_stream",
"some_stream",
False,
),
(
"some_stream",
"other_stream",
False,
),
(
"single_auth_stream",
"single_auth_stream",
True,
),
(
"single_auth_stream",
"reused_single_auth_stream",
True,
),
(
"cached_auth_stream",
"cached_auth_stream",
True,
),
(
"cached_auth_stream",
"other_cached_auth_stream",
False,
),
],
ids=[
"naive-auth-not-reused-between-requests",
"naive-auth-not-reused-between-streams",
"singleton-auth-reused-between-requests",
"singleton-auth-reused-between-streams",
"cached-auth-reused-between-requests",
"cached-auth-not-reused-between-streams",
],
)
def test_authenticator_is_reused(
rest_tap: Tap, stream_name: str, other_stream_name: str, auth_reused: bool
):
"""Validate that authenticator reuse matches the expectation for each case.

Looks up two streams by name (possibly the same one), reads `authenticator`
from each, and checks that the two objects are identical exactly when
`auth_reused` is True.
"""
stream: RESTStream = rest_tap.streams[stream_name]
other_stream: RESTStream = rest_tap.streams[other_stream_name]
assert (stream.authenticator is other_stream.authenticator) is auth_reused
......@@ -6,6 +6,6 @@
],
"group_ids": [
"12345"
]
],
"start_date": "2020-02-20"
}
\ No newline at end of file
}
......@@ -9,4 +9,4 @@
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/something-or-other-1234.iam.gserviceaccount.com"
}
\ No newline at end of file
}
"""External tests fixtures."""
import json
from pathlib import Path
from typing import Optional
import pytest
@pytest.fixture
def gitlab_config() -> Optional[dict]:
    """Create a tap-gitlab config object.

    Returns:
        The parsed contents of the local GitLab secrets file, or ``None`` when
        that file does not exist.
    """
    path = Path("singer_sdk/tests/external/.secrets/gitlab-config.json")
    if not path.exists():
        return None
    # JSON files are UTF-8; naming the encoding keeps the result independent
    # of the platform's locale default.
    return json.loads(path.read_text(encoding="utf-8"))
@pytest.fixture
def ga_config() -> Optional[dict]:
    """Create a tap-google-analytics config object.

    Returns:
        The parsed contents of the local Google Analytics secrets file, or
        ``None`` when that file does not exist.
    """
    path = Path("singer_sdk/tests/external/.secrets/google-analytics-config.json")
    if not path.exists():
        return None
    # JSON files are UTF-8; naming the encoding keeps the result independent
    # of the platform's locale default.
    return json.loads(path.read_text(encoding="utf-8"))
"""Test class creation."""
import json
from pathlib import Path
from typing import Optional
from singer_sdk.samples.sample_tap_google_analytics.ga_tap import (
......@@ -11,9 +9,6 @@ from singer_sdk.samples.sample_tap_google_analytics.ga_tap import (
CONFIG_FILE = "singer_sdk/tests/external/.secrets/google-analytics-config.json"
def test_tap_class():
def test_tap_class(ga_config: Optional[dict]):
"""Test class creation."""
config: Optional[dict] = None
if Path(CONFIG_FILE).exists():
config = json.loads(Path(CONFIG_FILE).read_text())
_ = SampleTapGoogleAnalytics(config=config, parse_env_config=True)
_ = SampleTapGoogleAnalytics(config=ga_config, parse_env_config=True)
"""Test class creation."""
import json
from pathlib import Path
from typing import Optional
from singer_sdk.samples.sample_tap_google_analytics.ga_tap import (
SampleTapGoogleAnalytics,
)
CONFIG_FILE = "singer_sdk/tests/external/.secrets/google-analytics-config.json"
def test_ga_sync_sample():
def test_ga_sync_sample(ga_config: Optional[dict]):
"""Test class creation."""
config: Optional[dict] = None
if Path(CONFIG_FILE).exists():
config = json.loads(Path(CONFIG_FILE).read_text())
tap = SampleTapGoogleAnalytics(config=config, parse_env_config=True)
tap = SampleTapGoogleAnalytics(config=ga_config, parse_env_config=True)
tap.sync_all()
"""Run the generic tests from `singer_sdk.testing`."""
from pathlib import Path
from typing import Optional
from singer_sdk.testing import get_standard_tap_tests
from singer_sdk.samples.sample_tap_gitlab.gitlab_tap import SampleTapGitlab
......@@ -9,24 +10,17 @@ from singer_sdk.samples.sample_tap_google_analytics.ga_tap import (
)
GA_CONFIG_FILE = Path("singer_sdk/tests/external/.secrets/google-analytics-config.json")
GITLAB_CONFIG_FILE = Path("singer_sdk/tests/external/.secrets/gitlab-config.json")
def test_gitlab_tap_standard_tests():
def test_gitlab_tap_standard_tests(gitlab_config: Optional[dict]):
"""Run standard tap tests against Gitlab tap."""
tests = get_standard_tap_tests(
SampleTapGitlab,
config=GITLAB_CONFIG_FILE if GITLAB_CONFIG_FILE.exists() else None,
)
tests = get_standard_tap_tests(SampleTapGitlab, config=gitlab_config)
for test in tests:
test()
def test_ga_tap_standard_tests():
def test_ga_tap_standard_tests(ga_config: Optional[dict]):
"""Run standard tap tests against Google Analytics tap."""
tests = get_standard_tap_tests(
SampleTapGoogleAnalytics,
config=GA_CONFIG_FILE if GA_CONFIG_FILE.exists() else None,
)
tests = get_standard_tap_tests(SampleTapGoogleAnalytics, config=ga_config)
for test in tests:
test()
"""Tests discovery features for the GitLab sample tap."""
import json
from pathlib import Path
from typing import Optional
from singer_sdk.samples.sample_tap_gitlab.gitlab_tap import SampleTapGitlab
CONFIG_FILE = "singer_sdk/tests/external/.secrets/gitlab-config.json"
def test_gitlab_tap_discovery():
def test_gitlab_tap_discovery(gitlab_config: Optional[dict]):
"""Test class creation."""
config: Optional[dict] = None
if Path(CONFIG_FILE).exists():
config = json.loads(Path(CONFIG_FILE).read_text())
tap = SampleTapGitlab(config=config, state=None, parse_env_config=True)
tap = SampleTapGitlab(config=gitlab_config, state=None, parse_env_config=True)
catalog_json = tap.run_discovery()
assert catalog_json
def test_gitlab_replication_keys():
def test_gitlab_replication_keys(gitlab_config: Optional[dict]):
stream_name = "issues"
expected_replication_key = "updated_at"
config: Optional[dict] = None
if Path(CONFIG_FILE).exists():
config = json.loads(Path(CONFIG_FILE).read_text())
tap = SampleTapGitlab(config=config, state=None, parse_env_config=True)
tap = SampleTapGitlab(config=gitlab_config, state=None, parse_env_config=True)
catalog = tap.catalog_dict
catalog_entries = catalog["streams"]
for catalog_entry in [c for c in catalog_entries if c["stream"] == stream_name]:
......
"""Test sample sync."""
import json
from pathlib import Path
from typing import Optional
from singer_sdk.helpers import _catalog
from singer_sdk.samples.sample_tap_gitlab.gitlab_tap import SampleTapGitlab
COUNTER = 0
CONFIG_FILE = "singer_sdk/tests/external/.secrets/gitlab-config.json"
SAMPLE_CONFIG_BAD = {"not": "correct"}
config: Optional[dict] = None
if Path(CONFIG_FILE).exists():
config = json.loads(Path(CONFIG_FILE).read_text())
def test_gitlab_sync_all():
def test_gitlab_sync_all(gitlab_config: Optional[dict]):
"""Test sync_all() for gitlab sample."""
tap = SampleTapGitlab(config=config, parse_env_config=True)
tap = SampleTapGitlab(config=gitlab_config, parse_env_config=True)
tap.sync_all()
def test_gitlab_sync_epic_issues():
def test_gitlab_sync_epic_issues(gitlab_config: Optional[dict]):
"""Test sync for just the 'epic_issues' child stream."""
# Initialize with basic config
stream_name = "epic_issues"
tap1 = SampleTapGitlab(config=config, parse_env_config=True)
tap1 = SampleTapGitlab(config=gitlab_config, parse_env_config=True)
# Test discovery
tap1.run_discovery()
catalog1 = tap1.catalog_dict
......@@ -39,5 +27,7 @@ def test_gitlab_sync_epic_issues():
catalog=catalog1, stream_name=stream_name, selected=True
)
tap1 = None
tap2 = SampleTapGitlab(config=config, parse_env_config=True, catalog=catalog1)
tap2 = SampleTapGitlab(
config=gitlab_config, parse_env_config=True, catalog=catalog1
)
tap2.sync_all()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment