Skip to content

errors caused by invalid url encoding of database URI

👋 hello, thank you first of all for writing this useful plugin and making it available to others under a permissive license, which has saved me a bunch of time. i apprecicate your work here.

when using this module for RDS IAM authentication, we found that there was an error when connecting:

2024/06/26 01:27:31 WARNING mlflow.store.db.utils: SQLAlchemy engine could not be created. The following exception is caught.
(psycopg2.OperationalError) connection to server at "<redacted>" (<redacted>), port 5432 failed: FATAL:  PAM authentication failed for user "<redacted>"

after confirming via this guide that the environment in question was able to connect to the database, we found that the major delta between the two approaches was how the URI was constructed.

i found two related issues which seem to referencing the same issue:

in order to fix this, we applied the following patch to the repo:

diff --git a/mlflow_aws_rds_iam/model_registry.py b/mlflow_aws_rds_iam/model_registry.py
index e02aa6b..b97d764 100644
--- a/mlflow_aws_rds_iam/model_registry.py
+++ b/mlflow_aws_rds_iam/model_registry.py
@@ -5,7 +5,7 @@ from urllib.parse import quote_plus
 
 import boto3
 from mlflow.store.model_registry.sqlalchemy_store import SqlAlchemyStore
-from sqlalchemy import event, make_url
+from sqlalchemy import event, make_url, URL
 
 from .listeners import make_token_provider
 from .ssl import IS_SSL_DISABLED_ENV_VAR, set_ssl_params
@@ -44,22 +44,29 @@ class RDSIAMStore(SqlAlchemyStore):  # type: ignore[misc]
 
         rds = boto3.client("rds")
 
-        # Bootstrap URL with initial RDS IAM token
-        db_uri = db_uri.set(
-            password=quote_plus(
-                rds.generate_db_auth_token(
-                    DBHostname=db_uri.host,
-                    Port=db_uri.port or get_db_default_port(db_uri),
-                    DBUsername=db_uri.username,
-                )
-            )
+        # Generate the password
+        password = rds.generate_db_auth_token(
+            DBHostname=db_uri.host,
+            Port=db_uri.port or get_db_default_port(db_uri),
+            DBUsername=db_uri.username,
+        )
+
+        # Ensure to use URL object instead of make_url function like above
+        # to ensure we handle proper escaping of query parameters
+        url_object = URL.create(
+            "postgresql+psycopg2",
+            username=db_uri.username,
+            password=password,
+            host=db_uri.host,
+            database=db_uri.database,
         )
+
         logger.debug(
-            "Model registry store bootstrap database URI: %s", db_uri.render_as_string()
+            "Model registry store bootstrap database URI: %s", url_object.render_as_string()
         )
 
         # Let MLflow's own SqlAlchemyStore initialize the SA engine
-        super().__init__(db_uri.render_as_string(hide_password=False))
+        super().__init__(url_object.render_as_string(hide_password=False))
 
         # Add token rotation listener to the engine
         event.listen(self.engine, "do_connect", make_token_provider(rds))
diff --git a/mlflow_aws_rds_iam/tracking.py b/mlflow_aws_rds_iam/tracking.py
index 9e3e95a..07a4e72 100644
--- a/mlflow_aws_rds_iam/tracking.py
+++ b/mlflow_aws_rds_iam/tracking.py
@@ -5,7 +5,7 @@ from urllib.parse import quote_plus
 
 import boto3
 from mlflow.store.tracking.sqlalchemy_store import SqlAlchemyStore
-from sqlalchemy import event, make_url
+from sqlalchemy import event, make_url, URL
 
 from .listeners import make_token_provider
 from .ssl import IS_SSL_DISABLED_ENV_VAR, set_ssl_params
@@ -47,23 +47,30 @@ class RDSIAMStore(SqlAlchemyStore):  # type: ignore[misc]
 
         rds = boto3.client("rds")
 
-        # Bootstrap URL with initial RDS IAM token
-        db_uri = db_uri.set(
-            password=quote_plus(
-                rds.generate_db_auth_token(
-                    DBHostname=db_uri.host,
-                    Port=db_uri.port or get_db_default_port(db_uri),
-                    DBUsername=db_uri.username,
-                )
-            )
+        # Generate the password
+        password = rds.generate_db_auth_token(
+            DBHostname=db_uri.host,
+            Port=db_uri.port or get_db_default_port(db_uri),
+            DBUsername=db_uri.username,
+        )
+
+        # Ensure to use URL object instead of make_url function like above
+        # to ensure we handle proper escaping of query parameters
+        url_object = URL.create(
+            "postgresql+psycopg2",
+            username=db_uri.username,
+            password=password,
+            host=db_uri.host,
+            database=db_uri.database,
         )
+
         logger.debug(
             "Tracking store bootstrap database URI: %s", db_uri.render_as_string()
         )
 
         # Let MLflow's own SqlAlchemyStore initialize the SA engine
         super().__init__(
-            db_uri.render_as_string(hide_password=False),
+            db_uri=url_object.render_as_string(hide_password=False),
             default_artifact_root=artifact_uri,
         )

after, which, we no longer had the error above. it seems either the password or ssl query parameters are being incorrectly URL encoded with the existing approach.