Skip to content

Etags support

Etags are pretty cool to use with Unpoly

See https://unpoly.com/up-etag

Do you think this is something python-unpoly could support, or is it out of scope for this project?

Here's an example of a Starlette middleware I'm using:

from __future__ import annotations

import hashlib
import re
from datetime import datetime
from typing import TYPE_CHECKING

from starlette.middleware.base import BaseHTTPMiddleware
from starlette.responses import Response, StreamingResponse
from starlette.status import HTTP_304_NOT_MODIFIED

if TYPE_CHECKING:
  from starlette.requests import Request


# Splits a Cache-Control header value into its individual directives:
# a comma, together with any whitespace around it, is the delimiter.
CC_DELIM_RE = re.compile(r"\s*,\s*")


class ConditionalGetMiddleware(BaseHTTPMiddleware):
  """
  Handle conditional GET operations.

  For successful (2xx) GET responses, an ETag header is added when the
  response does not already carry one and is not marked ``no-store``. If the
  request's If-None-Match (or, when that header is absent, If-Modified-Since)
  shows that the client copy is still current, the response is replaced with
  an empty 304 Not Modified.

  Computing the ETag requires the complete body, so streamed responses are
  buffered in memory. Responses marked ``Cache-Control: no-store`` that carry
  no ETag/Last-Modified header are passed through untouched.
  """

  async def dispatch(self, request: Request, call_next) -> Response:
    """
    Run the downstream app and apply conditional-GET handling to its response.

    Args:
      request: The incoming request.
      call_next: Callable that forwards the request to the next ASGI layer.

    Returns:
      The downstream response (possibly re-wrapped with an ETag added), or a
      304 Not Modified response when the client's cached copy is still valid.
    """
    response = await call_next(request)

    # Conditional handling only makes sense for successful GET requests;
    # an error page must never be swapped for a 304.
    if request.method != "GET" or not 200 <= response.status_code < 300:
      return response

    has_etag = "etag" in response.headers
    add_etag = not has_etag and self.needs_etag(response)

    # Nothing to add and nothing to validate against: leave the response
    # (and its streaming behaviour) completely untouched.
    if not (add_etag or has_etag or "last-modified" in response.headers):
      return response

    # Duck-typed on purpose: this covers StreamingResponse as well as any
    # wrapper that exposes ``body_iterator`` without subclassing it.
    if hasattr(response, "body_iterator"):
      response = await self._buffer_streaming_response(response)

    if add_etag:
      self.set_response_etag(response)

    etag = response.headers.get("etag")
    last_modified = response.headers.get("last-modified")
    if_none_match = request.headers.get("if-none-match")
    if_modified_since = request.headers.get("if-modified-since")

    if if_none_match:
      # If-None-Match takes precedence: when the client sent it,
      # If-Modified-Since must be ignored even if the ETag did not match.
      if etag and self.etag_matches(etag, if_none_match):
        return self.not_modified_response(response)
    elif last_modified and if_modified_since:
      if not self.was_modified_since(last_modified, if_modified_since):
        return self.not_modified_response(response)

    return response

  async def _buffer_streaming_response(self, response: Response) -> Response:
    """Drain a streamed body and return an equivalent in-memory Response."""
    charset = getattr(response, "charset", "utf-8")
    chunks = [
      # Streamed bodies may yield text as well as bytes.
      chunk.encode(charset) if isinstance(chunk, str) else chunk
      async for chunk in response.body_iterator
    ]
    body = b"".join(chunks)

    buffered = Response(
      content=body,
      status_code=response.status_code,
      media_type=response.media_type,
      background=getattr(response, "background", None),
    )

    # Copy headers pair by pair instead of through dict(), which would keep
    # only one value of repeated headers such as Set-Cookie.
    for name, value in response.headers.items():
      lowered = name.lower()
      if lowered == "content-length":
        # Already computed by Response from the buffered body.
        continue
      if lowered == "content-type":
        buffered.headers[name] = value
      else:
        buffered.headers.append(name, value)

    return buffered

  def needs_etag(self, response: Response) -> bool:
    """Return True unless the response's Cache-Control contains ``no-store``."""
    cache_control = response.headers.get("cache-control", "")
    directives = CC_DELIM_RE.split(cache_control)
    return all(directive.lower() != "no-store" for directive in directives)

  def set_response_etag(self, response: Response) -> None:
    """
    Calculate an ETag from the response body and set it as a header.

    The value is the MD5 hex digest of the body (used purely as a content
    fingerprint), wrapped in double quotes as the entity-tag syntax requires.
    Responses without a ``body`` attribute are left unchanged.
    """
    body = getattr(response, "body", None)
    if body is None:
      return
    if isinstance(body, str):
      body = body.encode("utf-8")

    digest = hashlib.md5(body).hexdigest()
    response.headers["etag"] = f'"{digest}"'

  @staticmethod
  def _opaque_tag(raw: str) -> str:
    """Reduce an entity-tag to its opaque value (no ``W/`` prefix, no quotes)."""
    tag = raw.strip()
    if tag.startswith("W/"):
      tag = tag[2:]
    return tag.strip('"')

  def etag_matches(self, etag: str, if_none_match: str) -> bool:
    """
    Check whether ``etag`` matches any entity-tag listed in If-None-Match.

    Weak comparison is used, as specified for If-None-Match: a ``W/`` prefix
    on either side is ignored and only the opaque value is compared. The
    wildcard ``*`` matches any ETag.
    """
    if if_none_match.strip() == "*":
      return True

    candidates = {self._opaque_tag(tag) for tag in if_none_match.split(",")}
    return self._opaque_tag(etag) in candidates

  def was_modified_since(self, last_modified: str, if_modified_since: str) -> bool:
    """
    Check if the resource was modified after the If-Modified-Since date.

    Both values are expected in the ``Sun, 06 Nov 1994 08:49:37 GMT`` form.

    Returns:
      True when Last-Modified is strictly later than If-Modified-Since, or
      when either value cannot be parsed (the safe assumption is "modified").
    """
    http_date = "%a, %d %b %Y %H:%M:%S GMT"
    try:
      last_modified_dt = datetime.strptime(last_modified, http_date)
      if_modified_since_dt = datetime.strptime(if_modified_since, http_date)
    except ValueError:
      return True

    return last_modified_dt > if_modified_since_dt

  def not_modified_response(self, original_response: Response) -> Response:
    """
    Create an empty 304 Not Modified response.

    Only the caching-related headers of ``original_response`` are carried
    over; everything else (including the body) is dropped.
    """
    keep_headers = {
      "cache-control",
      "content-location",
      "date",
      "etag",
      "expires",
      "last-modified",
      "vary",
    }

    not_modified = Response(content=b"", status_code=HTTP_304_NOT_MODIFIED)
    # Append pair by pair so repeated headers (e.g. several Vary lines)
    # are all preserved.
    for name, value in original_response.headers.items():
      if name.lower() in keep_headers:
        not_modified.headers.append(name, value)

    return not_modified