ETag support
ETags are pretty cool to use with Unpoly
See https://unpoly.com/up-etag
Do you think this is something python-unpoly could support, or is it out of scope for this project?
Here's an example of a Starlette middleware I'm using:
from __future__ import annotations
import hashlib
import re
from datetime import datetime
from typing import TYPE_CHECKING
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.responses import Response, StreamingResponse
from starlette.status import HTTP_304_NOT_MODIFIED
if TYPE_CHECKING:
from starlette.requests import Request
# Splits a Cache-Control header value into its comma-separated directives,
# swallowing any whitespace on either side of each comma.
CC_DELIM_RE = re.compile(r"\s*,\s*")
class ConditionalGetMiddleware(BaseHTTPMiddleware):
    """
    Handle conditional GET operations.

    If the response has an ETag or Last-Modified header and the request has
    If-None-Match or If-Modified-Since, replace the response with
    304 Not Modified. Add an ETag header if needed.

    Only successful (2xx) responses to GET requests are processed; every
    other response is passed through untouched.
    """

    # Date layout used when parsing Last-Modified / If-Modified-Since.
    _HTTP_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S GMT"

    # Headers copied from the original response onto a 304 response.
    _NOT_MODIFIED_HEADERS = frozenset(
        {
            "cache-control",
            "content-location",
            "date",
            "etag",
            "expires",
            "last-modified",
            "vary",
        }
    )

    async def dispatch(self, request: Request, call_next):
        """
        Run the downstream app and apply conditional-GET handling.

        Returns the downstream response, or an empty 304 response when the
        request's validators show the client copy is still current.
        """
        response = await call_next(request)

        # Only process GET requests
        if request.method != "GET":
            return response

        # Preconditions apply to successful responses only; never ETag or
        # 304 an error or redirect.
        if not 200 <= response.status_code < 300:
            return response

        # Duck-type on body_iterator rather than isinstance(StreamingResponse):
        # the wrapper returned by call_next is not guaranteed to subclass
        # StreamingResponse in every Starlette version, yet it still has no
        # buffered ``body`` to hash.
        if hasattr(response, "body_iterator"):
            response = await self._buffer_streaming_response(response)

        # Add ETag if needed
        if self.needs_etag(response) and "etag" not in response.headers:
            self.set_response_etag(response)

        etag = response.headers.get("etag")
        last_modified = response.headers.get("last-modified")

        if_none_match = request.headers.get("if-none-match")
        if if_none_match is not None:
            # If-None-Match takes precedence: when it is present,
            # If-Modified-Since must be ignored, even if the ETag differs.
            if etag and self.etag_matches(etag, if_none_match):
                return self.not_modified_response(response)
            return response

        if_modified_since = request.headers.get("if-modified-since")
        if (
            last_modified
            and if_modified_since is not None
            and not self.was_modified_since(last_modified, if_modified_since)
        ):
            return self.not_modified_response(response)

        return response

    async def _buffer_streaming_response(self, response) -> Response:
        """Consume a streaming response and return an equivalent buffered Response."""
        chunks = [chunk async for chunk in response.body_iterator]
        body = b"".join(
            chunk.encode(response.charset) if isinstance(chunk, str) else chunk
            for chunk in chunks
        )

        buffered = Response(
            content=body,
            status_code=response.status_code,
            media_type=response.media_type,
        )

        # Copy the raw header list instead of going through dict(), which
        # keeps a single value per name and would drop repeated headers such
        # as multiple Set-Cookie lines. Headers computed for the buffered
        # body (e.g. content-length) are kept only when the original response
        # did not already provide them.
        original_headers = list(response.raw_headers)
        present = {name.lower() for name, _ in original_headers}
        computed_headers = [
            (name, value)
            for name, value in buffered.raw_headers
            if name.lower() not in present
        ]
        buffered.raw_headers[:] = original_headers + computed_headers
        return buffered

    def needs_etag(self, response: Response) -> bool:
        """Return True if an ETag header should be added to response."""
        cache_control = response.headers.get("cache-control", "")
        directives = CC_DELIM_RE.split(cache_control)
        return all(
            directive.strip().lower() != "no-store" for directive in directives
        )

    def set_response_etag(self, response: Response) -> None:
        """Calculate and set a strong ETag header based on response content."""
        body = response.body
        if isinstance(body, str):
            body = body.encode("utf-8")

        digest = hashlib.md5(body).hexdigest()
        # An entity-tag is a quoted string on the wire; a bare token is not
        # a valid ETag value.
        response.headers["etag"] = f'"{digest}"'

    @staticmethod
    def _opaque_tag(entity_tag: str) -> str:
        """Return the opaque part of an entity-tag, without W/ prefix and quotes."""
        tag = entity_tag.strip()
        if tag.startswith("W/"):
            tag = tag[2:]
        return tag.strip('"')

    def etag_matches(self, etag: str, if_none_match: str) -> bool:
        """
        Check if the ETag matches any of the ETags in If-None-Match.

        Uses weak comparison, as required for If-None-Match: two tags match
        when their opaque values are equal, whether or not either one
        carries the ``W/`` weak marker.
        """
        # Handle wildcard
        if if_none_match.strip() == "*":
            return True

        target = self._opaque_tag(etag)
        return any(
            self._opaque_tag(candidate) == target
            for candidate in if_none_match.split(",")
            if candidate.strip()  # ignore empty list members (stray commas)
        )

    def was_modified_since(self, last_modified: str, if_modified_since: str) -> bool:
        """
        Check if resource was modified since the given date.

        Both values are parsed with the fixed "Day, DD Mon YYYY HH:MM:SS GMT"
        layout. Returns True when ``last_modified`` is strictly later than
        ``if_modified_since``, and also when either value cannot be parsed
        (an unparseable date is treated as "modified").
        """
        try:
            last_modified_dt = datetime.strptime(
                last_modified, self._HTTP_DATE_FORMAT
            )
            if_modified_since_dt = datetime.strptime(
                if_modified_since, self._HTTP_DATE_FORMAT
            )
        except ValueError:
            # If parsing fails, assume it was modified
            return True
        return last_modified_dt > if_modified_since_dt

    def not_modified_response(self, original_response: Response) -> Response:
        """
        Create a 304 Not Modified response.

        The response has an empty body and carries over only the caching and
        validator headers listed in ``_NOT_MODIFIED_HEADERS``.
        """
        headers = {
            name: value
            for name, value in original_response.headers.items()
            if name.lower() in self._NOT_MODIFIED_HEADERS
        }
        return Response(content=b"", status_code=HTTP_304_NOT_MODIFIED, headers=headers)