Skip to content

Cache

Responses are cached to ~/.go2web_cache.json using a simple TTL-based key/value store. The cache respects Cache-Control: no-store and no-cache.

CacheStore

A file-backed key/value store for HTTP responses.

Entries are keyed by URL and serialised as JSON to cache_file. Expired entries are evicted lazily on read. The store respects Cache-Control: no-store and no-cache directives — responses carrying these headers are never persisted.

Attributes:

Name Type Description
DEFAULT_TTL

Default time-to-live in seconds (30).

Example

Using a temporary cache file:

>>> from pathlib import Path
>>> from go2web.cache.store import CacheStore
>>> cache = CacheStore(cache_file=Path("/tmp/test_cache.json"))
>>> cache.set("https://example.com", 200, {}, "hello")
>>> cache.get("https://example.com").body
'hello'
>>> cache.clear()

Source code in src/go2web/cache/store.py
class CacheStore:
    """A file-backed key/value store for HTTP responses.

    Entries are keyed by URL and serialised as JSON to *cache_file*.
    Expired entries are evicted lazily (from memory) on read. Responses whose
    ``Cache-Control`` header contains ``no-store`` or ``no-cache`` — in any
    letter case — are never persisted. A cache file that cannot be decoded
    is treated as an empty cache rather than an error.

    Attributes:
        DEFAULT_TTL: Default time-to-live in seconds (``30``).

    Example:
        Using a temporary cache file:

        >>> from pathlib import Path
        >>> from go2web.cache.store import CacheStore
        >>> cache = CacheStore(cache_file=Path("/tmp/test_cache.json"))
        >>> cache.set("https://example.com", 200, {}, "hello")
        >>> cache.get("https://example.com").body
        'hello'
        >>> cache.clear()
    """

    DEFAULT_TTL = 30

    def __init__(self, cache_file: Path = CACHE_FILE) -> None:
        """Initialise the store, loading existing entries from *cache_file*.

        Args:
            cache_file: Path to the JSON file used for persistence. Created
                on the first :meth:`set` call if it does not exist.
        """
        # _load() reads self._file, so the path has to be bound first.
        self._file = cache_file
        self._store: dict[str, CacheEntry] = self._load()

    def get(self, url: str) -> CacheEntry | None:
        """Return the cached entry for *url*, or ``None`` if missing or expired.

        An expired entry is removed from the in-memory store; the file on
        disk is only rewritten by the next :meth:`set` call. When a fresh
        entry is returned, a message is emitted through ``print_info``.

        Args:
            url: The exact URL used as the cache key.

        Returns:
            A :class:`CacheEntry` if a fresh entry exists, otherwise ``None``.
        """
        entry = self._store.get(url)
        if entry is None:
            return None
        if time.time() > entry.expires_at:
            # Lazy eviction: drop the stale entry the first time it is seen.
            del self._store[url]
            return None
        print_info("Entry retrieved from cache.")
        return entry

    def set(self, url: str, status: int, headers: dict[str, str], body: str) -> None:
        """Store a response in the cache and persist to disk.

        The entry is **not** stored when the server sends
        ``Cache-Control: no-store`` or ``no-cache``.

        Args:
            url: Cache key — should be the final (post-redirect) URL.
            status: HTTP status code of the response.
            headers: Response headers (lower-cased keys).
            body: Decoded response body.
        """
        ttl = self._parse_ttl(headers)
        if ttl == 0:
            # A TTL of 0 is the "caching forbidden" signal from _parse_ttl.
            return
        self._store[url] = CacheEntry(
            body=body,
            status=status,
            headers=headers,
            expires_at=time.time() + ttl,
        )
        self._persist()

    def clear(self) -> None:
        """Remove all cached entries and delete the cache file from disk."""
        self._store.clear()
        # missing_ok: a store that was never persisted has no file to remove.
        self._file.unlink(missing_ok=True)

    def _parse_ttl(self, headers: dict[str, str]) -> int:
        """Return the TTL in seconds based on the *Cache-Control* header.

        Directive names are matched case-insensitively, so ``No-Store`` and
        ``NO-CACHE`` are honoured as well as the lower-case spellings.

        Args:
            headers: Response headers; the value is looked up under the
                lower-case key ``"cache-control"``.

        Returns:
            ``0`` when the server explicitly forbids caching, otherwise
            :attr:`DEFAULT_TTL`.
        """
        cc = headers.get("cache-control", "").lower()

        if "no-store" in cc or "no-cache" in cc:
            return 0

        return self.DEFAULT_TTL

    def _load(self) -> dict[str, CacheEntry]:
        """Load entries from the JSON file.

        Returns:
            The decoded entries keyed by URL, or an empty dict when the file
            is absent or its content cannot be turned into entries.
        """
        if not self._file.exists():
            return {}
        try:
            raw = json.loads(self._file.read_text(encoding="utf-8"))
            return {k: CacheEntry(**v) for k, v in raw.items()}
        except (ValueError, TypeError, AttributeError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError
            # (e.g. a truncated write); TypeError/AttributeError cover JSON
            # that parses but does not have the {url: {field: value}} shape.
            # The cache is disposable, so start empty instead of failing; the
            # next set() call overwrites the bad file.
            return {}

    def _persist(self) -> None:
        """Serialise the in-memory store to disk as a single JSON object."""
        self._file.write_text(
            json.dumps({k: asdict(v) for k, v in self._store.items()}, ensure_ascii=False),
            encoding="utf-8",
        )

__init__(cache_file=CACHE_FILE)

Initialise the store, loading existing entries from cache_file.

Parameters:

Name Type Description Default
cache_file Path

Path to the JSON file used for persistence. Created on the first set() call if it does not exist.

CACHE_FILE
Source code in src/go2web/cache/store.py
def __init__(self, cache_file: Path = CACHE_FILE) -> None:
    """Bind the store to *cache_file* and read any entries already saved there.

    Args:
        cache_file: Location of the JSON persistence file. It does not need
            to exist yet; the first :meth:`set` call creates it.
    """
    # The path must be assigned before loading, because _load() reads it.
    self._file = cache_file
    loaded: dict[str, CacheEntry] = self._load()
    self._store = loaded

clear()

Remove all cached entries and delete the cache file from disk.

Source code in src/go2web/cache/store.py
def clear(self) -> None:
    """Empty the in-memory store, then delete the backing file if it exists."""
    entries = self._store
    entries.clear()
    # missing_ok=True makes this a no-op when nothing was ever persisted.
    backing_file = self._file
    backing_file.unlink(missing_ok=True)

get(url)

Return the cached entry for url, or None if missing or expired.

A dim info message is printed to stderr when a valid entry is returned.

Parameters:

Name Type Description Default
url str

The exact URL used as the cache key.

required

Returns:

Name Type Description
CacheEntry | None

A CacheEntry if a fresh entry exists, otherwise None.

Source code in src/go2web/cache/store.py
def get(self, url: str) -> CacheEntry | None:
    """Look up *url* and hand back its entry only while it is still fresh.

    A stale entry is deleted from the in-memory store as a side effect of
    the lookup. On a hit, a message is emitted through ``print_info``.

    Args:
        url: The exact URL used as the cache key.

    Returns:
        The stored :class:`CacheEntry` when one exists and has not expired,
        otherwise ``None``.
    """
    cached = self._store.get(url)
    if cached is None:
        return None

    is_stale = time.time() > cached.expires_at
    if not is_stale:
        print_info("Entry retrieved from cache.")
        return cached

    # Lazy eviction: the entry is only dropped once somebody asks for it.
    del self._store[url]
    return None

set(url, status, headers, body)

Store a response in the cache and persist to disk.

The entry is not stored when the server sends Cache-Control: no-store or no-cache.

Parameters:

Name Type Description Default
url str

Cache key — should be the final (post-redirect) URL.

required
status int

HTTP status code of the response.

required
headers dict[str, str]

Response headers (lower-cased keys).

required
body str

Decoded response body.

required
Source code in src/go2web/cache/store.py
def set(self, url: str, status: int, headers: dict[str, str], body: str) -> None:
    """Cache a response under *url* and write the store to disk.

    Nothing is stored (and nothing is written) when the response's
    ``Cache-Control`` header forbids caching via ``no-store`` or
    ``no-cache``.

    Args:
        url: Cache key — should be the final (post-redirect) URL.
        status: HTTP status code of the response.
        headers: Response headers (lower-cased keys).
        body: Decoded response body.
    """
    lifetime = self._parse_ttl(headers)
    # _parse_ttl signals "do not cache" with a lifetime of 0.
    if lifetime != 0:
        deadline = time.time() + lifetime
        fresh_entry = CacheEntry(
            body=body,
            status=status,
            headers=headers,
            expires_at=deadline,
        )
        self._store[url] = fresh_entry
        self._persist()

CacheEntry

A single cached HTTP response.

Attributes:

Name Type Description
body str

The decoded response body.

status int

The HTTP status code at the time of caching.

headers dict[str, str]

Response headers (lower-cased keys).

expires_at float

Unix timestamp after which this entry is considered stale.

Source code in src/go2web/cache/store.py
@dataclass
class CacheEntry:
    """A single cached HTTP response.

    Instances are plain records: the store builds them from keyword
    arguments and turns them back into dicts when writing the cache file,
    so every field must be JSON-serialisable.

    Attributes:
        body: The decoded response body.
        status: The HTTP status code at the time of caching.
        headers: Response headers (lower-cased keys).
        expires_at: Unix timestamp after which this entry is considered stale.
    """

    # Response payload, already decoded to text.
    body: str
    # HTTP status code recorded when the response was cached.
    status: int
    # Response headers as passed to the store.
    headers: dict[str, str]
    # Absolute expiry time in seconds since the epoch (time.time() + TTL).
    expires_at: float