Skip to content

Search

The search layer abstracts web search engines behind a common interface. BingEngine is the built-in backend; custom engines can be plugged in by subclassing BaseSearchEngine.

BingEngine

Bases: BaseSearchEngine

Search engine backend powered by Bing.

Fetches the Bing search results page for a given query and scrapes the top organic results. Bing encodes destination URLs in a base64 tracking wrapper — this is decoded transparently so callers always receive the real destination URL.

Attributes:

Name Type Description
BASE_URL

Bing search URL template. {query} is replaced with the URL-encoded query string.

Example

>>> from go2web.search.engines.bing import BingEngine
>>> engine = BingEngine()
>>> results = engine.search("python packaging", limit=3)
>>> for r in results:
...     print(r.rank, r.title)

Source code in src/go2web/search/engines/bing.py
class BingEngine(BaseSearchEngine):
    """Search engine backend powered by Bing.

    Fetches the Bing search results page for a given query and scrapes
    the top organic results. Bing encodes destination URLs in a base64
    tracking wrapper — this is decoded transparently so callers always
    receive the real destination URL.

    Attributes:
        BASE_URL: Bing search URL template. ``{query}`` is replaced with
            the URL-encoded query string.

    Example:
        >>> from go2web.search.engines.bing import BingEngine
        >>> engine = BingEngine()
        >>> results = engine.search("python packaging", limit=3)
        >>> for r in results:
        ...     print(r.rank, r.title)
    """

    BASE_URL = "https://www.bing.com/search?q={query}"

    def __init__(self, client: HTTPClient | None = None) -> None:
        """Initialise the engine.

        Args:
            client: An :class:`~go2web.http.client.HTTPClient` instance used
                to fetch the Bing results page. A default client (with caching
                enabled) is created when *client* is ``None``.
        """
        # Compare against None explicitly so that a caller-supplied client
        # is never discarded just because it happens to evaluate as falsy.
        self._client = client if client is not None else HTTPClient()

    def search(self, query: str, limit: int = 10) -> list[SearchResult]:
        """Query Bing and return up to *limit* organic results.

        Args:
            query: The search query string.
            limit: Maximum number of results to return (default ``10``).

        Returns:
            A list of :class:`~go2web.search.result.SearchResult` objects
            ordered by their position on the results page.
        """
        url = self.BASE_URL.format(query=quote_plus(query))
        response = self._client.get(url)
        return self._parse(response.body, limit)

    def _parse(self, html: str, limit: int) -> list[SearchResult]:
        """Scrape *html* for Bing result entries and return up to *limit* items.

        Entries without a usable ``h2 a`` link are skipped and do not count
        towards *limit*, so later valid entries can still fill the list.
        ``rank`` is the 1-based position of the entry among the
        ``li.b_algo`` elements on the page, which means skipped entries
        leave gaps in the rank sequence.

        Args:
            html: Markup of a Bing results page.
            limit: Maximum number of results to collect. Values of zero or
                less yield an empty list.

        Returns:
            The collected results in page order.
        """
        if limit <= 0:
            return []

        soup = BeautifulSoup(html, "html.parser")
        results: list[SearchResult] = []

        for position, entry in enumerate(soup.select("li.b_algo"), start=1):
            if len(results) >= limit:
                break

            title_el = entry.select_one("h2 a")
            if not title_el:
                continue

            href = title_el.get("href")
            # A missing or multi-valued href cannot be turned into a URL.
            if not isinstance(href, str):
                continue

            results.append(
                SearchResult(
                    rank=position,
                    title=title_el.get_text(strip=True),
                    url=self._extract_url(href),
                )
            )

        return results

    def _extract_url(self, href: str) -> str:
        """Decode a Bing tracking URL into the real destination URL.

        Bing wraps destination URLs in a query parameter ``u`` that is
        base64-encoded with an ``a1`` prefix. When this pattern is detected
        the prefix is stripped and the remainder is decoded using the
        URL-safe alphabet (``-`` and ``_``). Falls back to returning *href*
        unchanged when the pattern does not match or the payload cannot be
        decoded.

        Args:
            href: The raw ``href`` value from a Bing result anchor tag.

        Returns:
            The decoded destination URL, or *href* itself as a fallback.
        """
        encoded = parse_qs(urlparse(href).query).get("u", [""])[0]
        if not encoded.startswith("a1"):
            return href

        payload = encoded[2:]
        # The payload arrives without padding; restore exactly what is missing.
        payload += "=" * (-len(payload) % 4)
        try:
            # altchars selects the URL-safe alphabet; with the default
            # alphabet "-" and "_" would be dropped silently and the
            # resulting URL corrupted.
            decoded = b64decode(payload, altchars=b"-_")
        except ValueError:
            # binascii.Error is a ValueError subclass: a malformed payload
            # should not abort the whole search, so keep the raw link.
            return href

        return decoded.decode("utf-8", errors="replace") or href

__init__(client=None)

Initialise the engine.

Parameters:

Name Type Description Default
client HTTPClient | None

An HTTPClient instance (go2web.http.client.HTTPClient) used to fetch the Bing results page. A default client (with caching enabled) is created when client is None.

None
Source code in src/go2web/search/engines/bing.py
def __init__(self, client: HTTPClient | None = None) -> None:
    """Initialise the engine.

    Args:
        client: An :class:`~go2web.http.client.HTTPClient` instance used
            to fetch the Bing results page. A default client (with caching
            enabled) is created when *client* is ``None``.
    """
    # Compare against None explicitly so that a caller-supplied client is
    # never discarded just because it happens to evaluate as falsy.
    self._client = client if client is not None else HTTPClient()

search(query, limit=10)

Query Bing and return up to limit organic results.

Parameters:

Name Type Description Default
query str

The search query string.

required
limit int

Maximum number of results to return (default 10).

10

Returns:

Type Description
list[SearchResult]

A list of SearchResult objects (go2web.search.result.SearchResult), ordered by their position on the results page.

Source code in src/go2web/search/engines/bing.py
def search(self, query: str, limit: int = 10) -> list[SearchResult]:
    """Fetch the Bing results page for *query* and parse out the results.

    Args:
        query: The search query string; it is URL-encoded before being
            substituted into ``BASE_URL``.
        limit: Maximum number of results to return (default ``10``).

    Returns:
        A list of :class:`~go2web.search.result.SearchResult` objects
        ordered by their position on the results page.
    """
    encoded_query = quote_plus(query)
    page = self._client.get(self.BASE_URL.format(query=encoded_query))
    return self._parse(page.body, limit)

SearchResult

A single search engine result.

Attributes:

Name Type Description
rank int

1-based position on the results page.

title str

The page title as displayed in search results.

url str

The destination URL after Bing tracking is decoded.

Example

>>> from go2web.search.result import SearchResult
>>> r = SearchResult(rank=1, title="Python.org", url="https://python.org")
>>> r.rank
1

Source code in src/go2web/search/result.py
@dataclass
class SearchResult:
    """A single search engine result.

    Plain value object returned by search engine backends from their
    ``search`` method.

    Attributes:
        rank: 1-based position on the results page.
        title: The page title as displayed in search results.
        url: The destination URL of the result. ``BingEngine`` decodes
            Bing's tracking wrapper before storing the URL here.

    Example:
        >>> from go2web.search.result import SearchResult
        >>> r = SearchResult(rank=1, title="Python.org", url="https://python.org")
        >>> r.rank
        1
    """

    # Field order defines the positional order of the generated __init__.
    rank: int
    title: str
    url: str

BaseSearchEngine (abstract)

Bases: ABC

Interface for web search engine backends.

Subclass BaseSearchEngine to add support for a new search provider. The only required method is search().

Example

A minimal custom engine:

>>> from go2web.search.engines.base import BaseSearchEngine
>>> from go2web.search.result import SearchResult
>>>
>>> class MyEngine(BaseSearchEngine):
...     def search(self, query: str, limit: int = 10) -> list[SearchResult]:
...         return [SearchResult(rank=1, title="Example", url="https://example.com")]

Source code in src/go2web/search/engines/base.py
class BaseSearchEngine(ABC):
    """Abstract contract that every web search backend implements.

    To support a new search provider, derive from
    :class:`BaseSearchEngine` and implement :meth:`search`; it is the
    only abstract member.

    Example:
        A minimal custom engine:

        >>> from go2web.search.engines.base import BaseSearchEngine
        >>> from go2web.search.result import SearchResult
        >>>
        >>> class MyEngine(BaseSearchEngine):
        ...     def search(self, query: str, limit: int = 10) -> list[SearchResult]:
        ...         return [SearchResult(rank=1, title="Example", url="https://example.com")]
    """

    @abstractmethod
    def search(self, query: str, limit: int = 10) -> list[SearchResult]:
        """Run *query* against the backend and return at most *limit* hits.

        Args:
            query: The search query string.
            limit: Maximum number of results to return.

        Returns:
            A list of :class:`~go2web.search.result.SearchResult` objects,
            ordered by relevance rank.
        """

search(query, limit=10) abstractmethod

Search for query and return up to limit results.

Parameters:

Name Type Description Default
query str

The search query string.

required
limit int

Maximum number of results to return.

10

Returns:

Type Description
list[SearchResult]

A list of SearchResult objects (go2web.search.result.SearchResult), ordered by relevance rank.

Source code in src/go2web/search/engines/base.py
@abstractmethod
def search(self, query: str, limit: int = 10) -> list[SearchResult]:
    """Run *query* against the backend and return at most *limit* hits.

    Args:
        query: The search query string.
        limit: Maximum number of results to return.

    Returns:
        A list of :class:`~go2web.search.result.SearchResult` objects,
        ordered by relevance rank.
    """