Source code for intelliscraper.rate_limiter

"""Token-bucket rate limiter for controlling request throughput.

Enforces a global request rate across all concurrent browser pages.
When multiple pages share a single ``RateLimiter``, the combined
throughput of all pages is capped at the configured limit.

Example::

    # 15 requests per second = 900 per minute
    limiter = RateLimiter(max_requests_per_minute=900)

    # Before each request (called from multiple concurrent pages):
    await limiter.acquire()   # blocks until a token is available
    await page.goto(url)

    # Disable rate limiting (default):
    limiter = RateLimiter()   # or RateLimiter(max_requests_per_minute=None)
"""

from __future__ import annotations

import asyncio
import logging
import time

logger = logging.getLogger(__name__)


[docs] class RateLimiter: """Token-bucket rate limiter shared across all concurrent pages. The rate limit is enforced **globally** — if you have 4 concurrent pages and a limit of 900 requests/minute (15/sec), all 4 pages share that 15/sec budget, not 15/sec each. Args: max_requests_per_minute: Maximum requests allowed per minute. Set to ``None`` or ``0`` to disable rate limiting (default). Attributes: enabled: Whether rate limiting is active. max_rpm: The configured maximum requests per minute. Example:: # No rate limiting (default) limiter = RateLimiter() # 15 requests per second across all pages limiter = RateLimiter(max_requests_per_minute=900) # Conservative limit for protected sites limiter = RateLimiter(max_requests_per_minute=60) # 1/sec """ def __init__( self, max_requests_per_minute: int | None = None, ) -> None: self._max_rpm = max_requests_per_minute or 0 self._enabled = self._max_rpm > 0 self._lock = asyncio.Lock() if self._enabled: self._min_interval = 60.0 / self._max_rpm self._last_request_time = 0.0 logger.info( "Rate limiter enabled: %d requests/minute " "(%.2f sec interval)", self._max_rpm, self._min_interval, ) else: self._min_interval = 0.0 self._last_request_time = 0.0 logger.debug("Rate limiter disabled (no limit)") @property def enabled(self) -> bool: """Whether rate limiting is active.""" return self._enabled @property def max_rpm(self) -> int: """The configured maximum requests per minute.""" return self._max_rpm
[docs] async def acquire(self) -> None: """Wait until a request token is available. If rate limiting is disabled, returns immediately. Otherwise, blocks until enough time has elapsed since the last request to stay within the configured rate. This method is safe to call from multiple concurrent tasks — an ``asyncio.Lock`` ensures only one task checks and updates the timestamp at a time. """ if not self._enabled: return async with self._lock: now = time.monotonic() elapsed = now - self._last_request_time wait_time = self._min_interval - elapsed if wait_time > 0: logger.debug( "Rate limiter: waiting %.2fs before next request " "(%.0f rpm limit)", wait_time, self._max_rpm, ) await asyncio.sleep(wait_time) self._last_request_time = time.monotonic()