"""Managed browser backend launches Chromium via Playwright.
Launches a fresh Chromium instance managed entirely by the scraper.
Applies fingerprint spoofing, proxy configuration, anti-detection
scripts, and session cookies/storage when provided.
This is the default backend used when ``use_local_browser=False``.
"""
from __future__ import annotations
import json
import logging
import random
from typing import TYPE_CHECKING
from intelliscraper.browser.backend import BrowserBackend
from intelliscraper.common.constants import (
BROWSER_LAUNCH_OPTIONS,
DEFAULT_BROWSER_FINGERPRINT,
)
from intelliscraper.common.models import Proxy, Session
if TYPE_CHECKING:
from playwright.async_api import Browser, BrowserContext, Playwright
logger = logging.getLogger(__name__)
[docs]
class ManagedBrowserBackend(BrowserBackend):
    """Launch and manage a Chromium browser instance via Playwright.

    Handles the full lifecycle of a Playwright-managed browser:
    launching, configuring the context (fingerprint, proxy, cookies,
    anti-detection scripts), and tearing everything down on cleanup.

    Args:
        headless: Run browser without a visible UI. Defaults to
            ``True``.
        browser_launch_options: Custom Chromium launch options. Merged
            with the ``headless`` flag. Defaults to
            ``BROWSER_LAUNCH_OPTIONS``.
        proxy: Proxy configuration for network requests. Applied at
            the browser-context level so all pages share the same
            proxy. Defaults to ``None``.
        session_data: Pre-authenticated session with cookies,
            localStorage, sessionStorage, and browser fingerprint.
            Defaults to ``None``.

    Example::

        backend = ManagedBrowserBackend(
            headless=True,
            proxy=my_proxy,
            session_data=my_session,
        )
        browser, context = await backend.initialize(playwright)
    """

    # Page-side function used by ``apply_session_storage``. It receives
    # ``{storageName, items}`` and copies every item into
    # ``window[storageName]``. The storage lookup sits inside the ``try``
    # so a blocked storage is reported per key instead of aborting.
    _STORAGE_INJECTION_SCRIPT = """
        ({ storageName, items }) => {
            for (const key in items) {
                try {
                    window[storageName].setItem(key, items[key]);
                } catch (e) {
                    console.error('Failed to set ' + storageName + ':', key, e);
                }
            }
        }
    """

    # Static part of the anti-detection init script. It contains no
    # Python interpolation: every spoofed value is read from the ``fp``
    # object that ``_build_anti_detection_script`` defines just before
    # this body.
    _ANTI_DETECTION_SCRIPT_BODY = """
    // Remove webdriver flag (MOST IMPORTANT!)
    Object.defineProperty(navigator, 'webdriver', {
        get: () => undefined
    });
    // Add chrome object
    window.chrome = {
        runtime: {}
    };
    // Override permissions
    const permissions = window.navigator.permissions;
    if (permissions && permissions.query) {
        // Bind the native method: calling it detached from its
        // receiver throws "Illegal invocation".
        const originalQuery = permissions.query.bind(permissions);
        permissions.query = (parameters) => (
            parameters.name === 'notifications' ?
                Promise.resolve({ state: Notification.permission }) :
                originalQuery(parameters)
        );
    }
    // Spoof plugins
    Object.defineProperty(navigator, 'plugins', {
        get: () => [
            {
                0: {type: "application/x-google-chrome-pdf", suffixes: "pdf"},
                description: "Portable Document Format",
                filename: "internal-pdf-viewer",
                length: 1,
                name: "Chrome PDF Plugin"
            },
            {
                0: {type: "application/pdf", suffixes: "pdf"},
                description: "Portable Document Format",
                filename: "mhjfbmdgcfjbbpaeojofohoefgiehjai",
                length: 1,
                name: "Chrome PDF Viewer"
            }
        ]
    });
    // Languages (a fresh array on every read)
    Object.defineProperty(navigator, 'languages', {
        get: () => fp.languages.slice()
    });
    // Hardware (from fingerprint)
    Object.defineProperty(navigator, 'hardwareConcurrency', {
        get: () => fp.hardwareConcurrency
    });
    Object.defineProperty(navigator, 'deviceMemory', {
        get: () => fp.deviceMemory
    });
    Object.defineProperty(navigator, 'platform', {
        get: () => fp.platform
    });
    // Screen properties
    Object.defineProperty(screen, 'colorDepth', {
        get: () => fp.colorDepth
    });
    Object.defineProperty(screen, 'pixelDepth', {
        get: () => fp.colorDepth
    });
    // WebGL (from fingerprint)
    if (typeof WebGLRenderingContext !== 'undefined') {
        const getParameter = WebGLRenderingContext.prototype.getParameter;
        WebGLRenderingContext.prototype.getParameter = function(parameter) {
            if (parameter === 37445) {
                return fp.webglVendor;
            }
            if (parameter === 37446) {
                return fp.webglRenderer;
            }
            return getParameter.call(this, parameter);
        };
    }
    """

    def __init__(
        self,
        headless: bool = True,
        browser_launch_options: dict | None = None,
        proxy: Proxy | None = None,
        session_data: Session | None = None,
    ) -> None:
        self._headless = headless
        # Copy the options so neither the caller's dict nor the shared
        # module-level defaults are mutated by the ``headless`` override.
        self._launch_options = dict(browser_launch_options or BROWSER_LAUNCH_OPTIONS)
        self._launch_options["headless"] = headless
        self._proxy = proxy
        self._session_data = session_data

    @property
    def owns_browser(self) -> bool:
        """Managed backend owns and should close the browser process."""
        return True

    async def initialize(
        self,
        playwright: Playwright,
    ) -> tuple[Browser, BrowserContext]:
        """Launch Chromium and create a fully configured context.

        Steps performed:

        1. Launch Chromium with the configured options.
        2. Create a browser context with fingerprint spoofing and
           optional proxy.
        3. Inject session cookies (if provided).
        4. Apply anti-detection JavaScript scripts.

        Args:
            playwright: A started Playwright instance.

        Returns:
            A ``(browser, context)`` tuple.

        Raises:
            Exception: Any error raised by steps 2-4 is re-raised after
                the freshly launched browser has been closed.
        """
        logger.debug("Launching browser with options: %s", self._launch_options)
        browser = await playwright.chromium.launch(**self._launch_options)
        logger.debug("Browser launched successfully")

        fingerprint = (
            self._session_data.fingerprint
            if self._session_data
            else DEFAULT_BROWSER_FINGERPRINT
        )

        try:
            context = await self._create_browser_context(
                browser=browser,
                fingerprint=fingerprint,
            )
            await self._add_cookies(context)
            await self._apply_anti_detection_scripts(context, fingerprint)
        except Exception:
            # The caller never receives the browser on failure, so it
            # must be closed here or the process would be leaked.
            try:
                await browser.close()
            except Exception:
                # Keep the original error as the one that propagates.
                logger.debug(
                    "Failed to close browser after setup error", exc_info=True
                )
            raise

        logger.info("Managed browser ready.")
        return browser, context

    async def cleanup(
        self,
        browser: Browser,
        context: BrowserContext,
    ) -> None:
        """Close context and browser process.

        The browser is closed even when closing the context raises.

        Args:
            browser: The Playwright-managed browser to close.
            context: The browser context to close.
        """
        try:
            if context:
                await context.close()
                logger.debug("Browser context closed.")
        finally:
            if browser:
                await browser.close()
                logger.debug("Browser process closed.")

    async def apply_session_storage(
        self,
        page,
    ) -> None:
        """Apply localStorage and sessionStorage to a page.

        If ``session_data`` is configured with storage data, navigates
        the page to the session's ``base_url`` and injects the stored
        key-value pairs.

        Args:
            page: A Playwright ``Page`` instance to configure.
        """
        session = self._session_data
        if not session:
            return
        if not (session.localStorage or session.sessionStorage):
            return

        logger.debug("Applying session / local storage")
        # Navigate first: the items are written into the storage of
        # whatever document the page currently holds.
        await page.goto(session.base_url)

        for storage_name, items in (
            ("localStorage", session.localStorage),
            ("sessionStorage", session.sessionStorage),
        ):
            if items:
                await page.evaluate(
                    self._STORAGE_INJECTION_SCRIPT,
                    {"storageName": storage_name, "items": items},
                )
        logger.debug("Session storage applied successfully")

    async def _create_browser_context(
        self,
        browser: Browser,
        fingerprint: dict | None,
    ) -> BrowserContext:
        """Create a browser context with fingerprint and proxy config.

        Args:
            browser: The launched browser instance.
            fingerprint: Browser fingerprint dict for anti-detection.
                ``None`` falls back to ``DEFAULT_BROWSER_FINGERPRINT``.

        Returns:
            A configured ``BrowserContext``.
        """
        logger.debug("Creating browser context")
        if fingerprint is None:
            fingerprint = DEFAULT_BROWSER_FINGERPRINT

        proxy_dict = self._proxy.model_dump() if self._proxy else None

        # ``or {}`` also covers a fingerprint whose "screenResolution"
        # key is present but null.
        resolution = fingerprint.get("screenResolution") or {}
        dimensions = {
            "width": resolution.get("width", 1920),
            "height": resolution.get("height", 1080),
        }
        language = fingerprint.get("language", "en-US")

        context = await browser.new_context(
            viewport=dict(dimensions),
            screen=dict(dimensions),
            proxy=proxy_dict,
            # A random point is drawn for every new context.
            geolocation={
                "latitude": random.uniform(-90, 90),
                "longitude": random.uniform(-180, 180),
            },
            user_agent=fingerprint.get(
                "userAgent",
                "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
            ),
            locale=language,
            timezone_id=fingerprint.get("timezone", "Asia/Calcutta"),
            device_scale_factor=1,
            is_mobile=False,
            has_touch=False,
            color_scheme="light",
            # NOTE(review): certificate errors are ignored for every
            # page opened in this context.
            ignore_https_errors=True,
            extra_http_headers={
                "Accept": (
                    "text/html,application/xhtml+xml,application/xml;"
                    "q=0.9,image/avif,image/webp,*/*;q=0.8"
                ),
                "Accept-Language": f"{language},en;q=0.9",
                "Accept-Encoding": "gzip, deflate, br",
                "DNT": "1",
                "Connection": "keep-alive",
                "Upgrade-Insecure-Requests": "1",
            },
        )
        logger.debug("Browser context created successfully")
        return context

    async def _add_cookies(self, context: BrowserContext) -> None:
        """Add cookies from session data to the browser context.

        Does nothing when no session is configured or it has no cookies.

        Args:
            context: The browser context to add cookies to.
        """
        if self._session_data and self._session_data.cookies:
            logger.debug("Adding %d cookies", len(self._session_data.cookies))
            await context.add_cookies(self._session_data.cookies)
            logger.debug("Cookies added successfully")

    @classmethod
    def _build_anti_detection_script(cls, fingerprint: dict | None) -> str:
        """Render the anti-detection init script for a fingerprint.

        The spoofed values are passed to the script as one JSON-encoded
        object, so quotes or backslashes in fingerprint strings cannot
        break the generated JavaScript. The script is wrapped in an
        immediately-invoked function so its local ``const`` bindings do
        not leak into the page's global scope.

        Args:
            fingerprint: Browser fingerprint dict with values to spoof.
                ``None`` falls back to ``DEFAULT_BROWSER_FINGERPRINT``.

        Returns:
            The JavaScript source to register as an init script.
        """
        if fingerprint is None:
            fingerprint = DEFAULT_BROWSER_FINGERPRINT
        resolution = fingerprint.get("screenResolution") or {}
        spoofed = {
            "languages": fingerprint.get("languages", ["en-US"]),
            "hardwareConcurrency": fingerprint.get("hardwareConcurrency", 8),
            "deviceMemory": fingerprint.get("deviceMemory", 8),
            "platform": fingerprint.get("platform", "Linux x86_64"),
            "colorDepth": resolution.get("colorDepth", 24),
            "webglVendor": fingerprint.get("webglVendor", "Google Inc. (Intel)"),
            "webglRenderer": fingerprint.get("webglRenderer", "ANGLE (Intel)"),
        }
        return (
            "(() => {\n"
            f"    const fp = {json.dumps(spoofed)};\n"
            f"{cls._ANTI_DETECTION_SCRIPT_BODY}\n"
            "})();"
        )

    async def _apply_anti_detection_scripts(
        self,
        context: BrowserContext,
        fingerprint: dict | None,
    ) -> None:
        """Inject JavaScript to mask automation and avoid bot detection.

        Spoofs ``navigator.webdriver``, plugins, languages, hardware
        concurrency, device memory, platform, screen properties, and
        WebGL renderer strings.

        Args:
            context: The browser context to inject scripts into.
            fingerprint: Browser fingerprint dict with values to spoof.
        """
        logger.debug("Applying anti-detection scripts")
        # ``add_init_script`` is a coroutine in the async API; without
        # the await the script would never be registered.
        await context.add_init_script(
            self._build_anti_detection_script(fingerprint)
        )
        logger.debug("Anti-detection scripts applied")