Tutorials

Python Selenium + CaptchaAI Complete Integration Guide

Selenium handles browser automation; CaptchaAI handles CAPTCHA solving. Together, they solve the most common automation challenge — pages that require both JavaScript execution and CAPTCHA completion. This guide covers the complete integration pattern for reCAPTCHA v2/v3, Cloudflare Turnstile, and image CAPTCHAs.


Prerequisites

pip install selenium requests webdriver-manager

Setup: Stealth-configured Selenium

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

def create_driver():
    """Return a Chrome WebDriver started with stealth-oriented settings.

    The driver binary is resolved through ``ChromeDriverManager().install()``.
    Chrome is launched with the ``AutomationControlled`` Blink feature
    disabled, the ``enable-automation`` switch excluded and the automation
    extension turned off. A script is then registered through the CDP command
    ``Page.addScriptToEvaluateOnNewDocument`` that redefines
    ``navigator.webdriver`` so its getter returns ``undefined``.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    for option_name, option_value in (
        ("excludeSwitches", ["enable-automation"]),
        ("useAutomationExtension", False),
    ):
        chrome_options.add_experimental_option(option_name, option_value)

    chromedriver_service = Service(ChromeDriverManager().install())
    browser = webdriver.Chrome(service=chromedriver_service, options=chrome_options)

    # Registered for every new document, so the override is in place before page scripts run.
    hide_webdriver_js = "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
    browser.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument",
        {"source": hide_webdriver_js},
    )

    return browser

CaptchaAI solver helper

import time
import requests

# Placeholder credential: replace with your own CaptchaAI API key.
# solve_captcha() sends this value as the "key" field of every request.
API_KEY = "YOUR_API_KEY"


def solve_captcha(method, **params):
    """Submit a CAPTCHA task to CaptchaAI and poll until a result is available.

    Args:
        method: Value sent as the ``method`` form field of the submit request.
        **params: Extra form fields merged into the submit request after the
            built-in ``key``/``method``/``json`` fields (so they can override
            them).

    Returns:
        The ``request`` field of the first poll response whose ``status`` is 1.

    Raises:
        Exception: If the submit response does not have ``status == 1``, or a
            poll response carries an error code instead of the "not ready"
            marker.
        TimeoutError: If no result arrives after 30 polls spaced 5 seconds
            apart.
    """
    submit_data = {
        "key": API_KEY,
        "method": method,
        "json": 1,
        **params,
    }

    submit = requests.post("https://ocr.captchaai.com/in.php", data=submit_data, timeout=30)
    data = submit.json()

    if data.get("status") != 1:
        raise Exception(f"Submit error: {data.get('request')}")

    task_id = data["request"]

    for _ in range(30):
        time.sleep(5)
        result = requests.get("https://ocr.captchaai.com/res.php", params={
            "key": API_KEY,
            "action": "get",
            "id": task_id,
            "json": 1,
        }, timeout=30).json()

        if result.get("status") == 1:
            return result["request"]

        error_code = result.get("request")
        if error_code == "ERROR_CAPTCHA_UNSOLVABLE":
            raise Exception("CAPTCHA unsolvable")
        # Anything other than the "not ready" marker is a terminal error
        # (bad key, unknown task id, ...): fail now instead of polling for the
        # full 150 seconds and misreporting the problem as a timeout.
        if error_code != "CAPCHA_NOT_READY":
            raise Exception(f"Solve error: {error_code}")

    raise TimeoutError("Solve timed out")

reCAPTCHA v2 with Selenium

Full flow: detect, solve, inject token, submit

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import re

def solve_recaptcha_v2_selenium(driver, url):
    """Load *url*, solve its reCAPTCHA v2 via CaptchaAI and submit the form.

    Args:
        driver: Selenium WebDriver used to load and manipulate the page.
        url: Page to open; also sent to the solver as ``pageurl``.

    Returns:
        The token returned by ``solve_captcha``.

    Raises:
        ValueError: If no 40-character ``data-sitekey`` attribute is present in
            the page source.
    """
    driver.get(url)

    # Wait for page to load
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.TAG_NAME, "body"))
    )

    # Extract sitekey
    page_source = driver.page_source
    match = re.search(r'data-sitekey=["\']([A-Za-z0-9_-]{40})["\']', page_source)
    if not match:
        raise ValueError("reCAPTCHA sitekey not found")
    sitekey = match.group(1)
    print(f"Sitekey: {sitekey}")

    # Solve via CaptchaAI
    token = solve_captcha(
        "userrecaptcha",
        googlekey=sitekey,
        pageurl=url,
    )
    print(f"Token received: {token[:50]}...")

    # Inject token into the page. The token is handed over as a script
    # argument rather than spliced into the JavaScript source, so quotes or
    # backslashes in it can neither break nor alter the script.
    driver.execute_script(
        """
        var field = document.getElementById('g-recaptcha-response');
        field.value = arguments[0];
        field.style.display = 'block';
        """,
        token,
    )

    # If there's a callback function, call it.
    # NOTE(review): `rr.l` is the client property path this guide assumes
    # holds the widget callback — confirm against the live page.
    driver.execute_script(
        """
        if (typeof ___grecaptcha_cfg !== 'undefined') {
            var clients = ___grecaptcha_cfg.clients;
            for (var key in clients) {
                var client = clients[key];
                if (client.rr && client.rr.l) {
                    client.rr.l.callback(arguments[0]);
                }
            }
        }
        """,
        token,
    )

    # Submit the form
    submit_button = driver.find_element(By.CSS_SELECTOR, "button[type='submit'], input[type='submit']")
    submit_button.click()

    return token


# Usage
# Run the reCAPTCHA v2 flow against the login page; the browser is closed
# even when solving or submitting raises.
login_url = "https://example.com/login"
driver = create_driver()
try:
    solve_recaptcha_v2_selenium(driver, login_url)
    print(f"Current URL: {driver.current_url}")
finally:
    driver.quit()

Cloudflare Turnstile with Selenium

def solve_turnstile_selenium(driver, url):
    """Load *url*, solve its Cloudflare Turnstile via CaptchaAI and submit.

    Args:
        driver: Selenium WebDriver used to load and manipulate the page.
        url: Page to open; also sent to the solver as ``pageurl``.

    Returns:
        The token returned by ``solve_captcha``.

    Raises:
        ValueError: If neither a ``data-sitekey`` attribute nor a
            ``sitekey: '...'`` assignment is found in the page source.
    """
    driver.get(url)

    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.TAG_NAME, "body"))
    )

    # Extract sitekey
    page_source = driver.page_source
    match = re.search(r'data-sitekey=["\']([0-9x][A-Za-z0-9_-]+)["\']', page_source)
    if not match:
        # Try JavaScript API pattern
        match = re.search(r"sitekey\s*:\s*['\"]([0-9x][A-Za-z0-9_-]+)['\"]", page_source)
    if not match:
        raise ValueError("Turnstile sitekey not found")

    sitekey = match.group(1)
    print(f"Turnstile sitekey: {sitekey}")

    # Solve via CaptchaAI
    token = solve_captcha(
        "turnstile",
        sitekey=sitekey,
        pageurl=url,
    )
    print(f"Turnstile token: {token[:50]}...")

    # Inject token into every response field. The token is passed as a script
    # argument instead of being interpolated into the JavaScript source, so
    # its content cannot break or alter the script.
    driver.execute_script(
        """
        var token = arguments[0];
        document.querySelectorAll('[name="cf-turnstile-response"]')
            .forEach(function (input) { input.value = token; });
        """,
        token,
    )

    # Submit form
    submit_button = driver.find_element(By.CSS_SELECTOR, "button[type='submit'], input[type='submit']")
    submit_button.click()

    return token

Image CAPTCHA with Selenium

For traditional image/text CAPTCHAs (type the letters you see):

import base64

def solve_image_captcha_selenium(driver, captcha_selector):
    """Read an on-page image CAPTCHA, solve it and type the answer.

    Args:
        driver: Selenium WebDriver with the CAPTCHA page already loaded.
        captcha_selector: CSS selector locating the CAPTCHA image element.

    Returns:
        The answer text returned by ``solve_captcha``.
    """
    image_element = driver.find_element(By.CSS_SELECTOR, captcha_selector)

    # The element screenshot (base64) is sent to the solver as the "body" field.
    answer = solve_captcha("base64", body=image_element.screenshot_as_base64)
    print(f"CAPTCHA answer: {answer}")

    # First element matching any of the supported answer-field selectors.
    answer_field_selector = "input[name='captcha'], input[name='code'], input.captcha-input"
    answer_field = driver.find_element(By.CSS_SELECTOR, answer_field_selector)
    answer_field.clear()
    answer_field.send_keys(answer)

    return answer

reCAPTCHA v3 with Selenium

def solve_recaptcha_v3_selenium(driver, url, sitekey, action="verify"):
    """Load *url*, obtain a reCAPTCHA v3 token from CaptchaAI and inject it.

    Args:
        driver: Selenium WebDriver used to load and manipulate the page.
        url: Page to open; also sent to the solver as ``pageurl``.
        sitekey: Site key sent to the solver as ``googlekey``.
        action: Value sent to the solver as ``action``.

    Returns:
        The token returned by ``solve_captcha``. The form is not submitted.
    """
    driver.get(url)

    # Solve via CaptchaAI
    token = solve_captcha(
        "userrecaptcha",
        googlekey=sitekey,
        pageurl=url,
        version="v3",
        action=action,
        min_score="0.7",
    )

    # Inject token. It is passed as a script argument rather than interpolated
    # into the JavaScript source, so its content cannot break or alter the
    # script.
    driver.execute_script(
        """
        var token = arguments[0];

        // Set reCAPTCHA response
        var textarea = document.getElementById('g-recaptcha-response');
        if (textarea) {
            textarea.value = token;
            textarea.style.display = 'block';
        }

        // Set any hidden input fields
        document.querySelectorAll('input[name="g-recaptcha-response"]')
            .forEach(function (input) { input.value = token; });
        """,
        token,
    )

    return token

Complete automation example

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import re
import time
import requests

# Placeholder credential: replace with your own CaptchaAI API key.
# It is passed to SeleniumCaptchaSolver in the usage example.
API_KEY = "YOUR_API_KEY"


class SeleniumCaptchaSolver:
    """Selenium + CaptchaAI helper that detects, solves and submits CAPTCHA forms.

    Call ``start()`` to launch Chrome, ``solve_and_submit()`` to process a
    page and ``stop()`` to close the browser.
    """

    # Sitekey patterns used by detection and by the individual solvers.
    _RECAPTCHA_V2_KEY = r'data-sitekey=["\']([A-Za-z0-9_-]{40})["\']'
    _RECAPTCHA_V3_KEY = r'render=([A-Za-z0-9_-]{40})'
    _TURNSTILE_KEY = r'data-sitekey=["\']([0-9x][A-Za-z0-9_-]+)["\']'
    _TURNSTILE_JS_KEY = r"sitekey\s*:\s*['\"]([0-9x][A-Za-z0-9_-]+)['\"]"

    def __init__(self, api_key):
        """Store the CaptchaAI *api_key*; no browser is started yet."""
        self.api_key = api_key
        self.driver = None

    def start(self):
        """Launch Chrome and register the ``navigator.webdriver`` override."""
        options = webdriver.ChromeOptions()
        options.add_argument("--disable-blink-features=AutomationControlled")
        options.add_experimental_option("excludeSwitches", ["enable-automation"])
        self.driver = webdriver.Chrome(options=options)
        self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
        })

    def stop(self):
        """Quit the browser if one is running; safe to call more than once."""
        if self.driver:
            try:
                self.driver.quit()
            finally:
                # Drop the reference so a second stop() does not touch a dead session.
                self.driver = None

    def navigate(self, url):
        """Open *url* and wait up to 15 s for ``document.readyState`` to be ``complete``."""
        self.driver.get(url)
        WebDriverWait(self.driver, 15).until(
            lambda d: d.execute_script("return document.readyState") == "complete"
        )

    def detect_captcha(self):
        """Return the CAPTCHA type found on the current page.

        Returns:
            ``"recaptcha_v2"``, ``"turnstile"``, ``"recaptcha_v3"``,
            ``"image"``, or ``None`` when nothing is recognised. Checks run in
            that order and the first hit wins.
        """
        source = self.driver.page_source

        if re.search(self._RECAPTCHA_V2_KEY, source):
            if "recaptcha/api.js" in source or "grecaptcha" in source:
                return "recaptcha_v2"

        if "cf-turnstile" in source or "challenges.cloudflare.com/turnstile" in source:
            return "turnstile"

        if re.search(r'recaptcha.*v3|render=[A-Za-z0-9_-]{40}', source):
            return "recaptcha_v3"

        captcha_imgs = self.driver.find_elements(
            By.CSS_SELECTOR, "img.captcha, img[alt*='captcha'], img[src*='captcha']"
        )
        if captcha_imgs:
            return "image"

        return None

    def solve_and_submit(self, url, form_data=None):
        """Navigate to *url*, solve any detected CAPTCHA, fill the form and submit.

        Args:
            url: Page to open.
            form_data: Optional mapping of field ``name`` attribute to the
                text typed into that field.

        Returns:
            ``driver.current_url`` as read 3 seconds after the submit click.
        """
        self.navigate(url)
        captcha_type = self.detect_captcha()

        if not captcha_type:
            print("No CAPTCHA detected")
            return self._fill_and_submit(form_data)

        print(f"Detected: {captcha_type}")

        if captcha_type == "recaptcha_v2":
            self._solve_recaptcha_v2()
        elif captcha_type == "recaptcha_v3":
            # detect_captcha() can report v3, so it needs a solver branch;
            # without one the form was submitted with no token at all.
            self._solve_recaptcha_v3()
        elif captcha_type == "turnstile":
            self._solve_turnstile()
        elif captcha_type == "image":
            self._solve_image()

        return self._fill_and_submit(form_data)

    def _solve_recaptcha_v2(self):
        """Solve reCAPTCHA v2 and write the token into ``#g-recaptcha-response``.

        Raises:
            ValueError: If no 40-character ``data-sitekey`` is in the page source.
        """
        match = re.search(self._RECAPTCHA_V2_KEY, self.driver.page_source)
        if not match:
            raise ValueError("reCAPTCHA sitekey not found")

        token = self._api_solve(
            "userrecaptcha", googlekey=match.group(1), pageurl=self.driver.current_url
        )

        # Token travels as a script argument, never as JavaScript source text.
        self.driver.execute_script(
            "document.getElementById('g-recaptcha-response').value = arguments[0];",
            token,
        )

    def _solve_recaptcha_v3(self, action="verify"):
        """Solve reCAPTCHA v3 using the sitekey from the ``render=`` URL parameter.

        When the page source contains no ``render=<40-char key>`` the method
        reports that and returns without a token, so the form is still
        submitted.
        """
        match = re.search(self._RECAPTCHA_V3_KEY, self.driver.page_source)
        if not match:
            print("reCAPTCHA v3 sitekey not found; continuing without a token")
            return

        token = self._api_solve(
            "userrecaptcha",
            googlekey=match.group(1),
            pageurl=self.driver.current_url,
            version="v3",
            action=action,
            min_score="0.7",
        )

        self.driver.execute_script(
            """
            var token = arguments[0];
            var textarea = document.getElementById('g-recaptcha-response');
            if (textarea) { textarea.value = token; }
            document.querySelectorAll('input[name="g-recaptcha-response"]')
                .forEach(function (el) { el.value = token; });
            """,
            token,
        )

    def _solve_turnstile(self):
        """Solve Turnstile and write the token into every ``cf-turnstile-response`` field.

        Raises:
            ValueError: If neither a ``data-sitekey`` attribute nor a
                ``sitekey: '...'`` assignment is in the page source.
        """
        source = self.driver.page_source
        # Detection only looks for "cf-turnstile", so the attribute form may be
        # absent; fall back to the JavaScript API form before giving up.
        match = re.search(self._TURNSTILE_KEY, source) or re.search(self._TURNSTILE_JS_KEY, source)
        if not match:
            raise ValueError("Turnstile sitekey not found")

        token = self._api_solve(
            "turnstile", sitekey=match.group(1), pageurl=self.driver.current_url
        )

        self.driver.execute_script(
            """
            var token = arguments[0];
            document.querySelectorAll('[name="cf-turnstile-response"]')
                .forEach(function (el) { el.value = token; });
            """,
            token,
        )

    def _solve_image(self):
        """Screenshot the CAPTCHA image, solve it and type the answer."""
        img = self.driver.find_element(
            By.CSS_SELECTOR, "img.captcha, img[alt*='captcha'], img[src*='captcha']"
        )
        answer = self._api_solve("base64", body=img.screenshot_as_base64)

        captcha_input = self.driver.find_element(
            By.CSS_SELECTOR, "input[name='captcha'], input[name='code']"
        )
        captcha_input.clear()
        captcha_input.send_keys(answer)

    def _fill_and_submit(self, form_data):
        """Type *form_data* into fields by name, click submit and return the URL.

        Filling is best-effort: a field that cannot be located or typed into
        is reported and skipped instead of aborting the submission.
        """
        if form_data:
            for name, value in form_data.items():
                try:
                    field = self.driver.find_element(By.NAME, name)
                    field.clear()
                    field.send_keys(value)
                except Exception as exc:
                    # Still best-effort, but no longer silent.
                    print(f"Skipping form field {name!r}: {exc}")

        submit = self.driver.find_element(
            By.CSS_SELECTOR, "button[type='submit'], input[type='submit']"
        )
        submit.click()
        time.sleep(3)  # fixed pause before reading the resulting URL
        return self.driver.current_url

    def _api_solve(self, method, **params):
        """Submit a task to CaptchaAI and poll for its result.

        Returns:
            The ``request`` field of the first poll response with ``status`` 1.

        Raises:
            Exception: If the submit is rejected or a poll returns an error code.
            TimeoutError: If no result arrives after 30 polls spaced 5 s apart.
        """
        submit = requests.post("https://ocr.captchaai.com/in.php", data={
            "key": self.api_key, "method": method, "json": 1, **params,
        }, timeout=30)
        data = submit.json()
        if data.get("status") != 1:
            raise Exception(f"Submit error: {data.get('request')}")

        task_id = data["request"]
        for _ in range(30):
            time.sleep(5)
            result = requests.get("https://ocr.captchaai.com/res.php", params={
                "key": self.api_key, "action": "get", "id": task_id, "json": 1,
            }, timeout=30).json()
            if result.get("status") == 1:
                return result["request"]

            error_code = result.get("request")
            if error_code == "ERROR_CAPTCHA_UNSOLVABLE":
                raise Exception("CAPTCHA unsolvable")
            # Any code other than the "not ready" marker is terminal; stop
            # polling instead of waiting out the full 150 seconds.
            if error_code != "CAPCHA_NOT_READY":
                raise Exception(f"Solve error: {error_code}")
        raise TimeoutError("Solve timed out")


# Usage
# Start a browser, run the full detect-solve-submit flow on the login page
# and always shut the browser down afterwards.
login_url = "https://example.com/login"
credentials = {"username": "user@example.com", "password": "pass123"}

solver = SeleniumCaptchaSolver(API_KEY)
solver.start()
try:
    result_url = solver.solve_and_submit(login_url, form_data=credentials)
    print(f"Result: {result_url}")
finally:
    solver.stop()

Troubleshooting

| Symptom | Cause | Fix |
| --- | --- | --- |
| CAPTCHA detected but token injection fails | g-recaptcha-response not found | Check for iframes — reCAPTCHA may be in an iframe |
| Token injected but form doesn't submit | Callback not triggered | Call the reCAPTCHA callback function explicitly |
| "webdriver detected" by site | Stealth configuration not applied | Add CDP command to remove webdriver flag |
| Image CAPTCHA screenshot blank | Element not visible | Scroll element into view first |
| Turnstile token rejected | Wrong sitekey | Re-extract from live page source |

Frequently asked questions

Should I use Selenium or pure requests?

Use Selenium when the site requires JavaScript execution, dynamic content loading, or complex multi-page flows. Use requests for simple API-based form submissions where you just need the token.

How do I handle reCAPTCHA in iframes?

Switch to the reCAPTCHA iframe first: driver.switch_to.frame(driver.find_element(By.CSS_SELECTOR, "iframe[src*='recaptcha']")). Extract the sitekey, then switch back: driver.switch_to.default_content().

Can I run Selenium headless?

Yes, but some CAPTCHAs detect headless mode. Use --headless=new and ensure WebGL/Canvas fingerprints are valid. CaptchaAI solves on its own workers, so headless detection doesn't affect the solve.

How much slower is Selenium vs requests?

Selenium adds 3-5 seconds overhead for browser launch and page rendering. CAPTCHA solve time is the same. Use requests when possible, Selenium when necessary.


Summary

Python Selenium + CaptchaAI handles any CAPTCHA in browser automation: detect the CAPTCHA type from the page source, solve via CaptchaAI's API, inject the token via JavaScript, and submit the form. The SeleniumCaptchaSolver class automates the full detect-solve-submit workflow.

Full Working Code

Complete runnable examples for this article in Python, Node.js, PHP, Go, Java, C#, Ruby, Rust, Kotlin & Bash.

View on GitHub →

Discussions (0)

No comments yet.