Solve Grid Image CAPTCHA with Python and CaptchaAI

Grid image CAPTCHAs present a 3×3 or 4×4 grid of tiles with a text instruction like "select all squares with crosswalks." These are commonly seen in reCAPTCHA v2 image challenges. CaptchaAI analyzes the full grid image and returns the 1-based indices of the cells to click.

This guide shows you how to capture the grid, submit it to CaptchaAI, and click the correct tiles.

Prerequisites

Item	Value
CaptchaAI API key	From captchaai.com
Python	3.7+
Libraries	`requests`, `selenium`

Grid numbering

Cells are numbered starting from 1, left-to-right, top-to-bottom:

3×3 Grid:          4×4 Grid:
1 2 3              1  2  3  4
4 5 6              5  6  7  8
7 8 9              9  10 11 12
                   13 14 15 16

Step 1: Capture the grid image

Screenshot the full CAPTCHA grid — do not crop individual tiles.

from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://example.com/page-with-recaptcha")

# Switch to the reCAPTCHA challenge iframe if needed.
# A substring match (*=) is used instead of an exact match (=): the challenge
# iframe's title typically continues past "expires" (e.g. "... expires in two
# minutes"), in which case an exact comparison would find nothing.
iframe = driver.find_element(By.CSS_SELECTOR, "iframe[title*='recaptcha challenge expires']")
driver.switch_to.frame(iframe)

# Get the instruction text shown above the grid
instruction = driver.find_element(By.CSS_SELECTOR, ".rc-imageselect-desc-no-canonical").text
# e.g., "Select all images with crosswalks"

# Screenshot the whole grid element (not individual tiles) to a PNG file
grid = driver.find_element(By.CSS_SELECTOR, ".rc-imageselect-target")
grid.screenshot("grid_image.png")

Step 2: Submit to CaptchaAI

import requests
import time
import json

API_KEY = "YOUR_API_KEY"

# Upload the full-grid screenshot together with the challenge parameters.
with open("grid_image.png", "rb") as f:
    response = requests.post(
        "https://ocr.captchaai.com/in.php",
        data={
            "key": API_KEY,
            "method": "post",
            "grid_size": "3x3",
            "img_type": "recaptcha",
            # Send the instruction text captured from the page in Step 1
            # instead of a hard-coded keyword, so the request always
            # matches the challenge that was actually screenshotted.
            "instructions": instruction,
            "json": 1,
        },
        files={"file": ("grid.png", f, "image/png")},
        # requests has no default timeout; without one a stalled
        # connection would block the script forever.
        timeout=30,
    )

result = response.json()
# A status other than 1 means the submission was rejected; in that case
# the "request" field is reported as the failure reason.
if result["status"] != 1:
    raise Exception(f"Submit failed: {result['request']}")

# On success the "request" field is the task ID used for polling.
task_id = result["request"]
print(f"Task submitted: {task_id}")

Step 3: Poll for the solution

# Wait before the first poll
time.sleep(5)

# Poll up to 30 times, 5 seconds apart.
for _ in range(30):
    result = requests.get(
        "https://ocr.captchaai.com/res.php",
        params={
            "key": API_KEY,
            "action": "get",
            "id": task_id,
            "json": 1,
        },
        # requests has no default timeout; bound each poll explicitly.
        timeout=30,
    ).json()

    if result["status"] == 1:
        # The answer arrives as a JSON-encoded list of cell numbers.
        cells_to_click = json.loads(result["request"])
        print(f"Click cells: {cells_to_click}")
        # e.g., [1, 3, 6, 9]
        break

    # Anything other than "not ready yet" is treated as a hard failure.
    if result["request"] != "CAPCHA_NOT_READY":
        raise Exception(f"Error: {result['request']}")

    time.sleep(5)
else:
    # The loop ran out without a break: no solution was received. Fail here
    # with a clear message instead of leaving cells_to_click undefined,
    # which would surface later as a confusing NameError.
    raise TimeoutError("CaptchaAI did not return a solution after 30 polls")

Step 4: Click the correct tiles

# Collect every tile element of the grid
grid_tiles = driver.find_elements(By.CSS_SELECTOR, ".rc-imageselect-tile")

# Cell numbers start at 1 while list positions start at 0, so shift by one
for position in (number - 1 for number in cells_to_click):
    grid_tiles[position].click()
    time.sleep(0.3)  # Brief pause between consecutive clicks

# Submit the selection with the verify button
verify_button = driver.find_element(By.CSS_SELECTOR, "#recaptcha-verify-button")
verify_button.click()
print("Grid CAPTCHA solved")

Complete working example

import requests
import time
import json
from selenium import webdriver
from selenium.webdriver.common.by import By

API_KEY = "YOUR_API_KEY"
driver = webdriver.Chrome()

# Run everything inside try/finally so the browser is closed even when a
# step raises (element not found, API error, polling timeout, ...).
try:
    driver.get("https://example.com/page-with-recaptcha")

    # Switch to challenge iframe. Substring match (*=) because the iframe
    # title typically continues past "expires", which an exact match misses.
    iframe = driver.find_element(By.CSS_SELECTOR, "iframe[title*='recaptcha challenge expires']")
    driver.switch_to.frame(iframe)

    # Get instruction and screenshot grid
    instruction = driver.find_element(By.CSS_SELECTOR, ".rc-imageselect-desc-no-canonical").text
    grid = driver.find_element(By.CSS_SELECTOR, ".rc-imageselect-target")
    grid.screenshot("grid.png")

    # Submit to CaptchaAI
    with open("grid.png", "rb") as f:
        submit = requests.post(
            "https://ocr.captchaai.com/in.php",
            data={"key": API_KEY, "method": "post", "grid_size": "3x3",
                  "img_type": "recaptcha", "instructions": instruction, "json": 1},
            files={"file": ("grid.png", f, "image/png")},
            timeout=30,  # requests has no default timeout
        ).json()

    # A rejected submission carries an error code in "request"; stop here
    # rather than polling with that error code as if it were a task ID.
    if submit["status"] != 1:
        raise Exception(f"Submit failed: {submit['request']}")
    task_id = submit["request"]

    # Poll: up to 30 attempts, 5 seconds apart
    time.sleep(5)
    for _ in range(30):
        poll = requests.get(
            "https://ocr.captchaai.com/res.php",
            params={"key": API_KEY, "action": "get", "id": task_id, "json": 1},
            timeout=30,
        ).json()
        if poll["status"] == 1:
            # The answer is a JSON-encoded list of 1-based cell numbers
            cells = json.loads(poll["request"])
            break
        if poll["request"] != "CAPCHA_NOT_READY":
            raise Exception(poll["request"])
        time.sleep(5)
    else:
        # No break happened: fail clearly instead of hitting a NameError
        # on the undefined `cells` below.
        raise TimeoutError("CaptchaAI did not return a solution after 30 polls")

    # Click and verify
    tiles = driver.find_elements(By.CSS_SELECTOR, ".rc-imageselect-tile")
    for c in cells:
        tiles[c - 1].click()  # Convert 1-based cell number to 0-based index
        time.sleep(0.3)
    driver.find_element(By.CSS_SELECTOR, "#recaptcha-verify-button").click()
    print(f"Solved: clicked tiles {cells}")
finally:
    driver.quit()

Expected output: