Tutorials

Grid Image CAPTCHA: Coordinate Mapping and Cell Selection

Grid image CAPTCHAs — like reCAPTCHA v2 image challenges — present a 3×3 or 4×4 grid and ask users to select cells matching an instruction ("Select all squares with traffic lights"). CaptchaAI returns the cell indices. This guide covers how to capture the grid, map cells to coordinates, and click the correct tiles.


Grid layouts

CAPTCHAs use two standard grid sizes:

3×3 Grid:          4×4 Grid:
1  2  3            1   2   3   4
4  5  6            5   6   7   8
7  8  9            9  10  11  12
                   13  14  15  16

Cells are numbered left-to-right, top-to-bottom — reading order.


Step 1: Capture the grid image

Python (Selenium)

import base64
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Step 1 (Selenium): open the page, enter the reCAPTCHA challenge iframe and
# collect everything the solver needs: instruction text, a base64 screenshot
# of the grid image, and the grid size. Leaves `challenge_iframe`,
# `instruction`, `img_b64` and `grid_size` as module-level names.
driver = webdriver.Chrome()
driver.get("https://example.com/form")

# One shared wait, reused for the iframe and for the grid image inside it.
wait = WebDriverWait(driver, 10)

# Wait for reCAPTCHA iframe
wait.until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "iframe[src*='recaptcha']"))
)

# Switch to challenge iframe
iframes = driver.find_elements(By.CSS_SELECTOR, "iframe[src*='recaptcha']")
challenge_iframe = iframes[-1]  # Challenge iframe is typically the last one
driver.switch_to.frame(challenge_iframe)

try:
    # Get the grid image. Wait for it instead of looking it up immediately:
    # the frame's content may not be rendered yet right after the switch.
    grid_img = wait.until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, "img.rc-image-tile-33, img.rc-image-tile-44")
        )
    )
    img_src = grid_img.get_attribute("src")

    # Get instruction text
    instruction = driver.find_element(
        By.CSS_SELECTOR, ".rc-imageselect-desc-wrapper"
    ).text
    print(f"Instruction: {instruction}")

    # Screenshot the grid as base64
    img_b64 = grid_img.screenshot_as_base64

    # Determine grid size from the tile class suffix (…-33 or …-44).
    # get_attribute() returns None when the attribute is absent.
    classes = grid_img.get_attribute("class") or ""
    grid_size = "4x4" if "44" in classes else "3x3"
    print(f"Grid size: {grid_size}")
finally:
    # Always return to the top-level document, even if a lookup failed,
    # so later steps do not start inside the iframe by accident.
    driver.switch_to.default_content()

JavaScript (Puppeteer)

const puppeteer = require('puppeteer');
const fs = require('fs');

// Step 1 (Puppeteer): open the page, locate the reCAPTCHA challenge frame and
// collect the instruction text, a base64 screenshot of the grid image and the
// grid size.
const browser = await puppeteer.launch({ headless: false });
const page = await browser.newPage();
await page.goto('https://example.com/form');

// Find the challenge iframe.
// More than one frame URL can contain "recaptcha" (checkbox widget plus the
// challenge popup), so taking the first match may select the wrong one.
// NOTE(review): the challenge popup is normally served from a ".../bframe"
// URL - confirm on the target page. Otherwise fall back to the last matching
// frame, the same heuristic the Selenium example uses.
const frames = page.frames().filter(f => f.url().includes('recaptcha'));
const challengeFrame =
  frames.find(f => f.url().includes('/bframe')) || frames[frames.length - 1];
if (!challengeFrame) {
  throw new Error('reCAPTCHA challenge iframe not found');
}

// Wait for the grid image so a challenge that is still rendering fails with a
// clear timeout error rather than a null element handle.
const gridImg = await challengeFrame.waitForSelector(
  'img.rc-image-tile-33, img.rc-image-tile-44'
);

// Get instruction
const instruction = await challengeFrame.$eval(
  '.rc-imageselect-desc-wrapper',
  el => el.textContent.trim()
);

// Screenshot the grid image.
// Buffer.from() normalises the result: some Puppeteer releases return a
// Uint8Array, whose toString('base64') does not produce base64.
const imgBuffer = Buffer.from(await gridImg.screenshot());
const imgBase64 = imgBuffer.toString('base64');

// Determine grid size from the tile class suffix (…-33 or …-44)
const className = await gridImg.evaluate(el => el.className);
const gridSize = className.includes('44') ? '4x4' : '3x3';
console.log(`Grid: ${gridSize}, Instruction: ${instruction}`);

Step 2: Submit to CaptchaAI

import requests
import time
import json

# Step 2: upload the grid screenshot to CaptchaAI and poll for the answer.
# Reads `instruction`, `img_b64` and `grid_size` from Step 1 and leaves the
# solved cell indices in `cells`.
API_KEY = "YOUR_API_KEY"

# Seconds to wait for any single HTTP call. Without a timeout, requests can
# block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30

# Parse the instruction to a simple keyword
# "Select all images with traffic lights" → "traffic lights"
import re
keyword_match = re.search(r'(?:with|of|containing)\s+(.+?)\.?$', instruction, re.I)
# Fall back to the full instruction when no keyword can be extracted.
keyword = keyword_match.group(1) if keyword_match else instruction

# Submit
# Upload the decoded bytes straight from memory. This avoids a round trip
# through a hard-coded /tmp path, which does not exist on every platform.
image_bytes = base64.b64decode(img_b64)
resp = requests.post(
    "https://ocr.captchaai.com/in.php",
    files={"file": ("grid.png", image_bytes)},
    data={
        "key": API_KEY,
        "method": "post",
        "grid_size": grid_size,
        "img_type": "recaptcha",
        "instructions": keyword,
        "json": "1",
    },
    timeout=REQUEST_TIMEOUT,
).json()

if resp["status"] != 1:
    raise Exception(f"Submit error: {resp['request']}")

task_id = resp["request"]

# Poll: up to 20 attempts, 5 seconds apart.
for _ in range(20):
    time.sleep(5)
    result = requests.get(
        "https://ocr.captchaai.com/res.php",
        params={"key": API_KEY, "action": "get", "id": task_id, "json": "1"},
        timeout=REQUEST_TIMEOUT,
    ).json()

    if result["status"] == 1:
        cells = json.loads(result["request"])
        print(f"Cells to click: {cells}")  # e.g., [1, 3, 6, 9]
        break
    # Any reply other than "not ready yet" is a real error.
    if result["request"] != "CAPCHA_NOT_READY":
        raise Exception(f"Error: {result['request']}")
else:
    # The loop ended without a break, so `cells` was never assigned. Fail
    # here instead of with a NameError in a later step.
    raise TimeoutError(f"Task {task_id} was not solved after 20 polling attempts")

Step 3: Map cell indices to click coordinates

Convert 1-based cell indices to pixel coordinates within the grid:

def cell_to_coordinates(cell_index, grid_size, grid_width, grid_height):
    """Convert a 1-based cell index to (x, y) center coordinates.

    Cells are numbered in reading order (left-to-right, top-to-bottom).
    The returned point is measured from the grid's top-left corner.

    Args:
        cell_index: 1-based index of the cell.
        grid_size: "3x3" selects a 3×3 grid; any other value is treated
            as 4×4.
        grid_width: Width of the whole grid, in pixels.
        grid_height: Height of the whole grid, in pixels.

    Returns:
        A tuple ``(x, y)`` of ints (truncated toward zero) giving the
        center of the requested cell.

    Raises:
        ValueError: If ``cell_index`` is not within ``1..cols*rows``.
    """
    cols, rows = (3, 3) if grid_size == "3x3" else (4, 4)

    # Reject indices that do not exist in this grid. Without this check
    # they map to a wrong cell (0 or negative) or to a point outside the
    # grid (too large).
    if not 1 <= cell_index <= cols * rows:
        raise ValueError(
            f"cell_index {cell_index} is out of range for a "
            f"{cols}x{rows} grid (expected 1..{cols * rows})"
        )

    cell_w = grid_width / cols
    cell_h = grid_height / rows

    # Convert 1-based index to 0-based row/col
    row, col = divmod(cell_index - 1, cols)

    # Center of the cell
    x = col * cell_w + cell_w / 2
    y = row * cell_h + cell_h / 2

    return int(x), int(y)

# Example: map every solved cell onto a 300×300 grid and print its center.
example_width = example_height = 300
for cell in cells:
    x, y = cell_to_coordinates(cell, grid_size, example_width, example_height)
    print(f"Cell {cell} → ({x}, {y})")

Output for a 3×3 grid (300×300) when the solver returns cells [1, 3, 6, 9]:

Cell 1 → (50, 50)
Cell 3 → (250, 50)
Cell 6 → (250, 150)
Cell 9 → (250, 250)

Step 4: Click the cells

Selenium

from selenium.webdriver.common.action_chains import ActionChains

# Step 4 (Selenium): click the center of every solved cell, then press Verify.
driver.switch_to.frame(challenge_iframe)

try:
    # Get grid element position and size
    grid_el = driver.find_element(By.CSS_SELECTOR, ".rc-imageselect-target")
    grid_rect = grid_el.rect
    grid_w = grid_rect["width"]
    grid_h = grid_rect["height"]

    actions = ActionChains(driver)

    for cell in cells:
        x, y = cell_to_coordinates(cell, grid_size, grid_w, grid_h)
        # cell_to_coordinates() measures from the grid's top-left corner.
        # The offsets passed below are measured from the element's CENTER,
        # so shift each coordinate by half the grid size.
        # NOTE(review): center-origin offsets are the behaviour of current
        # Selenium 4 releases; older releases measured from the top-left
        # corner - confirm against the installed version.
        actions.move_to_element_with_offset(
            grid_el,
            int(x - grid_w / 2),
            int(y - grid_h / 2),
        ).click()

    # The clicks above are only queued; perform() sends them to the browser.
    actions.perform()

    # Click verify
    verify_btn = driver.find_element(By.ID, "recaptcha-verify-button")
    verify_btn.click()
finally:
    # Leave the iframe even when a lookup or click fails, so the driver is
    # never left inside the challenge frame.
    driver.switch_to.default_content()

Puppeteer

// Click each solved cell through the challenge table's DOM, then press Verify.
// Cell indices are 1-based and run in reading order.
const columnCount = gridSize === '4x4' ? 4 : 3;
const tableRows = await challengeFrame.$$('table.rc-imageselect-table tr');
for (const cellIdx of cells) {
  const zeroBased = cellIdx - 1;
  const rowIdx = Math.floor(zeroBased / columnCount);
  const colIdx = zeroBased % columnCount;
  const rowCells = await tableRows[rowIdx].$$('td');
  await rowCells[colIdx].click();
  // Short pause between clicks.
  await new Promise(resolve => setTimeout(resolve, 200));
}

await challengeFrame.click('#recaptcha-verify-button');

Handling dynamic tiles

Some reCAPTCHA v2 grids replace clicked tiles with new images. Handle this with a retry loop:

def solve_with_dynamic_tiles(driver, api_key, max_rounds=3):
    """Solve a 3×3 challenge whose clicked tiles are replaced by new images.

    Each round re-captures the grid, asks the solver for matching cells,
    clicks them and presses Verify, for up to ``max_rounds`` rounds.

    Relies on names defined outside this function: ``challenge_iframe``,
    ``keyword``, ``submit_and_poll`` and ``click_cells``.

    Args:
        driver: Selenium WebDriver positioned on the page with the challenge.
        api_key: CaptchaAI API key, passed through to ``submit_and_poll``.
        max_rounds: Maximum number of capture/solve/click rounds.

    Returns:
        True if the challenge iframe could no longer be entered after a
        round (treated as solved); False if the solver returned no cells
        or all rounds were used up.
    """
    # Imported locally so the function does not depend on a top-level import.
    from selenium.common.exceptions import WebDriverException

    for _ in range(max_rounds):
        driver.switch_to.frame(challenge_iframe)
        try:
            # Re-capture the grid (the instruction keyword is reused).
            img_b64 = driver.find_element(
                By.CSS_SELECTOR, "img.rc-image-tile-33"
            ).screenshot_as_base64

            # Submit and get cells (same as above)
            cells = submit_and_poll(api_key, img_b64, "3x3", keyword)

            if not cells:
                break

            # Click cells
            click_cells(driver, cells, "3x3")

            # Click verify
            driver.find_element(By.ID, "recaptcha-verify-button").click()
        finally:
            # Runs on the normal path, on `break` and on errors, so the
            # driver is never left inside the challenge iframe.
            driver.switch_to.default_content()

        time.sleep(2)

        # Check if solved (no more challenge iframe). Only a WebDriver error
        # counts as "iframe gone"; any other exception is a real failure and
        # must not be reported as success.
        try:
            driver.switch_to.frame(challenge_iframe)
        except WebDriverException:
            return True  # Solved
        driver.switch_to.default_content()

    return False

Troubleshooting

| Problem | Cause | Fix |
|---|---|---|
| Wrong cells returned | Wrong grid_size | Check if grid is 3×3 or 4×4 |
| Clicks miss cells | Coordinate offset wrong | Verify grid element dimensions |
| ERROR_WRONG_FILE_EXTENSION | Bad image format | Use PNG or JPEG |
| New tiles appear after clicking | Dynamic grid | Re-solve after each round |

FAQ

Does CaptchaAI support 4×4 grids?

Yes. Set grid_size=4x4 and the response will use indices 1-16.

How accurate is grid image solving?

Accuracy depends on image quality. Send the original CAPTCHA image without cropping or compression.


Solve grid image CAPTCHAs with CaptchaAI

Get your API key at captchaai.com.


Full Working Code

Complete runnable examples for this article in Python, Node.js, PHP, Go, Java, C#, Ruby, Rust, Kotlin & Bash.

View on GitHub →

Discussions (0)

No comments yet.