Reference

Parallel CAPTCHA Solving with CaptchaAI

Solving CAPTCHAs one at a time wastes 10-15 seconds per solve waiting for results. Parallel solving lets you process dozens simultaneously, matching your scraping throughput.

Architecture Overview

Your Application
    ├── Task 1: Submit → Poll → Token ──┐
    ├── Task 2: Submit → Poll → Token ──┤
    ├── Task 3: Submit → Poll → Token ──┼──→ Process Results
    ├── Task 4: Submit → Poll → Token ──┤
    └── Task 5: Submit → Poll → Token ──┘

Each task runs independently. Submit and poll calls happen concurrently.

Python: asyncio

The most efficient approach for Python:

import asyncio
import aiohttp
import os


class AsyncSolver:
    """Asynchronous client for the CaptchaAI submit/poll HTTP endpoints.

    An instance only stores the API key and base URL; ``solve`` keeps no
    per-task state on the instance, so one solver can serve many concurrent
    ``solve`` calls.
    """

    def __init__(self, api_key):
        """Remember the API key that is sent with every request."""
        self.api_key = api_key
        self.base = "https://ocr.captchaai.com"

    async def solve(self, session, params, timeout=300, poll_interval=5):
        """Submit one CAPTCHA task and poll until its token is available.

        Args:
            session: Object with an aiohttp-style ``get(url, params=...)``
                returning an async context manager whose response exposes
                an awaitable ``text()``.
            params: Task parameters for ``in.php``. The mapping is copied,
                so the caller's dict is never modified.
            timeout: Maximum number of seconds to keep polling.
            poll_interval: Seconds to sleep before each poll request.

        Returns:
            Everything after the first ``|`` of the ``OK|...`` poll response.

        Raises:
            RuntimeError: The API answered with something other than
                ``OK|...`` or ``CAPCHA_NOT_READY``; the message is the
                response text.
            TimeoutError: No result arrived within ``timeout`` seconds.
        """
        # Build a new dict instead of assigning params["key"], so the API
        # key never ends up inside caller-owned data.
        submit_params = {**params, "key": self.api_key}

        async with session.get(f"{self.base}/in.php", params=submit_params) as r:
            # Strip so a trailing newline cannot break the prefix/equality
            # checks below.
            text = (await r.text()).strip()
        if not text.startswith("OK|"):
            raise RuntimeError(text)
        task_id = text.split("|", 1)[1]

        # get_running_loop() is the supported way to reach the loop from
        # inside a coroutine; its clock is monotonic.
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout
        poll_params = {"key": self.api_key, "action": "get", "id": task_id}

        while loop.time() < deadline:
            await asyncio.sleep(poll_interval)
            async with session.get(f"{self.base}/res.php", params=poll_params) as r:
                text = (await r.text()).strip()
            if text == "CAPCHA_NOT_READY":
                continue
            if text.startswith("OK|"):
                return text.split("|", 1)[1]
            raise RuntimeError(text)
        raise TimeoutError(f"task {task_id} not solved within {timeout}s")


async def solve_batch(tasks, max_concurrent=20):
    """Solve every task concurrently, with at most ``max_concurrent`` in flight.

    The API key is read from the ``CAPTCHAAI_API_KEY`` environment variable
    and one HTTP session is shared by all tasks.

    Args:
        tasks: Sized iterable of parameter dicts, one per CAPTCHA.
        max_concurrent: Upper bound on simultaneous submit/poll cycles.

    Returns:
        List in the same order as ``tasks``; each entry is either the solved
        token or the exception instance raised for that task.
    """
    solver = AsyncSolver(os.environ["CAPTCHAAI_API_KEY"])
    semaphore = asyncio.Semaphore(max_concurrent)

    async def limited_solve(session, params):
        # Holding the semaphore for the whole submit/poll cycle is what
        # caps the number of tasks in flight.
        async with semaphore:
            return await solver.solve(session, params)

    async with aiohttp.ClientSession() as session:
        results = await asyncio.gather(
            *(limited_solve(session, t) for t in tasks),
            return_exceptions=True,
        )

    # gather() can hand back BaseException instances such as CancelledError
    # (not an Exception subclass on Python 3.8+); testing against Exception
    # would count a cancelled task as solved.
    solved = sum(1 for r in results if not isinstance(r, BaseException))
    print(f"Solved {solved}/{len(tasks)}")
    return results


# Example: build 50 reCAPTCHA tasks (one per page URL) and solve them with
# at most 20 in flight at any moment.
tasks = [
    dict(
        method="userrecaptcha",
        googlekey="6Le-wvkS...",
        pageurl=f"https://example.com/page{page}",
    )
    for page in range(50)
]
results = asyncio.run(solve_batch(tasks, max_concurrent=20))

Python: ThreadPoolExecutor

For sync code, use threads:

from concurrent.futures import ThreadPoolExecutor, as_completed
import requests
import time
import os

# API key is read from the environment when this module is loaded; a missing
# CAPTCHAAI_API_KEY variable raises KeyError.
API_KEY = os.environ["CAPTCHAAI_API_KEY"]
# Base URL shared by the submit (in.php) and poll (res.php) endpoints.
BASE = "https://ocr.captchaai.com"


def solve_one(params, timeout=300, poll_interval=5, request_timeout=30):
    """Submit one CAPTCHA task and block until its token is available.

    Args:
        params: Task parameters for ``in.php``. The mapping is copied, so the
            caller's dict is never modified.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep before each poll request.
        request_timeout: Per-request timeout, in seconds, passed to
            ``requests.get``.

    Returns:
        Everything after the first ``|`` of the ``OK|...`` poll response.

    Raises:
        RuntimeError: The API answered with something other than ``OK|...``
            or ``CAPCHA_NOT_READY``; the message is the response text.
        TimeoutError: No result arrived within ``timeout`` seconds.
        requests.RequestException: A request failed or exceeded
            ``request_timeout``.
    """
    # Build a new dict instead of assigning params["key"], so the API key
    # never ends up inside caller-owned data.
    submit_params = {**params, "key": API_KEY}

    # requests waits indefinitely unless a timeout is given; a stalled
    # connection would otherwise block this thread regardless of the
    # polling deadline.
    resp = requests.get(
        f"{BASE}/in.php", params=submit_params, timeout=request_timeout
    )
    submit_text = resp.text.strip()
    if not submit_text.startswith("OK|"):
        raise RuntimeError(submit_text)

    task_id = submit_text.split("|", 1)[1]
    poll_params = {"key": API_KEY, "action": "get", "id": task_id}
    # monotonic() is unaffected by wall-clock adjustments.
    deadline = time.monotonic() + timeout

    while time.monotonic() < deadline:
        time.sleep(poll_interval)
        result = requests.get(
            f"{BASE}/res.php", params=poll_params, timeout=request_timeout
        )
        # Strip so a trailing newline cannot break the checks below.
        poll_text = result.text.strip()
        if poll_text == "CAPCHA_NOT_READY":
            continue
        if poll_text.startswith("OK|"):
            return poll_text.split("|", 1)[1]
        raise RuntimeError(poll_text)
    raise TimeoutError(f"task {task_id} not solved within {timeout}s")


# Twenty reCAPTCHA tasks, one per page URL.
tasks = [
    dict(
        method="userrecaptcha",
        googlekey="6Le-wvkS...",
        pageurl=f"https://example.com/page{page}",
    )
    for page in range(20)
]

with ThreadPoolExecutor(max_workers=10) as pool:
    # Remember which task index each future belongs to: as_completed()
    # yields futures as they finish, not in submission order.
    futures = {
        pool.submit(solve_one, task): position
        for position, task in enumerate(tasks)
    }

    for future in as_completed(futures):
        idx = futures[future]
        try:
            token = future.result()
            message = f"Task {idx}: solved ({len(token)} chars)"
        except Exception as e:
            message = f"Task {idx}: failed ({e})"
        print(message)

Node.js: Promise.allSettled

const axios = require("axios");
// API key comes from the environment; it is undefined when CAPTCHAAI_API_KEY is not set.
const API_KEY = process.env.CAPTCHAAI_API_KEY;

/**
 * Submit one CAPTCHA task and poll until its token is available.
 *
 * @param {Object} params - Task parameters for in.php. The object is copied,
 *   so the caller's object is never modified.
 * @returns {Promise<string>} Everything after the leading "OK|" of the poll
 *   response.
 * @throws {Error} When the submit or poll response is anything other than
 *   "OK|..." / "CAPCHA_NOT_READY" (the message is the response text), or
 *   with message "Timeout" after 300 seconds of polling.
 */
async function solveOne(params) {
  // axios has no timeout by default; without one a stalled connection
  // would never settle and the polling deadline would not help.
  const requestTimeoutMs = 30000;

  // Spread into a new object instead of assigning params.key so the API
  // key never ends up inside the caller's object.
  const submit = await axios.get("https://ocr.captchaai.com/in.php", {
    params: { ...params, key: API_KEY },
    timeout: requestTimeoutMs,
  });
  const submitText = String(submit.data).trim();
  // Fail fast on a rejected submit: otherwise taskId would be undefined and
  // the real error would be replaced by whatever the poll endpoint answers.
  if (!submitText.startsWith("OK|")) throw new Error(submitText);
  const taskId = submitText.slice("OK|".length);

  const deadline = Date.now() + 300000;
  while (Date.now() < deadline) {
    await new Promise((r) => setTimeout(r, 5000));
    const poll = await axios.get("https://ocr.captchaai.com/res.php", {
      params: { key: API_KEY, action: "get", id: taskId },
      timeout: requestTimeoutMs,
    });
    const text = String(poll.data).trim();
    if (text === "CAPCHA_NOT_READY") continue;
    if (text.startsWith("OK|")) return text.slice("OK|".length);
    throw new Error(text);
  }
  throw new Error("Timeout");
}

/**
 * Solve a list of tasks in consecutive chunks of `concurrency` tasks.
 *
 * Chunks run one after another: the next chunk starts only after every task
 * in the current chunk has settled.
 *
 * @param {Object[]} taskList - Parameter objects, one per CAPTCHA.
 * @param {number} [concurrency=10] - Chunk size; must be at least 1.
 * @returns {Promise<Object[]>} Promise.allSettled result objects, in the
 *   same order as `taskList`.
 * @throws {RangeError} When `concurrency` is not a number >= 1.
 */
async function solveBatch(taskList, concurrency = 10) {
  // The loop below advances by `concurrency`; zero or a negative step would
  // never reach the end of the list, and NaN would silently skip the tasks.
  if (!(concurrency >= 1)) {
    throw new RangeError(`concurrency must be >= 1, got ${concurrency}`);
  }

  // Process in chunks to limit concurrency
  const results = [];
  for (let i = 0; i < taskList.length; i += concurrency) {
    const chunk = taskList.slice(i, i + concurrency);
    const chunkResults = await Promise.allSettled(
      chunk.map((task) => solveOne(task))
    );
    results.push(...chunkResults);
  }

  const solved = results.filter((r) => r.status === "fulfilled").length;
  console.log(`Solved ${solved}/${results.length}`);
  return results;
}

// Example: build 30 reCAPTCHA tasks (one per page URL) and solve them in
// chunks of 10.
const tasks = [...Array(30).keys()].map((pageIndex) => ({
  method: "userrecaptcha",
  googlekey: "6Le-wvkS...",
  pageurl: `https://example.com/page${pageIndex}`,
}));

solveBatch(tasks, 10);

Queue-Based Architecture

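For production systems, use a task queue. This example reuses the AsyncSolver class and the aiohttp and os imports from the asyncio section above: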

import asyncio
from asyncio import Queue


async def worker(name, queue, solver, session, results):
    """Consume ``(task_id, params)`` items from ``queue`` forever.

    Each item is solved through ``solver.solve(session, params)`` and the
    outcome is stored in ``results[task_id]``: ``{"status": "ok", "token":
    ...}`` on success, ``{"status": "error", "error": ...}`` when the solve
    raises. ``queue.task_done()`` is called for every item either way. The
    loop never exits by itself, so the owner has to cancel the task.
    ``name`` is accepted but not used by the body.
    """
    while True:
        job_id, job_params = await queue.get()
        try:
            solved_token = await solver.solve(session, job_params)
        except Exception as exc:
            results[job_id] = {"status": "error", "error": str(exc)}
        else:
            results[job_id] = {"status": "ok", "token": solved_token}
        finally:
            # Always acknowledge the item so queue.join() can complete.
            queue.task_done()


async def run_queue(tasks, num_workers=10):
    """Solve ``tasks`` with a fixed pool of queue-fed worker tasks.

    The API key is read from the ``CAPTCHAAI_API_KEY`` environment variable
    and one HTTP session is shared by all workers.

    Args:
        tasks: Mapping of task id to the parameter dict for that CAPTCHA.
        num_workers: Number of worker tasks pulling from the queue.

    Returns:
        Dict mapping each task id to the outcome dict stored by ``worker``.

    Raises:
        ValueError: ``tasks`` is non-empty but ``num_workers`` is below 1.
    """
    # With no workers nothing would ever call task_done(), so queue.join()
    # below would wait forever.
    if tasks and num_workers < 1:
        raise ValueError("num_workers must be at least 1")

    solver = AsyncSolver(os.environ["CAPTCHAAI_API_KEY"])
    queue = Queue()
    results = {}

    async with aiohttp.ClientSession() as session:
        workers = [
            asyncio.create_task(worker(f"w-{i}", queue, solver, session, results))
            for i in range(num_workers)
        ]
        try:
            for task_id, params in tasks.items():
                await queue.put((task_id, params))

            await queue.join()
        finally:
            # Workers loop forever, so they must be cancelled explicitly,
            # including when join() is interrupted.
            for w in workers:
                w.cancel()
            # Wait for the cancellations to finish while the session is
            # still open; return_exceptions absorbs the CancelledErrors.
            await asyncio.gather(*workers, return_exceptions=True)

    return results

Performance Tips

| Factor | Recommendation |
|---|---|
| Concurrency level | Start with 10-20, scale based on balance |
| Poll interval | 5 seconds per task |
| Timeout | 300 seconds per task |
| Error handling | Use return_exceptions=True with gather |
| Session reuse | Reuse aiohttp/requests sessions |
| Rate limiting | Use semaphore to cap concurrent solves |

FAQ

How many CAPTCHAs can I solve in parallel?

CaptchaAI supports 100+ concurrent requests. Your limit is usually your balance and your proxy pool, not the API.

Does parallel solving cost more?

No. Each solve costs the same regardless of whether it's sequential or parallel.

Should I use threads or asyncio?

Use asyncio for Python. It handles thousands of concurrent tasks with minimal memory. Threads are fine for smaller workloads (< 50 concurrent).

Discussions (0)

No comments yet.