Tutorials

SQLite for Local CAPTCHA Solve Caching and Tracking

SQLite needs no server setup, ships with Python, and handles thousands of writes per second. It's the fastest way to add local solve tracking, token caching, and request deduplication to a CAPTCHA solving workflow.

When to Use SQLite

Use Case SQLite Better Alternative
Single-machine development
Small-scale production (< 1K solves/hr)
Test result tracking
Multi-server production PostgreSQL, MongoDB
High-throughput distributed Redis, DynamoDB
Real-time analytics dashboard TimescaleDB, InfluxDB

Schema Design

CREATE TABLE IF NOT EXISTS captcha_solves (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    captcha_id TEXT,
    type TEXT NOT NULL,
    sitekey TEXT,
    pageurl TEXT,
    status TEXT NOT NULL DEFAULT 'submitted',
    solution TEXT,
    error TEXT,
    submitted_at TEXT NOT NULL DEFAULT (datetime('now')),
    solved_at TEXT,
    elapsed_ms INTEGER,
    polls INTEGER DEFAULT 0,
    project TEXT
);

CREATE INDEX IF NOT EXISTS idx_submitted_at ON captcha_solves(submitted_at);
CREATE INDEX IF NOT EXISTS idx_type_status ON captcha_solves(type, status);
CREATE INDEX IF NOT EXISTS idx_sitekey ON captcha_solves(sitekey);

-- Token cache for reuse within TTL
CREATE TABLE IF NOT EXISTS token_cache (
    sitekey TEXT NOT NULL,
    pageurl TEXT NOT NULL,
    token TEXT NOT NULL,
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    expires_at TEXT NOT NULL,
    used INTEGER DEFAULT 0,
    PRIMARY KEY (sitekey, pageurl, token)
);

CREATE INDEX IF NOT EXISTS idx_cache_lookup
ON token_cache(sitekey, pageurl, used, expires_at);

Python Implementation

Database Setup

import os
import time
import sqlite3
from datetime import datetime, timedelta, timezone
import requests

DB_PATH = os.environ.get("CAPTCHA_DB", "captcha_solves.db")
API_KEY = os.environ["CAPTCHAAI_API_KEY"]


def get_db():
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    conn.execute("PRAGMA journal_mode=WAL")  # Better concurrent read performance
    conn.execute("PRAGMA busy_timeout=5000")
    return conn


def init_db():
    conn = get_db()
    conn.executescript("""
        CREATE TABLE IF NOT EXISTS captcha_solves (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            captcha_id TEXT,
            type TEXT NOT NULL,
            sitekey TEXT,
            pageurl TEXT,
            status TEXT NOT NULL DEFAULT 'submitted',
            solution TEXT,
            error TEXT,
            submitted_at TEXT NOT NULL DEFAULT (datetime('now')),
            solved_at TEXT,
            elapsed_ms INTEGER,
            polls INTEGER DEFAULT 0,
            project TEXT
        );
        CREATE INDEX IF NOT EXISTS idx_submitted_at ON captcha_solves(submitted_at);
        CREATE INDEX IF NOT EXISTS idx_type_status ON captcha_solves(type, status);

        CREATE TABLE IF NOT EXISTS token_cache (
            sitekey TEXT NOT NULL,
            pageurl TEXT NOT NULL,
            token TEXT NOT NULL,
            created_at TEXT NOT NULL DEFAULT (datetime('now')),
            expires_at TEXT NOT NULL,
            used INTEGER DEFAULT 0,
            PRIMARY KEY (sitekey, pageurl, token)
        );
        CREATE INDEX IF NOT EXISTS idx_cache_lookup
        ON token_cache(sitekey, pageurl, used, expires_at);
    """)
    conn.close()

init_db()

Solve and Track

def solve_recaptcha(sitekey, pageurl, project=None):
    conn = get_db()

    # Check cache first
    cached = get_cached_token(conn, sitekey, pageurl)
    if cached:
        conn.close()
        return cached

    # Insert tracking record
    now = datetime.now(timezone.utc).isoformat()
    cursor = conn.execute(
        "INSERT INTO captcha_solves (type, sitekey, pageurl, submitted_at, project) "
        "VALUES (?, ?, ?, ?, ?)",
        ("recaptcha_v2", sitekey, pageurl, now, project)
    )
    row_id = cursor.lastrowid
    conn.commit()

    # Submit to CaptchaAI
    resp = requests.post("https://ocr.captchaai.com/in.php", data={
        "key": API_KEY,
        "method": "userrecaptcha",
        "googlekey": sitekey,
        "pageurl": pageurl,
        "json": 1
    })
    data = resp.json()

    if data.get("status") != 1:
        conn.execute(
            "UPDATE captcha_solves SET status=?, error=? WHERE id=?",
            ("error", data.get("request"), row_id)
        )
        conn.commit()
        conn.close()
        return None

    captcha_id = data["request"]
    conn.execute(
        "UPDATE captcha_solves SET captcha_id=?, status=? WHERE id=?",
        (captcha_id, "polling", row_id)
    )
    conn.commit()

    # Poll
    polls = 0
    for _ in range(60):
        time.sleep(5)
        polls += 1
        result = requests.get("https://ocr.captchaai.com/res.php", params={
            "key": API_KEY, "action": "get",
            "id": captcha_id, "json": 1
        }).json()

        if result.get("status") == 1:
            solved_at = datetime.now(timezone.utc).isoformat()
            submitted = datetime.fromisoformat(now)
            elapsed = int((datetime.now(timezone.utc) - submitted).total_seconds() * 1000)

            conn.execute(
                "UPDATE captcha_solves SET status=?, solution=?, solved_at=?, "
                "elapsed_ms=?, polls=? WHERE id=?",
                ("solved", result["request"], solved_at, elapsed, polls, row_id)
            )
            # Cache the token
            cache_token(conn, sitekey, pageurl, result["request"])
            conn.commit()
            conn.close()
            return result["request"]

        if result.get("request") != "CAPCHA_NOT_READY":
            conn.execute(
                "UPDATE captcha_solves SET status=?, error=?, polls=? WHERE id=?",
                ("error", result.get("request"), polls, row_id)
            )
            conn.commit()
            conn.close()
            return None

    conn.execute(
        "UPDATE captcha_solves SET status=?, polls=? WHERE id=?",
        ("timeout", polls, row_id)
    )
    conn.commit()
    conn.close()
    return None

Token Caching

def cache_token(conn, sitekey, pageurl, token, ttl_seconds=90):
    expires_at = (datetime.now(timezone.utc) + timedelta(seconds=ttl_seconds)).isoformat()
    conn.execute(
        "INSERT OR REPLACE INTO token_cache (sitekey, pageurl, token, expires_at) "
        "VALUES (?, ?, ?, ?)",
        (sitekey, pageurl, token, expires_at)
    )


def get_cached_token(conn, sitekey, pageurl):
    now = datetime.now(timezone.utc).isoformat()
    row = conn.execute(
        "SELECT token FROM token_cache "
        "WHERE sitekey=? AND pageurl=? AND used=0 AND expires_at > ? "
        "ORDER BY expires_at ASC LIMIT 1",
        (sitekey, pageurl, now)
    ).fetchone()

    if row:
        conn.execute(
            "UPDATE token_cache SET used=1 WHERE token=?",
            (row["token"],)
        )
        conn.commit()
        return row["token"]
    return None

Analytics Queries

def get_stats(hours=24):
    conn = get_db()
    cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()

    total = conn.execute(
        "SELECT COUNT(*) FROM captcha_solves WHERE submitted_at >= ?", (cutoff,)
    ).fetchone()[0]

    solved = conn.execute(
        "SELECT COUNT(*) FROM captcha_solves WHERE submitted_at >= ? AND status='solved'",
        (cutoff,)
    ).fetchone()[0]

    avg_time = conn.execute(
        "SELECT AVG(elapsed_ms) FROM captcha_solves "
        "WHERE submitted_at >= ? AND status='solved'",
        (cutoff,)
    ).fetchone()[0]

    conn.close()
    return {
        "total": total,
        "solved": solved,
        "success_rate": (solved / total * 100) if total else 0,
        "avg_time_ms": round(avg_time) if avg_time else 0
    }


def cleanup_old_records(days=30):
    conn = get_db()
    cutoff = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()
    conn.execute("DELETE FROM captcha_solves WHERE submitted_at < ?", (cutoff,))
    conn.execute("DELETE FROM token_cache WHERE expires_at < ?",
                 (datetime.now(timezone.utc).isoformat(),))
    conn.execute("VACUUM")
    conn.commit()
    conn.close()

JavaScript Implementation

const Database = require("better-sqlite3");
const axios = require("axios");

const db = new Database(process.env.CAPTCHA_DB || "captcha_solves.db");
const API_KEY = process.env.CAPTCHAAI_API_KEY;

db.pragma("journal_mode = WAL");
db.exec(`
  CREATE TABLE IF NOT EXISTS captcha_solves (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    captcha_id TEXT, type TEXT NOT NULL, sitekey TEXT, pageurl TEXT,
    status TEXT DEFAULT 'submitted', solution TEXT, error TEXT,
    submitted_at TEXT DEFAULT (datetime('now')),
    solved_at TEXT, elapsed_ms INTEGER, polls INTEGER DEFAULT 0
  );
  CREATE INDEX IF NOT EXISTS idx_submitted ON captcha_solves(submitted_at);
`);

async function solveAndStore(sitekey, pageurl) {
  const submittedAt = new Date().toISOString();
  const insert = db.prepare(
    "INSERT INTO captcha_solves (type, sitekey, pageurl, submitted_at) VALUES (?, ?, ?, ?)"
  );
  const { lastInsertRowid } = insert.run("recaptcha_v2", sitekey, pageurl, submittedAt);

  const submit = await axios.post("https://ocr.captchaai.com/in.php", null, {
    params: { key: API_KEY, method: "userrecaptcha", googlekey: sitekey, pageurl, json: 1 },
  });

  if (submit.data.status !== 1) {
    db.prepare("UPDATE captcha_solves SET status=?, error=? WHERE id=?")
      .run("error", submit.data.request, lastInsertRowid);
    return null;
  }

  const captchaId = submit.data.request;
  db.prepare("UPDATE captcha_solves SET captcha_id=?, status=? WHERE id=?")
    .run(captchaId, "polling", lastInsertRowid);

  let polls = 0;
  for (let i = 0; i < 60; i++) {
    await new Promise((r) => setTimeout(r, 5000));
    polls++;
    const poll = await axios.get("https://ocr.captchaai.com/res.php", {
      params: { key: API_KEY, action: "get", id: captchaId, json: 1 },
    });

    if (poll.data.status === 1) {
      const elapsed = Date.now() - new Date(submittedAt).getTime();
      db.prepare(
        "UPDATE captcha_solves SET status=?, solution=?, solved_at=?, elapsed_ms=?, polls=? WHERE id=?"
      ).run("solved", poll.data.request, new Date().toISOString(), elapsed, polls, lastInsertRowid);
      return poll.data.request;
    }
    if (poll.data.request !== "CAPCHA_NOT_READY") {
      db.prepare("UPDATE captcha_solves SET status=?, error=?, polls=? WHERE id=?")
        .run("error", poll.data.request, polls, lastInsertRowid);
      return null;
    }
  }

  db.prepare("UPDATE captcha_solves SET status=?, polls=? WHERE id=?")
    .run("timeout", polls, lastInsertRowid);
  return null;
}

Troubleshooting

Issue Cause Fix
database is locked Concurrent writes without WAL mode Add PRAGMA journal_mode=WAL
Database file grows large No cleanup routine Run cleanup_old_records() daily
Slow queries on large datasets Missing indexes Add indexes on submitted_at and type
Token cache stale Expired tokens not cleaned Cleanup expired entries before lookup

FAQ

Can SQLite handle high-volume CAPTCHA solving?

SQLite handles up to ~1,000 writes/second with WAL mode enabled. For single-machine setups doing fewer than 1,000 solves/hour, it's reliable. Beyond that, consider PostgreSQL or MongoDB.

Should I store the full solution token?

Store it briefly for debugging, then let cleanup routines remove old records. Tokens expire in 90–120 seconds, so they're useless after that.

How do I migrate from SQLite to a production database?

Export with sqlite3 captcha_solves.db ".dump" and import into PostgreSQL or MongoDB. The schema translates directly — SQLite is a great development-to-production stepping stone.

Next Steps

Add lightweight solve tracking to your pipeline — get your CaptchaAI API key and start logging results today.

Related guides:

Full Working Code

Complete runnable examples for this article in Python, Node.js, PHP, Go, Java, C#, Ruby, Rust, Kotlin & Bash.

View on GitHub →

Discussions (0)

No comments yet.