Use Cases

Puppeteer CAPTCHA Solving with Node.js and CaptchaAI

Puppeteer is the go-to headless browser for Node.js automation. When target sites serve CAPTCHAs, CaptchaAI's API solves them externally — Puppeteer extracts the parameters, CaptchaAI returns the token, and Puppeteer injects it back.

Requirements

| Requirement | Details |
| --- | --- |
| Node.js 16+ | With npm |
| Puppeteer | `npm install puppeteer` |
| axios | `npm install axios` |
| CaptchaAI API key | From captchaai.com |

How It Works

  1. Puppeteer navigates to the page with the CAPTCHA
  2. Your script extracts the CAPTCHA site key from the DOM
  3. CaptchaAI solves the challenge server-side
  4. Your script injects the token and submits the form

Step 1: Create the Solver Module

// solver.js
const axios = require("axios");

// CaptchaAI API key. Read from the CAPTCHAAI_API_KEY environment variable so
// the real key never has to be committed; `||` (not `??`) is deliberate so an
// empty variable also falls back to the placeholder, which can still be
// edited in place.
const API_KEY = process.env.CAPTCHAAI_API_KEY || "YOUR_API_KEY";
// Delay between two result polls, in milliseconds (passed to setTimeout).
const POLL_INTERVAL = 5000;
// Maximum number of polls before giving up: 60 polls x 5 s = about 5 minutes.
const MAX_ATTEMPTS = 60;

/**
 * Solve a reCAPTCHA v2 challenge through the CaptchaAI HTTP API.
 *
 * Submits the task to in.php, then polls res.php every POLL_INTERVAL ms, at
 * most MAX_ATTEMPTS times, until a token is returned.
 *
 * @param {string} siteKey - Sent to the API as `googlekey`.
 * @param {string} pageUrl - Sent to the API as `pageurl`.
 * @returns {Promise<string>} Everything after the "OK|" prefix of the result.
 * @throws {Error} "Submit failed: ..." when the submission is rejected,
 *   "Solve failed: ..." when polling returns an error, and
 *   "Solve timed out" when no result arrives within MAX_ATTEMPTS polls.
 */
async function solveRecaptchaV2(siteKey, pageUrl) {
  const OK_PREFIX = "OK|";

  // axios JSON-parses response bodies when it can, so `data` is not
  // guaranteed to be a string; normalise it before using string methods.
  const asText = (data) =>
    typeof data === "string" ? data.trim() : `${JSON.stringify(data)}`;

  // Submit task
  const submitResp = await axios.get("https://ocr.captchaai.com/in.php", {
    params: {
      key: API_KEY,
      method: "userrecaptcha",
      googlekey: siteKey,
      pageurl: pageUrl,
    },
  });

  const submitBody = asText(submitResp.data);
  if (!submitBody.startsWith(OK_PREFIX)) {
    throw new Error(`Submit failed: ${submitBody}`);
  }

  // Take everything after the prefix instead of split("|")[1], which would
  // silently drop the tail of any value that itself contains "|".
  const taskId = submitBody.slice(OK_PREFIX.length);
  console.log(`Task submitted: ${taskId}`);

  // Poll for result
  for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
    await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL));

    const pollResp = await axios.get("https://ocr.captchaai.com/res.php", {
      params: { key: API_KEY, action: "get", id: taskId },
    });
    const pollBody = asText(pollResp.data);

    if (pollBody === "CAPCHA_NOT_READY") continue;
    if (pollBody.startsWith(OK_PREFIX)) {
      return pollBody.slice(OK_PREFIX.length);
    }
    throw new Error(`Solve failed: ${pollBody}`);
  }
  throw new Error("Solve timed out");
}

/**
 * Solve a Cloudflare Turnstile challenge through the CaptchaAI HTTP API.
 *
 * Submits the task to in.php, then polls res.php every POLL_INTERVAL ms, at
 * most MAX_ATTEMPTS times, until a token is returned.
 *
 * @param {string} siteKey - Sent to the API as `sitekey`.
 * @param {string} pageUrl - Sent to the API as `pageurl`.
 * @returns {Promise<string>} Everything after the "OK|" prefix of the result.
 * @throws {Error} "Submit failed: ..." when the submission is rejected,
 *   "Solve failed: ..." when polling returns an error, and
 *   "Solve timed out" when no result arrives within MAX_ATTEMPTS polls.
 */
async function solveTurnstile(siteKey, pageUrl) {
  const OK_PREFIX = "OK|";

  // axios JSON-parses response bodies when it can, so `data` is not
  // guaranteed to be a string; normalise it before using string methods.
  const asText = (data) =>
    typeof data === "string" ? data.trim() : `${JSON.stringify(data)}`;

  const submitResp = await axios.get("https://ocr.captchaai.com/in.php", {
    params: {
      key: API_KEY,
      method: "turnstile",
      sitekey: siteKey,
      pageurl: pageUrl,
    },
  });

  const submitBody = asText(submitResp.data);
  if (!submitBody.startsWith(OK_PREFIX)) {
    throw new Error(`Submit failed: ${submitBody}`);
  }

  // Take everything after the prefix instead of split("|")[1], which would
  // silently drop the tail of any value that itself contains "|".
  const taskId = submitBody.slice(OK_PREFIX.length);

  for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
    await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL));

    const pollResp = await axios.get("https://ocr.captchaai.com/res.php", {
      params: { key: API_KEY, action: "get", id: taskId },
    });
    const pollBody = asText(pollResp.data);

    if (pollBody === "CAPCHA_NOT_READY") continue;
    if (pollBody.startsWith(OK_PREFIX)) {
      return pollBody.slice(OK_PREFIX.length);
    }
    throw new Error(`Solve failed: ${pollBody}`);
  }
  throw new Error("Solve timed out");
}

module.exports = { solveRecaptchaV2, solveTurnstile };

Step 2: Set Up Puppeteer with Stealth

const puppeteer = require("puppeteer");

/**
 * Launch headless Chromium and open one page with basic automation
 * indicators masked.
 *
 * @returns {Promise<{browser: object, page: object}>} The launched browser
 *   and its first page. The caller is responsible for `browser.close()`.
 * @throws Re-throws any error raised while preparing the page, after closing
 *   the browser so no Chromium process is left behind.
 */
async function createBrowser() {
  const browser = await puppeteer.launch({
    headless: "new",
    args: [
      // NOTE(review): these two flags disable Chromium's sandbox. That is
      // often required inside containers, but only acceptable when the
      // visited pages are trusted -- confirm for your deployment.
      "--no-sandbox",
      "--disable-setuid-sandbox",
      "--disable-blink-features=AutomationControlled",
    ],
  });

  try {
    const page = await browser.newPage();

    // Derive the user agent from the real browser rather than hard-coding
    // one: a truncated UA with no "Chrome/<version>" token is itself a bot
    // signal, and a fixed version string goes stale. Only the "Headless"
    // marker is removed; the replace is a no-op when it is absent.
    const reportedUserAgent = await browser.userAgent();
    await page.setUserAgent(
      reportedUserAgent.replace("HeadlessChrome", "Chrome")
    );

    // Hide automation indicators
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, "webdriver", { get: () => false });
    });

    return { browser, page };
  } catch (err) {
    // Setup failed after launch: close the browser before propagating.
    // A failure of close() is ignored so the original error is not masked.
    await browser.close().catch(() => {});
    throw err;
  }
}

Step 3: Solve reCAPTCHA on a Page

const { solveRecaptchaV2 } = require("./solver");

/**
 * Open `url`, solve the reCAPTCHA v2 widget on it, submit the form and return
 * the HTML of the page reached after submission.
 *
 * @param {string} url - Page to open.
 * @returns {Promise<string>} Result of `page.content()` after the submit.
 * @throws {Error} If the widget has no site key, or any navigation, solving
 *   or injection step fails. The browser is closed in every case.
 */
async function scrapeWithCaptcha(url) {
  const { browser, page } = await createBrowser();

  try {
    await page.goto(url, { waitUntil: "networkidle2" });

    // The widget is frequently rendered by script after the initial load, so
    // wait for it instead of letting $eval fail on a missing element.
    await page.waitForSelector(".g-recaptcha");

    // Extract site key
    const siteKey = await page.$eval(".g-recaptcha", (el) =>
      el.getAttribute("data-sitekey")
    );
    if (!siteKey) {
      throw new Error("reCAPTCHA element has no data-sitekey attribute");
    }
    console.log("Site key:", siteKey);

    // Solve with CaptchaAI. Use the URL the browser actually ended up on, in
    // case the initial request was redirected.
    const token = await solveRecaptchaV2(siteKey, page.url());
    console.log("Token received:", token.substring(0, 50));

    // Inject token
    await page.evaluate((token) => {
      const field = document.getElementById("g-recaptcha-response");
      field.innerHTML = token;
      // innerHTML only sets a textarea's default value; also set the live
      // value so the token is what gets submitted.
      field.value = token;
      field.style.display = "";
    }, token);

    // Submit the form. Start waiting for the navigation BEFORE clicking:
    // awaiting click() first can miss a fast navigation and then time out.
    await Promise.all([
      page.waitForNavigation({ waitUntil: "networkidle2" }),
      page.click('button[type="submit"]'),
    ]);

    // Scrape the content
    const content = await page.content();
    console.log("Page loaded successfully");
    return content;
  } finally {
    await browser.close();
  }
}

Step 4: Handle Callbacks

Some sites use JavaScript callbacks instead of form submission:

// Trigger the reCAPTCHA callback.
// The function passed to page.evaluate runs inside the page, so it must be
// self-contained: it cannot close over anything defined in the Node script.
await page.evaluate((token) => {
  // Method 1: Direct callback
  if (typeof ___grecaptcha_cfg === "undefined" || !___grecaptcha_cfg.clients) {
    return;
  }

  // Client objects are minified and may reference DOM nodes, which link back
  // to the whole document. Bound the depth and never revisit an object so the
  // search cannot recurse forever.
  const MAX_DEPTH = 8;
  const visited = new WeakSet();

  // Some reachable objects (e.g. cross-origin frames) throw on property
  // access; treat those properties as absent.
  const read = (obj, prop) => {
    try {
      return obj[prop];
    } catch (err) {
      return undefined;
    }
  };

  // Find the callback function and call it with the token.
  // Returns true once a callback has been invoked.
  const findCallback = (obj, depth) => {
    if (obj === null || typeof obj !== "object") return false;
    if (depth > MAX_DEPTH || visited.has(obj)) return false;
    if (typeof Node !== "undefined" && obj instanceof Node) return false;
    visited.add(obj);

    // Only the property named "callback" is the site's handler. Calling the
    // first function found under any name would run unrelated library
    // internals with the token as argument.
    const callback = read(obj, "callback");
    if (typeof callback === "function") {
      callback(token);
      return true;
    }
    // Widgets configured through data-callback="name" store the name of a
    // global function rather than the function itself.
    if (typeof callback === "string" && typeof window[callback] === "function") {
      window[callback](token);
      return true;
    }

    return Object.keys(obj).some((prop) =>
      findCallback(read(obj, prop), depth + 1)
    );
  };

  Object.keys(___grecaptcha_cfg.clients).forEach((key) => {
    findCallback(___grecaptcha_cfg.clients[key], 0);
  });
}, token);

Full Working Example

const puppeteer = require("puppeteer");
const axios = require("axios");

// CaptchaAI API key. Read from the CAPTCHAAI_API_KEY environment variable so
// the real key never has to be committed; an unset or empty variable falls
// back to the placeholder, which can still be edited in place.
const API_KEY = process.env.CAPTCHAAI_API_KEY || "YOUR_API_KEY";

/**
 * Solve a reCAPTCHA v2 challenge through the CaptchaAI HTTP API.
 *
 * Submits the task to in.php, then polls res.php every 5 seconds until a
 * token is returned, an error is reported, or the poll budget is used up.
 *
 * @param {string} siteKey - Sent to the API as `googlekey`.
 * @param {string} pageUrl - Sent to the API as `pageurl`.
 * @returns {Promise<string>} Everything after the "OK|" prefix of the result.
 * @throws {Error} "Submit failed: ..." when the submission is rejected, the
 *   raw API response when polling returns an error, and "Solve timed out"
 *   when no result arrives within the poll budget.
 */
async function solveCaptcha(siteKey, pageUrl) {
  const OK_PREFIX = "OK|";
  const POLL_INTERVAL_MS = 5000;
  // 60 polls x 5 s = about 5 minutes; without a cap a stuck task would keep
  // this loop (and the open browser) alive forever.
  const MAX_POLLS = 60;

  // axios JSON-parses response bodies when it can, so `data` is not
  // guaranteed to be a string; normalise it before using string methods.
  const asText = (data) =>
    typeof data === "string" ? data.trim() : `${JSON.stringify(data)}`;

  const submit = await axios.get("https://ocr.captchaai.com/in.php", {
    params: {
      key: API_KEY,
      method: "userrecaptcha",
      googlekey: siteKey,
      pageurl: pageUrl,
    },
  });

  // Fail here on a rejected submission. Otherwise the task id would be
  // `undefined` and the real cause would be hidden behind a later poll error.
  const submitBody = asText(submit.data);
  if (!submitBody.startsWith(OK_PREFIX)) {
    throw new Error(`Submit failed: ${submitBody}`);
  }
  const taskId = submitBody.slice(OK_PREFIX.length);

  for (let attempt = 0; attempt < MAX_POLLS; attempt++) {
    await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));

    const result = await axios.get("https://ocr.captchaai.com/res.php", {
      params: { key: API_KEY, action: "get", id: taskId },
    });
    const resultBody = asText(result.data);

    if (resultBody === "CAPCHA_NOT_READY") continue;
    // slice() keeps the whole token even if it contains "|".
    if (resultBody.startsWith(OK_PREFIX)) {
      return resultBody.slice(OK_PREFIX.length);
    }
    throw new Error(resultBody);
  }
  throw new Error("Solve timed out");
}

// Entry point: open the login page, solve its reCAPTCHA, submit the form.
(async () => {
  const browser = await puppeteer.launch({
    headless: "new",
    args: ["--disable-blink-features=AutomationControlled"],
  });

  try {
    // Inside the try block so the browser is closed even if this fails.
    const page = await browser.newPage();

    await page.goto("https://example.com/login", {
      waitUntil: "networkidle2",
    });

    // The widget is frequently rendered by script after the initial load.
    await page.waitForSelector(".g-recaptcha");

    // Get the site key
    const siteKey = await page.$eval(".g-recaptcha", (el) =>
      el.getAttribute("data-sitekey")
    );

    // Solve
    const token = await solveCaptcha(siteKey, page.url());

    // Inject and submit
    await page.evaluate((t) => {
      const field = document.getElementById("g-recaptcha-response");
      field.innerHTML = t;
      // innerHTML only sets a textarea's default value; also set the live
      // value so the token is what gets submitted.
      field.value = t;
    }, token);

    // Start waiting for the navigation BEFORE clicking: awaiting click()
    // first can miss a fast navigation and then time out.
    await Promise.all([page.waitForNavigation(), page.click("#submit-btn")]);

    console.log("Done:", page.url());
  } finally {
    await browser.close();
  }
})().catch((err) => {
  // Report the failure explicitly instead of leaving an unhandled rejection.
  console.error("CAPTCHA flow failed:", err);
  process.exitCode = 1;
});

Troubleshooting

| Issue | Cause | Fix |
| --- | --- | --- |
| `page.$eval` fails | CAPTCHA loads after initial render | Use `page.waitForSelector('.g-recaptcha')` |
| Token doesn't work | Expired before submission | Inject immediately after receiving |
| Site detects Puppeteer | Missing stealth config | Use `puppeteer-extra-plugin-stealth` |
| Navigation timeout | Page didn't navigate after submit | Check if site uses AJAX instead of form post |

FAQ

Should I use headless or headed mode?

Headless mode works fine with CaptchaAI since the CAPTCHA is solved server-side. Use headed mode only for debugging.

Can I use Puppeteer with Cloudflare Turnstile?

Yes. Extract the data-sitekey from the .cf-turnstile div and use method=turnstile with CaptchaAI. See the solveTurnstile function above.

How do I handle multiple CAPTCHAs on one page?

Extract each site key separately and solve them in parallel using Promise.all().

Full Working Code

Complete runnable examples for this article in Python, Node.js, PHP, Go, Java, C#, Ruby, Rust, Kotlin & Bash.

View on GitHub →

Discussions (0)

No comments yet.