Integrations

Oxylabs + CaptchaAI: Datacenter Proxy Integration

Oxylabs offers 100M+ residential IPs and 2M+ datacenter IPs — known for reliability and enterprise-grade infrastructure. This guide shows how to integrate Oxylabs proxies with CaptchaAI for CAPTCHA-protected scraping workflows.


Oxylabs Proxy Products

| Product | IPs | Speed | Use Case |
|---|---|---|---|
| Datacenter Proxies | 2M+ | Very fast | High-volume, speed-first |
| Residential Proxies | 100M+ | Medium | CAPTCHA-sensitive sites |
| ISP Proxies | Residential-grade | Fast | E-commerce, social |
| Mobile Proxies | 20M+ | Slow | Mobile-specific targets |
| SERP Scraper API | Managed | Fast | Google/Bing results |

Python Integration

Requests

import requests
import time

# Oxylabs proxy credentials and gateway (host:port).
OXYLABS_USER = "customer-USERNAME"
OXYLABS_PASS = "PASSWORD"
OXYLABS_ENDPOINT = "pr.oxylabs.io:7777"

# CaptchaAI API key and base URL.
CAPTCHAAI_KEY = "YOUR_API_KEY"
CAPTCHAAI_URL = "https://ocr.captchaai.com"

# Residential rotating proxy: the same authenticated gateway URL is used for
# both plain-HTTP and HTTPS traffic.
proxies = dict.fromkeys(
    ("http", "https"),
    f"http://{OXYLABS_USER}:{OXYLABS_PASS}@{OXYLABS_ENDPOINT}",
)


def fetch_page(url, country=None):
    """GET *url* through the Oxylabs proxy and return the response.

    Args:
        url: Address to fetch.
        country: Optional country code. When truthy it is appended to the
            proxy username as ``-cc-<country>``.

    Returns:
        The ``requests`` response object, unmodified (no status check is
        performed here).
    """
    username = f"{OXYLABS_USER}-cc-{country}" if country else OXYLABS_USER
    gateway = f"http://{username}:{OXYLABS_PASS}@{OXYLABS_ENDPOINT}"

    # Send a desktop-browser User-Agent with the request.
    browser_headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 Chrome/126.0.0.0 Safari/537.36"
        ),
    }

    return requests.get(
        url,
        # Same gateway for both schemes.
        proxies=dict.fromkeys(("http", "https"), gateway),
        headers=browser_headers,
        timeout=30,
    )


def solve_recaptcha(site_url, sitekey, poll_interval=5, max_attempts=60,
                    request_timeout=30):
    """Solve a reCAPTCHA through CaptchaAI and return the solved token.

    Submits the task to ``in.php`` and then polls ``res.php`` until the
    service reports a result or the attempt budget is exhausted.

    Args:
        site_url: URL of the page showing the CAPTCHA (sent as ``pageurl``).
        sitekey: The page's reCAPTCHA site key (sent as ``googlekey``).
        poll_interval: Seconds to sleep before each poll.
        max_attempts: Maximum number of polls before giving up.
        request_timeout: Seconds allowed for each individual HTTP call, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        The ``request`` field of the first poll response whose ``status``
        is 1.

    Raises:
        Exception: CaptchaAI rejected the submission or reported a solve
            error (the message carries the API's ``request`` text).
        TimeoutError: No result after ``max_attempts`` polls.
        requests.RequestException: Network failure or per-request timeout.
    """
    submit = requests.post(
        f"{CAPTCHAAI_URL}/in.php",
        data={
            "key": CAPTCHAAI_KEY,
            "method": "userrecaptcha",
            "googlekey": sitekey,
            "pageurl": site_url,
            "json": 1,
        },
        timeout=request_timeout,
    )
    data = submit.json()
    if data["status"] != 1:
        raise Exception(f"Submit: {data['request']}")

    # On success the "request" field holds the task id to poll for.
    task_id = data["request"]

    for _ in range(max_attempts):
        # Sleep first: the result is not polled immediately after submission.
        time.sleep(poll_interval)
        poll = requests.get(
            f"{CAPTCHAAI_URL}/res.php",
            params={
                "key": CAPTCHAAI_KEY,
                "action": "get",
                "id": task_id,
                "json": 1,
            },
            timeout=request_timeout,
        )
        data = poll.json()
        # NOTE(review): "CAPCHA" (sic) is compared verbatim against the API
        # response; presumably this is the service's own spelling - do not
        # "correct" it without confirming against the API docs.
        if data["request"] == "CAPCHA_NOT_READY":
            continue
        if data["status"] == 1:
            return data["request"]
        raise Exception(f"Solve: {data['request']}")

    raise TimeoutError("Timeout")

Sticky Sessions

def get_oxylabs_sticky_proxy(session_id, country=None):
    """Build a ``requests`` proxy mapping for an Oxylabs sticky session.

    The session id is embedded in the proxy username as
    ``-sessid-<session_id>``; reusing the same id is what keeps requests on
    the same IP.

    Args:
        session_id: Identifier for the sticky session.
        country: Optional country code, appended as ``-cc-<country>`` when
            truthy.

    Returns:
        A dict with ``"http"`` and ``"https"`` keys, both pointing at the
        same authenticated proxy URL.
    """
    geo_suffix = f"-cc-{country}" if country else ""
    username = f"{OXYLABS_USER}-sessid-{session_id}{geo_suffix}"
    return dict.fromkeys(
        ("http", "https"),
        f"http://{username}:{OXYLABS_PASS}@{OXYLABS_ENDPOINT}",
    )


# CAPTCHA workflow with sticky IP
import random
import re
import string

# Random 8-letter session id; every request made with `proxy` below reuses it.
session = "".join(random.choices(string.ascii_lowercase, k=8))
proxy = get_oxylabs_sticky_proxy(session, country="us")

# All requests use the same IP
resp = requests.get("https://target.com/form", proxies=proxy, timeout=30)

# Solve the CAPTCHA found on the form page so that `token` is actually
# defined before it is submitted.
sitekey_match = re.search(r'data-sitekey="([^"]+)"', resp.text)
if sitekey_match is None:
    raise RuntimeError("No reCAPTCHA sitekey found on the form page")
token = solve_recaptcha("https://target.com/form", sitekey_match.group(1))

resp = requests.post(
    "https://target.com/submit",
    proxies=proxy,
    data={"g-recaptcha-response": token},
    timeout=30,
)

Oxylabs Web Scraper API + CaptchaAI

For sites where Oxylabs' built-in rendering isn't enough for CAPTCHAs:

def scrape_with_oxylabs_api(url):
    """Render *url* via the Oxylabs Web Scraper API, solving any reCAPTCHA found.

    Posts a ``universal`` render job to the realtime endpoint, then scans the
    returned HTML for a ``data-sitekey`` attribute. If one is present, the
    CAPTCHA is solved with ``solve_recaptcha``.

    Args:
        url: Page to render.

    Returns:
        ``{"html": ..., "captcha_solved": True, "token": ...}`` when a sitekey
        was found and solved, otherwise
        ``{"html": ..., "captcha_solved": False}``.

    Raises:
        requests.HTTPError: The API answered with an error status.
        requests.RequestException: Network failure or timeout.
    """
    import re

    resp = requests.post(
        "https://realtime.oxylabs.io/v1/queries",
        auth=(OXYLABS_USER, OXYLABS_PASS),
        json={
            "source": "universal",
            "url": url,
            "render": "html",
            "browser_instructions": [
                {"type": "wait", "wait_time_s": 3},
            ],
        },
        # Bound the call so a stalled job cannot hang the caller. The value is
        # deliberately generous because the job renders the page and waits -
        # NOTE(review): tune against the API's documented limits.
        timeout=180,
    )
    # Fail with a clear HTTP error instead of a KeyError on "results" below.
    resp.raise_for_status()

    result = resp.json()
    html = result["results"][0]["content"]

    # Check for CAPTCHA in rendered HTML
    sitekey_match = re.search(r'data-sitekey="([^"]+)"', html)

    if sitekey_match:
        sitekey = sitekey_match.group(1)
        token = solve_recaptcha(url, sitekey)
        return {"html": html, "captcha_solved": True, "token": token}

    return {"html": html, "captcha_solved": False}

Datacenter vs Residential for CAPTCHA

# Datacenter: Fast but higher CAPTCHA rate
# Datacenter: Fast but higher CAPTCHA rate
DC_PROXY = dict.fromkeys(
    ("http", "https"),
    f"http://{OXYLABS_USER}:{OXYLABS_PASS}@dc.pr.oxylabs.io:10000",
)

# Residential: Slower but lower CAPTCHA rate
RES_PROXY = dict.fromkeys(
    ("http", "https"),
    f"http://{OXYLABS_USER}:{OXYLABS_PASS}@pr.oxylabs.io:7777",
)


def smart_proxy_selection(url, captcha_sensitive=True):
    """Choose between the residential and datacenter proxy mappings.

    Args:
        url: Target URL. Accepted but not consulted by the current logic.
        captcha_sensitive: When truthy, pick the residential proxy (less
            likely to trigger a CAPTCHA); otherwise pick the faster
            datacenter proxy and rely on CaptchaAI for any CAPTCHAs.

    Returns:
        ``RES_PROXY`` or ``DC_PROXY``.
    """
    return RES_PROXY if captcha_sensitive else DC_PROXY

Concurrent Pipeline

from concurrent.futures import ThreadPoolExecutor, as_completed

def process_url(url):
    """Fetch *url* over a fresh sticky session and solve its reCAPTCHA, if any.

    Intended as a thread-pool worker: every failure is converted into an
    error record so that one bad URL does not abort the whole batch.

    Args:
        url: Page to fetch.

    Returns:
        A dict with ``"url"`` and ``"status"``:

        * ``"solved"`` - a sitekey was found; ``"token"`` holds the solution.
        * ``"no_captcha"`` - no ``data-sitekey`` attribute in the page.
        * ``"error"`` - something raised; ``"error"`` holds ``str(exc)``.
    """
    import re

    # New random session id per URL, passed to the sticky-proxy helper.
    session = "".join(random.choices(string.ascii_lowercase, k=8))
    proxy = get_oxylabs_sticky_proxy(session, country="us")

    try:
        resp = requests.get(url, proxies=proxy, timeout=30)
        match = re.search(r'data-sitekey="([^"]+)"', resp.text)

        if match:
            token = solve_recaptcha(url, match.group(1))
            # Hand the token back; without it the solve is wasted work.
            return {"url": url, "status": "solved", "token": token}
        return {"url": url, "status": "no_captcha"}

    except Exception as e:
        # Worker boundary: report the failure instead of raising into the pool.
        return {"url": url, "status": "error", "error": str(e)}


urls = [
    "https://site1.com",
    "https://site2.com",
    "https://site3.com",
]

# Submit every URL to a pool of five workers and print each result as soon as
# its worker finishes (completion order, not submission order).
with ThreadPoolExecutor(max_workers=5) as executor:
    futures = {}
    for u in urls:
        futures[executor.submit(process_url, u)] = u
    for future in as_completed(futures):
        outcome = future.result()
        print(outcome)

Troubleshooting

| Issue | Cause | Fix |
|---|---|---|
| 403 from proxy | IP blacklisted | Switch to residential or rotate |
| Auth failed | Wrong endpoint | Datacenter: dc.pr.oxylabs.io:10000, Residential: pr.oxylabs.io:7777 |
| Sticky session expires | Default 10 min | Create new session ID |
| CAPTCHA on every page | Datacenter IP recognized | Use residential proxy |
| Slow response | Residential network latency | Accept or switch to ISP proxy |

FAQ

Which Oxylabs product should I use with CaptchaAI?

Use residential proxies for CAPTCHA-heavy sites, where they trigger fewer challenges. Use datacenter proxies for speed-first workflows where CaptchaAI handles all CAPTCHAs.

Can Oxylabs solve CAPTCHAs itself?

Oxylabs' Web Scraper API handles some JavaScript challenges but doesn't solve CAPTCHAs. Use CaptchaAI for reCAPTCHA, Turnstile, and other CAPTCHA types.

Is the pricing per GB or per request?

Residential: per GB. Datacenter: per IP. Check the Oxylabs pricing page for current rates.



Combine Oxylabs' proxy infrastructure with CaptchaAI solving — get your API key.

Full Working Code

Complete runnable examples for this article in Python, Node.js, PHP, Go, Java, C#, Ruby, Rust, Kotlin & Bash.

View on GitHub →

Discussions (0)

No comments yet.