Use Cases

Supply Chain Monitoring with CAPTCHA Handling

Supply chain visibility requires data from hundreds of supplier portals, logistics platforms, and inventory systems. Many of these protect their data behind CAPTCHAs. CaptchaAI handles these challenges so monitoring pipelines run uninterrupted.


Where CAPTCHAs Block Supply Chain Data

| Source Type | CAPTCHA Type | Data | Frequency |
|---|---|---|---|
| Supplier portals | reCAPTCHA v2 | Inventory, pricing, lead times | Daily |
| Shipping carriers | Cloudflare Turnstile | Tracking, rates, delivery ETAs | Hourly |
| Manufacturer catalogs | Image CAPTCHA | Product specs, MOQs | Weekly |
| Customs portals | reCAPTCHA v2 | Duty rates, tariff codes | Daily |
| Port authorities | Image CAPTCHA | Vessel schedules, port congestion | Every 6 hours |
| Commodity exchanges | reCAPTCHA v3 | Spot prices, futures | Real-time |

Multi-Supplier Monitor

import requests
import time
import re
import json
import base64
from datetime import datetime

# Placeholder credential: replace with your own CaptchaAI API key before running.
CAPTCHAAI_KEY = "YOUR_API_KEY"
# Base URL of the CaptchaAI HTTP API; the solve_* helpers append /in.php and /res.php.
CAPTCHAAI_URL = "https://ocr.captchaai.com"


def solve_recaptcha(sitekey, pageurl):
    """Solve a reCAPTCHA challenge through CaptchaAI and return the token.

    The task is submitted to ``in.php`` and the result is polled from
    ``res.php`` every 5 seconds, up to 60 times (about 5 minutes).

    Args:
        sitekey: Site key of the reCAPTCHA widget (sent as ``googlekey``).
        pageurl: URL of the page that shows the CAPTCHA.

    Returns:
        The solved token string.

    Raises:
        RuntimeError: If the API rejects the task or reports a solving error.
        TimeoutError: If no result is ready after all polling attempts.
        requests.RequestException: On network failures or HTTP errors.
    """
    submit = requests.post(f"{CAPTCHAAI_URL}/in.php", data={
        "key": CAPTCHAAI_KEY, "method": "userrecaptcha",
        "googlekey": sitekey, "pageurl": pageurl, "json": 1,
    }, timeout=30)
    submit.raise_for_status()
    submitted = submit.json()
    # With status != 1 the "request" field holds an error code, not a task
    # id; polling with it would only produce another error.
    if submitted.get("status") not in (1, "1"):
        raise RuntimeError(
            f"CaptchaAI submit failed: {submitted.get('request')}"
        )
    task_id = submitted["request"]

    for _ in range(60):
        time.sleep(5)
        poll = requests.get(f"{CAPTCHAAI_URL}/res.php", params={
            "key": CAPTCHAAI_KEY, "action": "get",
            "id": task_id, "json": 1,
        }, timeout=30)
        poll.raise_for_status()
        data = poll.json()
        if data.get("request") == "CAPCHA_NOT_READY":
            continue
        # Anything else with a non-success status is an error code and must
        # not be handed back to the caller as if it were a token.
        if data.get("status") not in (1, "1"):
            raise RuntimeError(
                f"CaptchaAI solve failed: {data.get('request')}"
            )
        return data["request"]
    raise TimeoutError("Timeout")


def solve_turnstile(sitekey, pageurl):
    """Solve a Cloudflare Turnstile challenge through CaptchaAI.

    The task is submitted to ``in.php`` and the result is polled from
    ``res.php`` every 5 seconds, up to 60 times (about 5 minutes).

    Args:
        sitekey: Site key of the Turnstile widget.
        pageurl: URL of the page that shows the CAPTCHA.

    Returns:
        The solved token string.

    Raises:
        RuntimeError: If the API rejects the task or reports a solving error.
        TimeoutError: If no result is ready after all polling attempts.
        requests.RequestException: On network failures or HTTP errors.
    """
    submit = requests.post(f"{CAPTCHAAI_URL}/in.php", data={
        "key": CAPTCHAAI_KEY, "method": "turnstile",
        "sitekey": sitekey, "pageurl": pageurl, "json": 1,
    }, timeout=30)
    submit.raise_for_status()
    submitted = submit.json()
    # With status != 1 the "request" field holds an error code, not a task
    # id; polling with it would only produce another error.
    if submitted.get("status") not in (1, "1"):
        raise RuntimeError(
            f"CaptchaAI submit failed: {submitted.get('request')}"
        )
    task_id = submitted["request"]

    for _ in range(60):
        time.sleep(5)
        poll = requests.get(f"{CAPTCHAAI_URL}/res.php", params={
            "key": CAPTCHAAI_KEY, "action": "get",
            "id": task_id, "json": 1,
        }, timeout=30)
        poll.raise_for_status()
        data = poll.json()
        if data.get("request") == "CAPCHA_NOT_READY":
            continue
        # Anything else with a non-success status is an error code and must
        # not be handed back to the caller as if it were a token.
        if data.get("status") not in (1, "1"):
            raise RuntimeError(
                f"CaptchaAI solve failed: {data.get('request')}"
            )
        return data["request"]
    raise TimeoutError("Timeout")


def solve_image(image_bytes):
    """Solve an image CAPTCHA through CaptchaAI and return the answer text.

    The image is base64-encoded and submitted to ``in.php``; the result is
    polled from ``res.php`` every 3 seconds, up to 20 times (about 1 minute).

    Args:
        image_bytes: Raw bytes of the CAPTCHA image.

    Returns:
        The recognized answer string.

    Raises:
        RuntimeError: If the API rejects the task or reports a solving error.
        TimeoutError: If no result is ready after all polling attempts.
        requests.RequestException: On network failures or HTTP errors.
    """
    img_b64 = base64.b64encode(image_bytes).decode()
    submit = requests.post(f"{CAPTCHAAI_URL}/in.php", data={
        "key": CAPTCHAAI_KEY, "method": "base64",
        "body": img_b64, "json": 1,
    }, timeout=30)
    submit.raise_for_status()
    submitted = submit.json()
    # With status != 1 the "request" field holds an error code, not a task
    # id; polling with it would only produce another error.
    if submitted.get("status") not in (1, "1"):
        raise RuntimeError(
            f"CaptchaAI submit failed: {submitted.get('request')}"
        )
    task_id = submitted["request"]

    for _ in range(20):
        time.sleep(3)
        poll = requests.get(f"{CAPTCHAAI_URL}/res.php", params={
            "key": CAPTCHAAI_KEY, "action": "get",
            "id": task_id, "json": 1,
        }, timeout=30)
        poll.raise_for_status()
        data = poll.json()
        if data.get("request") == "CAPCHA_NOT_READY":
            continue
        # Anything else with a non-success status is an error code and must
        # not be submitted to the target site as if it were the answer.
        if data.get("status") not in (1, "1"):
            raise RuntimeError(
                f"CaptchaAI solve failed: {data.get('request')}"
            )
        return data["request"]
    raise TimeoutError("Timeout")


class SupplyChainMonitor:
    """Fetch supplier pages, solve their CAPTCHAs, and extract inventory.

    Each supplier is a dict with ``name`` and ``url`` keys and optional
    ``captcha_type`` (``"recaptcha"``, ``"turnstile"`` or ``"image"``) and
    ``sitekey`` keys. All requests share one ``requests.Session``.
    """

    # Substrings (compared against lower-cased HTML) that mark a CAPTCHA page.
    _CAPTCHA_MARKERS = (
        'data-sitekey', 'g-recaptcha', 'cf-turnstile', 'captcha',
    )

    def __init__(self, suppliers, proxy=None, request_delay=3):
        """Create the monitor.

        Args:
            suppliers: Iterable of supplier dicts (see class docstring).
            proxy: Optional proxy URL used for both HTTP and HTTPS traffic.
            request_delay: Seconds to wait between consecutive suppliers.
        """
        self.suppliers = suppliers
        self.request_delay = request_delay
        self.session = requests.Session()
        if proxy:
            self.session.proxies = {"http": proxy, "https": proxy}
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 Chrome/126.0.0.0 Safari/537.36",
        })

    def check_all(self):
        """Check inventory and pricing across all suppliers.

        Returns:
            A dict with a ``timestamp`` (ISO format) and a ``suppliers``
            mapping from supplier name to either
            ``{"status": "success", "data": ...}`` or
            ``{"status": "error", "error": <message>}``.
        """
        report = {
            "timestamp": datetime.now().isoformat(),
            "suppliers": {},
        }

        for index, supplier in enumerate(self.suppliers):
            # Pause between suppliers only; there is nothing to pace after
            # the last one, so no trailing sleep.
            if index:
                time.sleep(self.request_delay)
            try:
                data = self._check_supplier(supplier)
                report["suppliers"][supplier["name"]] = {
                    "status": "success",
                    "data": data,
                }
            except Exception as e:
                # Per-supplier boundary: one failing portal must not abort
                # the whole run, so the error is recorded in the report.
                report["suppliers"][supplier["name"]] = {
                    "status": "error",
                    "error": str(e),
                }

        return report

    def _check_supplier(self, supplier):
        """Fetch one supplier page and return its parsed products and date."""
        url = supplier["url"]
        resp = self.session.get(url, timeout=30)

        # Only attempt a solve when the supplier is configured with a
        # CAPTCHA type and the fetched page looks like a CAPTCHA page.
        captcha_type = supplier.get("captcha_type")
        if captcha_type and self._has_captcha(resp.text):
            resp = self._solve_captcha(resp, url, supplier)

        from bs4 import BeautifulSoup
        soup = BeautifulSoup(resp.text, "html.parser")

        return {
            "products": self._extract_inventory(soup),
            "last_updated": self._extract_date(soup),
        }

    def _has_captcha(self, html):
        """Return True if the HTML contains any known CAPTCHA marker."""
        lowered = html.lower()  # lower-case once, not once per marker
        return any(marker in lowered for marker in self._CAPTCHA_MARKERS)

    @staticmethod
    def _image_url(page_url, src):
        """Resolve a CAPTCHA image ``src`` against the page URL."""
        from urllib.parse import urljoin

        # A root-relative src such as "/captcha/img.png" belongs to the site
        # root, not underneath the page path, so plain concatenation is wrong.
        return urljoin(page_url, src)

    def _solve_captcha(self, resp, url, supplier):
        """Solve the CAPTCHA on ``resp`` and return the follow-up response.

        Returns ``resp`` unchanged when no image CAPTCHA or sitekey can be
        located on the page.
        """
        captcha_type = supplier.get("captcha_type", "recaptcha")

        if captcha_type == "image":
            match = re.search(r'src="(/captcha[^"]+)"', resp.text)
            if not match:
                return resp
            img_resp = self.session.get(
                self._image_url(url, match.group(1)), timeout=30
            )
            answer = solve_image(img_resp.content)
            return self.session.post(
                url, data={"captcha": answer}, timeout=30
            )

        # Token-based CAPTCHAs need a sitekey: prefer the configured one and
        # fall back to the value embedded in the page.
        sitekey = supplier.get("sitekey", "")
        if not sitekey:
            match = re.search(r'data-sitekey="([^"]+)"', resp.text)
            sitekey = match.group(1) if match else ""
        if not sitekey:
            # Without a sitekey there is nothing to solve; an empty key would
            # only produce a solver error, so parse the page as fetched.
            return resp

        if captcha_type == "turnstile":
            token = solve_turnstile(sitekey, url)
            return self.session.post(
                url, data={"cf-turnstile-response": token}, timeout=30
            )

        token = solve_recaptcha(sitekey, url)
        return self.session.post(
            url, data={"g-recaptcha-response": token}, timeout=30
        )

    def _extract_inventory(self, soup):
        """Return ``sku``/``stock``/``price`` dicts for each inventory row.

        Rows with fewer than three cells (e.g. header rows built from
        ``th`` elements) are skipped.
        """
        items = []
        for row in soup.select("table.inventory tr, .product-row"):
            cols = row.select("td, .col")
            if len(cols) >= 3:
                items.append({
                    "sku": cols[0].get_text(strip=True),
                    "stock": cols[1].get_text(strip=True),
                    "price": cols[2].get_text(strip=True),
                })
        return items

    def _extract_date(self, soup):
        """Return the page's last-updated text, or "" when it is absent."""
        date_el = soup.select_one(".last-updated, .update-time")
        return date_el.get_text(strip=True) if date_el else ""


# Supplier definitions. Each entry names the portal, the page to fetch, and
# the CAPTCHA type SupplyChainMonitor should expect there.
supplier_a = {
    "name": "Supplier A",
    "url": "https://supplier-a.example.com/inventory",
    "captcha_type": "recaptcha",
    "sitekey": "6Lc_xxxxxxx",
}
carrier_b = {
    "name": "Carrier B",
    "url": "https://carrier-b.example.com/rates",
    "captcha_type": "turnstile",
    "sitekey": "0x4AAAAAAA_xxx",
}
# No sitekey here: the image CAPTCHA path does not use one.
manufacturer_c = {
    "name": "Manufacturer C",
    "url": "https://manufacturer-c.example.com/catalog",
    "captcha_type": "image",
}
suppliers = [supplier_a, carrier_b, manufacturer_c]

# Run one monitoring pass through the proxy and print the report as JSON.
proxy_url = "http://user:pass@residential.proxy.com:5000"
monitor = SupplyChainMonitor(suppliers=suppliers, proxy=proxy_url)
report = monitor.check_all()
print(json.dumps(report, indent=2))

Shipping Rate Monitoring

class ShippingRateTracker:
    """Query a carrier's rate endpoint, solving Turnstile when present."""

    def __init__(self, proxy=None):
        """Create the tracker.

        Args:
            proxy: Optional proxy URL used for both HTTP and HTTPS traffic.
        """
        self.session = requests.Session()
        if proxy:
            self.session.proxies = {"http": proxy, "https": proxy}
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 Chrome/126.0.0.0 Safari/537.36",
        })

    def get_rates(self, carrier_url, origin, destination, weight):
        """Fetch shipping rates, handling Turnstile CAPTCHA.

        The carrier page is fetched first to discover a Turnstile sitekey.
        The rate query is then always submitted; the solved token is added
        only when the page actually carries a sitekey.

        Args:
            carrier_url: URL of the carrier's rate page.
            origin: Shipment origin, sent as the ``origin`` form field.
            destination: Shipment destination, sent as ``destination``.
            weight: Shipment weight, sent as ``weight``.

        Returns:
            The ``rates`` list from the JSON response, or ``[]`` when the
            response is not HTTP 200 or is not a JSON object.
        """
        landing = self.session.get(carrier_url, timeout=30)

        query = {
            "origin": origin,
            "destination": destination,
            "weight": weight,
        }
        sitekey_match = re.search(r'data-sitekey="([^"]+)"', landing.text)
        if sitekey_match:
            query["cf-turnstile-response"] = solve_turnstile(
                sitekey_match.group(1), carrier_url
            )

        # Submit the query even when no CAPTCHA was shown; otherwise the
        # landing page itself would be parsed as if it were the rates reply.
        resp = self.session.post(carrier_url, data=query, timeout=30)
        if resp.status_code != 200:
            return []
        try:
            payload = resp.json()
        except ValueError:
            # A 200 response with a non-JSON body (e.g. an HTML error page).
            return []
        if not isinstance(payload, dict):
            return []
        return payload.get("rates", [])

Alerting on Stock Changes

def monitor_with_alerts(monitor, alert_thresholds, check_interval=3600,
                        max_checks=None):
    """Continuously monitor and alert on inventory changes.

    An alert is printed when a product's stock falls below its threshold
    after having been at or above it on the previous check. The first
    observation of a product never alerts because there is nothing to
    compare it with.

    Args:
        monitor: Object whose ``check_all()`` returns a report dict with a
            ``suppliers`` mapping (as produced by ``SupplyChainMonitor``).
        alert_thresholds: Mapping from SKU to minimum stock level; SKUs
            that are not listed use a threshold of 10.
        check_interval: Seconds to sleep between checks.
        max_checks: Stop after this many checks; ``None`` runs forever.
    """
    previous_data = {}
    checks_done = 0

    while max_checks is None or checks_done < max_checks:
        report = monitor.check_all()
        checks_done += 1

        for supplier, info in report["suppliers"].items():
            if info["status"] != "success":
                continue

            for product in info["data"].get("products", []):
                sku = product["sku"]
                stock = product.get("stock", "")

                # Parse stock level from the first number in the text.
                # Stripping every non-digit would merge unrelated numbers
                # ("25 units (ships in 3 days)" would become 253).
                number = re.search(r"\d[\d,]*", stock)
                if number is None:
                    continue
                stock_qty = int(number.group().replace(",", ""))

                key = f"{supplier}:{sku}"
                prev_qty = previous_data.get(key, stock_qty)

                threshold = alert_thresholds.get(sku, 10)
                if stock_qty < threshold and prev_qty >= threshold:
                    print(f"ALERT: {supplier} - {sku} dropped to {stock_qty}")

                previous_data[key] = stock_qty

        # No point sleeping after the final bounded check.
        if max_checks is not None and checks_done >= max_checks:
            break
        time.sleep(check_interval)

Troubleshooting

| Issue | Cause | Fix |
|---|---|---|
| Supplier page layout changed | Site redesign | Update CSS selectors |
| CAPTCHA on every check | Checking too frequently | Increase interval between checks |
| Session expires mid-check | Portal timeout | Use sticky session, check faster |
| Rate data missing | Login required | Add authentication step |
| Wrong prices displayed | Geo-based pricing | Match proxy location to market |

FAQ

How often should I check supplier inventory?

Daily for most suppliers. Hourly for critical components during supply shortages. Too-frequent checks trigger CAPTCHAs faster.

Can I monitor hundreds of suppliers?

Yes. Rotate across suppliers with delays between each. Use rotating proxies to distribute the load across IPs.

Which CAPTCHA type is most common on supply chain sites?

reCAPTCHA v2 on supplier portals, Cloudflare Turnstile on logistics/carrier sites. Older manufacturer sites often use image CAPTCHAs.



Keep your supply chain visible — get your CaptchaAI key and automate data collection across all supplier portals.

Full Working Code

Complete runnable examples for this article in Python, Node.js, PHP, Go, Java, C#, Ruby, Rust, Kotlin & Bash.

View on GitHub →

Discussions (0)

No comments yet.