Use Cases

Shipping and Logistics Rate Scraping with CAPTCHA Solving

Shipping carriers and freight platforms protect rate calculators and tracking portals with CAPTCHAs. Logistics teams and comparison platforms need automated access to compare rates, track shipments, and monitor delivery performance across carriers.


CAPTCHAs in Logistics

| Carrier Type | CAPTCHA | Protected Pages | Use Case |
|---|---|---|---|
| Parcel carriers (UPS, FedEx) | reCAPTCHA v2 | Rate calculator, tracking | Rate comparison |
| Freight brokers | Cloudflare Turnstile | Quote requests | Freight rate shopping |
| Ocean carriers | Image CAPTCHA | Vessel tracking | Container tracking |
| Customs portals | reCAPTCHA v2 | Duty calculators | Duty estimation |
| Last-mile delivery | reCAPTCHA v2 | Tracking pages | Delivery monitoring |
| Postal services | Image CAPTCHA | Rate calculators | International rates |

Multi-Carrier Rate Scraper

import requests
import time
import re
import base64
from bs4 import BeautifulSoup

CAPTCHAAI_KEY = "YOUR_API_KEY"
CAPTCHAAI_URL = "https://ocr.captchaai.com"


def solve_recaptcha(sitekey, pageurl):
    """Solve a reCAPTCHA v2 challenge through the CaptchaAI API.

    Submits the task to ``in.php`` and then polls ``res.php`` every
    5 seconds, up to 60 times, until a result is available.

    Args:
        sitekey: Value of the page's ``data-sitekey`` attribute.
        pageurl: URL of the page that displays the CAPTCHA.

    Returns:
        The solved token, to be sent as ``g-recaptcha-response``.

    Raises:
        RuntimeError: If the API rejects the task or reports a solve error.
        TimeoutError: If no result is ready after 60 polls.
    """
    resp = requests.post(f"{CAPTCHAAI_URL}/in.php", data={
        "key": CAPTCHAAI_KEY, "method": "userrecaptcha",
        "googlekey": sitekey, "pageurl": pageurl, "json": 1,
    }, timeout=30)
    submitted = resp.json()
    # With json=1 a successful submit is {"status": 1, "request": <task id>}.
    # Any other status means "request" holds an error code, which must not
    # be polled as if it were a task id.
    if submitted.get("status") != 1:
        raise RuntimeError(
            f"CAPTCHA submit failed: {submitted.get('request')}"
        )
    task_id = submitted["request"]

    for _ in range(60):
        time.sleep(5)
        result = requests.get(f"{CAPTCHAAI_URL}/res.php", params={
            "key": CAPTCHAAI_KEY, "action": "get",
            "id": task_id, "json": 1,
        }, timeout=30)
        data = result.json()
        if data.get("request") == "CAPCHA_NOT_READY":
            continue
        # Anything that is neither "not ready" nor status 1 is an error
        # code (e.g. unsolvable); never hand it back as a token.
        if data.get("status") != 1:
            raise RuntimeError(
                f"CAPTCHA solve failed: {data.get('request')}"
            )
        return data["request"]
    raise TimeoutError("Timeout")


def solve_turnstile(sitekey, pageurl):
    """Solve a Cloudflare Turnstile challenge through the CaptchaAI API.

    Submits the task to ``in.php`` and then polls ``res.php`` every
    5 seconds, up to 60 times, until a result is available.

    Args:
        sitekey: Value of the page's ``data-sitekey`` attribute.
        pageurl: URL of the page that displays the challenge.

    Returns:
        The solved token, to be sent as ``cf-turnstile-response``.

    Raises:
        RuntimeError: If the API rejects the task or reports a solve error.
        TimeoutError: If no result is ready after 60 polls.
    """
    resp = requests.post(f"{CAPTCHAAI_URL}/in.php", data={
        "key": CAPTCHAAI_KEY, "method": "turnstile",
        "sitekey": sitekey, "pageurl": pageurl, "json": 1,
    }, timeout=30)
    submitted = resp.json()
    # With json=1 a successful submit is {"status": 1, "request": <task id>}.
    # Any other status means "request" holds an error code, which must not
    # be polled as if it were a task id.
    if submitted.get("status") != 1:
        raise RuntimeError(
            f"CAPTCHA submit failed: {submitted.get('request')}"
        )
    task_id = submitted["request"]

    for _ in range(60):
        time.sleep(5)
        result = requests.get(f"{CAPTCHAAI_URL}/res.php", params={
            "key": CAPTCHAAI_KEY, "action": "get",
            "id": task_id, "json": 1,
        }, timeout=30)
        data = result.json()
        if data.get("request") == "CAPCHA_NOT_READY":
            continue
        # Anything that is neither "not ready" nor status 1 is an error
        # code (e.g. unsolvable); never hand it back as a token.
        if data.get("status") != 1:
            raise RuntimeError(
                f"CAPTCHA solve failed: {data.get('request')}"
            )
        return data["request"]
    raise TimeoutError("Timeout")


class ShippingRateScraper:
    """Fetch and compare shipping rates from carrier rate-calculator pages.

    Each carrier is described by a dict with a ``name``, a ``rate_url`` and
    an optional ``captcha_type`` (``"turnstile"``; anything else is treated
    as reCAPTCHA). All requests go through one ``requests.Session``.
    """

    def __init__(self, proxy=None):
        """Create the HTTP session.

        Args:
            proxy: Optional proxy URL used for both HTTP and HTTPS traffic.
        """
        self.session = requests.Session()
        if proxy:
            self.session.proxies = {"http": proxy, "https": proxy}
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 Chrome/126.0.0.0 Safari/537.36",
        })

    def get_rate(self, carrier, shipment):
        """Get shipping rates from a single carrier.

        Loads the rate page, solves the CAPTCHA if a sitekey is present in
        the HTML, and posts the shipment form back to the same URL.

        Args:
            carrier: Carrier dict with ``rate_url`` and optional
                ``captcha_type``.
            shipment: Form fields describing the shipment.

        Returns:
            A list of rate dicts (see ``_parse_rates``) when the POST
            returns HTTP 200, otherwise ``None``.
        """
        url = carrier["rate_url"]
        resp = self.session.get(url, timeout=30)

        # Copy so the caller's shipment dict never gains a token field.
        form = dict(shipment)
        sitekey = self._extract_sitekey(resp.text)
        if sitekey:
            if carrier.get("captcha_type") == "turnstile":
                form["cf-turnstile-response"] = solve_turnstile(sitekey, url)
            else:
                form["g-recaptcha-response"] = solve_recaptcha(sitekey, url)

        # requests applies no timeout by default; without one a stalled
        # carrier would block the whole comparison run.
        resp = self.session.post(url, data=form, timeout=30)

        if resp.status_code == 200:
            return self._parse_rates(resp.text)
        return None

    def compare_rates(self, carriers, shipment, delay=3):
        """Compare rates across multiple carriers.

        Args:
            carriers: Iterable of carrier dicts (``name``, ``rate_url``,
                optional ``captcha_type``).
            shipment: Form fields describing the shipment.
            delay: Seconds to pause between consecutive carriers.

        Returns:
            A dict with the ``shipment``, the per-carrier ``results`` in
            request order, and ``cheapest``: the result whose first listed
            rate has the lowest price, or ``None`` if no carrier returned
            any rates.
        """
        results = []

        for index, carrier in enumerate(carriers):
            # Pause between carriers only; there is nothing to throttle
            # before the first request or after the last one.
            if index and delay:
                time.sleep(delay)
            try:
                rates = self.get_rate(carrier, shipment)
            except Exception as e:
                # Best-effort: one failing carrier must not abort the run.
                results.append({
                    "carrier": carrier["name"],
                    "status": "error",
                    "error": str(e),
                })
            else:
                results.append({
                    "carrier": carrier["name"],
                    "status": "success",
                    "rates": rates,
                })

        # Error entries carry no "rates" key, and a non-200 response yields
        # rates=None; .get() skips both without raising.
        successful = [r for r in results if r.get("rates")]
        successful.sort(
            key=lambda r: self._price_value(r["rates"][0].get("price"))
        )

        return {
            "shipment": shipment,
            "results": results,
            "cheapest": successful[0] if successful else None,
        }

    @staticmethod
    def _price_value(text):
        """Convert a price string such as ``"$1,234.50"`` to a float.

        Missing or unparsable prices map to infinity so they sort last
        instead of raising.
        """
        raw = "" if text is None else str(text)
        cleaned = re.sub(r"[^\d.]", "", raw)
        try:
            return float(cleaned)
        except ValueError:
            return float("inf")

    def _extract_sitekey(self, html):
        """Return the first ``data-sitekey`` value in *html*, or ``None``.

        Accepts both double- and single-quoted attribute values.
        """
        match = re.search(r'data-sitekey=["\']([^"\']+)["\']', html)
        return match.group(1) if match else None

    def _parse_rates(self, html):
        """Parse rate rows from a results page.

        Returns:
            A list of dicts with ``service``, ``price`` and ``eta`` text.
            Rows without a service element are skipped; a missing price or
            ETA becomes an empty string.
        """
        soup = BeautifulSoup(html, "html.parser")
        rates = []
        for row in soup.select(".rate-option, .shipping-option, tr.rate"):
            service = row.select_one(".service-name, td:first-child")
            price = row.select_one(".price, .rate-amount, td:nth-child(2)")
            eta = row.select_one(".delivery-time, .eta, td:nth-child(3)")
            if service:
                rates.append({
                    "service": service.get_text(strip=True),
                    "price": price.get_text(strip=True) if price else "",
                    "eta": eta.get_text(strip=True) if eta else "",
                })
        return rates


# Usage
# Guarded so that importing this module does not trigger live network
# requests and paid CAPTCHA solves; the example runs only as a script.
if __name__ == "__main__":
    # Placeholder proxy credentials; replace with a real proxy URL.
    scraper = ShippingRateScraper(
        proxy="http://user:pass@residential.proxy.com:5000"
    )

    # Each carrier needs a display name, the rate page URL, and the CAPTCHA
    # type protecting it ("turnstile" or reCAPTCHA otherwise).
    carriers = [
        {"name": "Carrier A", "rate_url": "https://carrier-a.example.com/rates", "captcha_type": "recaptcha"},
        {"name": "Carrier B", "rate_url": "https://carrier-b.example.com/rates", "captcha_type": "turnstile"},
        {"name": "Carrier C", "rate_url": "https://carrier-c.example.com/rates", "captcha_type": "recaptcha"},
    ]

    # Form fields posted unchanged to every carrier's rate form.
    shipment = {
        "origin_zip": "10001",
        "dest_zip": "90210",
        "weight": "10",
        "length": "12",
        "width": "8",
        "height": "6",
    }

    comparison = scraper.compare_rates(carriers, shipment)
    print(f"Cheapest: {comparison['cheapest']}")

Bulk Tracking Monitor

class ShipmentTracker:
    """Look up shipment status on carrier tracking pages.

    All requests go through one ``requests.Session``; a reCAPTCHA token is
    attached to the tracking form when the caller supplies a sitekey.
    """

    def __init__(self, proxy=None):
        """Create the HTTP session.

        Args:
            proxy: Optional proxy URL used for both HTTP and HTTPS traffic.
        """
        self.session = requests.Session()
        if proxy:
            self.session.proxies = {"http": proxy, "https": proxy}
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 Chrome/126.0.0.0 Safari/537.36",
        })

    def track(self, carrier_url, tracking_number, sitekey=None):
        """Track a single shipment.

        Args:
            carrier_url: URL of the carrier's tracking page; the form is
                posted back to the same URL.
            tracking_number: Tracking number to look up.
            sitekey: reCAPTCHA sitekey of the page, if it is protected.

        Returns:
            A dict with ``tracking``, ``status``, ``location``, ``eta`` and
            ``events`` when the POST returns HTTP 200, otherwise ``None``.
        """
        # The response body is not used; presumably this request exists to
        # pick up session cookies before the form POST (verify per carrier).
        self.session.get(carrier_url, timeout=30)

        data = {"tracking_number": tracking_number}

        if sitekey:
            token = solve_recaptcha(sitekey, carrier_url)
            data["g-recaptcha-response"] = token

        # requests applies no timeout by default; without one a stalled
        # carrier would block a whole bulk run.
        resp = self.session.post(carrier_url, data=data, timeout=30)

        if resp.status_code == 200:
            soup = BeautifulSoup(resp.text, "html.parser")
            return {
                "tracking": tracking_number,
                "status": self._text(soup, ".status, .tracking-status"),
                "location": self._text(soup, ".location, .current-location"),
                "eta": self._text(soup, ".eta, .estimated-delivery"),
                "events": self._parse_events(soup),
            }
        return None

    def bulk_track(self, carrier_url, tracking_numbers, sitekey=None, delay=3):
        """Track multiple shipments on the same carrier page.

        Args:
            carrier_url: URL of the carrier's tracking page.
            tracking_numbers: Iterable of tracking numbers.
            sitekey: reCAPTCHA sitekey of the page, if it is protected.
            delay: Seconds to pause between consecutive lookups.

        Returns:
            One dict per tracking number, in input order: the ``track``
            result, ``{"tracking", "status": "not found"}`` when ``track``
            returned nothing, or ``{"tracking", "error"}`` when it raised.
        """
        results = []
        for index, tn in enumerate(tracking_numbers):
            # Pause between lookups only; there is nothing to throttle
            # before the first request or after the last one.
            if index and delay:
                time.sleep(delay)
            try:
                info = self.track(carrier_url, tn, sitekey)
                results.append(info or {"tracking": tn, "status": "not found"})
            except Exception as e:
                # Best-effort: one failed lookup must not abort the batch.
                results.append({"tracking": tn, "error": str(e)})
        return results

    def _text(self, soup, selector):
        """Return the stripped text of the first match, or ``""`` if none."""
        el = soup.select_one(selector)
        return el.get_text(strip=True) if el else ""

    def _parse_events(self, soup):
        """Collect ``date``/``description``/``location`` for each event row."""
        return [
            {
                "date": self._text(event, ".date, .event-date"),
                "description": self._text(event, ".desc, .event-desc"),
                "location": self._text(event, ".loc, .event-location"),
            }
            for event in soup.select(".tracking-event, .event-row")
        ]

Rate Monitoring Schedule

| Data Type | Frequency | Volume |
|---|---|---|
| Domestic parcel rates | Weekly | 50-100 lane pairs |
| International rates | Monthly | 20-50 country pairs |
| LTL freight | Daily | 10-30 lane pairs |
| Ocean freight | Weekly | 10-20 port pairs |
| Shipment tracking | Every 4 hours | Per active shipment |

Troubleshooting

| Issue | Cause | Fix |
|---|---|---|
| Rate calculator returns blank | CAPTCHA not solved before submit | Solve CAPTCHA first, then submit form |
| Tracking shows "not found" | Wrong carrier or delayed scan | Verify tracking number format |
| Different rate than website | Missing surcharges/fees | Check for fuel surcharge fields |
| Session timeout on rate form | Multi-step rate flow took too long | Use sticky proxy, solve faster |
| IP blocked by carrier | Too many rate requests | Rate limit to 20-30 quotes/hour |

FAQ

How often should I check shipping rates?

Weekly for general rate monitoring. Daily for time-sensitive freight decisions. More frequent checks trigger CAPTCHAs faster.

Can I compare rates automatically?

Yes. Use the multi-carrier comparison approach above. Submit the same shipment details to each carrier and compare responses.

Which carriers have the hardest CAPTCHAs?

UPS and FedEx use reCAPTCHA v2 on rate calculators. Freight brokers behind Cloudflare require Turnstile solving. Ocean carriers often use simpler image CAPTCHAs.



Compare shipping rates across carriers — get your CaptchaAI key and automate logistics data collection.

Full Working Code

Complete runnable examples for this article in Python, Node.js, PHP, Go, Java, C#, Ruby, Rust, Kotlin & Bash.

View on GitHub →

Discussions (0)

No comments yet.