Use Cases

Travel Fare Monitoring with CAPTCHA Handling

Travel booking sites aggressively block automated fare checks with CAPTCHAs and bot detection. CaptchaAI lets you monitor flight and hotel prices reliably through these defenses.

CAPTCHA Landscape on Travel Sites

| Site Category | CAPTCHA Type | Difficulty |
| --- | --- | --- |
| Airlines (direct) | reCAPTCHA v3, Cloudflare | Medium |
| OTAs (Expedia, Booking) | reCAPTCHA v2, Turnstile | Medium-High |
| Meta-search (Google Flights, Kayak) | reCAPTCHA v3 | Medium |
| Budget airlines | Image CAPTCHA, reCAPTCHA | Low-Medium |
| Hotel aggregators | Cloudflare Challenge | High |

Fare Monitor Implementation

import requests
import time
import re
import json
import os
from datetime import datetime, timedelta

# CaptchaAI API key, read from the environment rather than hard-coded.
# If CAPTCHAAI_API_KEY is not set, this lookup raises KeyError as soon as the
# module is loaded.
API_KEY = os.environ["CAPTCHAAI_API_KEY"]


def solve_captcha(params):
    """Submit a CAPTCHA task to CaptchaAI and poll until it is solved.

    Args:
        params: Task parameters for the ``in.php`` endpoint (for example
            ``method``, ``googlekey``/``sitekey`` and ``pageurl``). The
            mapping is not modified; the API key is added to a copy.

    Returns:
        The solution token, i.e. everything after the first ``|`` of the
        ``OK|...`` response from ``res.php``.

    Raises:
        RuntimeError: If the service rejects the submission or reports an
            error while solving. The message carries the raw response text.
        TimeoutError: If the task is still not ready after 60 polls spaced
            5 seconds apart.
    """
    # Copy instead of writing into the caller's dict, so the API key never
    # leaks into an object the caller may reuse or log.
    payload = {**params, "key": API_KEY}

    # Without a timeout a stalled connection would block the monitor forever.
    resp = requests.get(
        "https://ocr.captchaai.com/in.php", params=payload, timeout=30
    )
    submitted = resp.text.strip()
    if not submitted.startswith("OK|"):
        raise RuntimeError(f"Submit: {resp.text}")

    task_id = submitted.split("|", 1)[1]
    for _ in range(60):
        time.sleep(5)
        result = requests.get(
            "https://ocr.captchaai.com/res.php",
            params={"key": API_KEY, "action": "get", "id": task_id},
            timeout=30,
        )
        answer = result.text.strip()
        if answer == "CAPCHA_NOT_READY":
            continue
        if answer.startswith("OK|"):
            # Split once only: the token itself may contain "|".
            return answer.split("|", 1)[1]
        raise RuntimeError(f"Solve: {result.text}")
    raise TimeoutError(f"CAPTCHA task {task_id} not solved after 60 polls")


class FareMonitor:
    """Fetch fare pages, solve CAPTCHAs on the way, and track price history."""

    # Seconds to wait for a single HTTP response before giving up.
    REQUEST_TIMEOUT = 30

    # ``action`` value sent with reCAPTCHA v3 tasks.
    RECAPTCHA_V3_ACTION = "search"

    # Value of a widget's data-sitekey attribute. Both reCAPTCHA v2 and
    # Turnstile widgets carry this attribute, so it cannot identify the
    # CAPTCHA type on its own.
    _SITEKEY_RE = re.compile(r'data-sitekey=["\']([^"\']+)["\']')

    # Value of the ``render`` query parameter of the reCAPTCHA loader script.
    _RENDER_RE = re.compile(
        r'recaptcha/api\.js\?[^"\'\s>]*?\brender=([A-Za-z0-9_-]+)'
    )

    # ``render`` values that select a v2 rendering mode instead of naming a
    # v3 site key.
    _V2_RENDER_MODES = frozenset({"explicit", "onload"})

    # Dollar amount: must start with a digit so a stray "$," is not captured
    # as an empty number.
    _PRICE_RE = re.compile(r'\$\s*(\d[\d,]*(?:\.\d{2})?)')

    def __init__(self):
        # One shared session so cookies set by a solved challenge are sent
        # with the follow-up requests.
        self.session = requests.Session()
        self.session.headers["User-Agent"] = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 Chrome/120.0.0.0"
        )
        # Every successful check_fares() result, in the order it was taken.
        self.history = []

    def _detect_captcha(self, html):
        """Identify the CAPTCHA challenge embedded in *html*, if any.

        Returns:
            ``None`` when no known challenge is found, otherwise a tuple
            ``(form_field, solver_params)`` where ``form_field`` is the form
            field the token is posted in and ``solver_params`` are the
            solver task parameters without ``pageurl``.
        """
        widget = self._SITEKEY_RE.search(html)

        # Turnstile must be checked first: its widget also has a
        # data-sitekey attribute and would otherwise be sent to the solver
        # as a reCAPTCHA task.
        if "cf-turnstile" in html and widget:
            return "cf-turnstile-response", {
                "method": "turnstile",
                "sitekey": widget.group(1),
            }

        # reCAPTCHA v3 puts its site key in the loader URL (render=<key>) and
        # may have no data-sitekey widget at all. render=explicit/onload is
        # v2, so those values must not be treated as a key.
        render = self._RENDER_RE.search(html)
        if render and render.group(1) not in self._V2_RENDER_MODES:
            return "g-recaptcha-response", {
                "method": "userrecaptcha",
                "googlekey": render.group(1),
                "version": "v3",
                "action": self.RECAPTCHA_V3_ACTION,
            }

        if widget:
            return "g-recaptcha-response", {
                "method": "userrecaptcha",
                "googlekey": widget.group(1),
            }
        return None

    def fetch_with_captcha(self, url):
        """Fetch a travel page, solving CAPTCHAs if encountered.

        The page is requested with GET. While the response contains a known
        challenge, the challenge is solved and the token is POSTed back to
        the same URL. Each token field is submitted at most once, so a
        rejected token does not trigger repeated paid solves.

        Returns:
            The text of the last response received.
        """
        resp = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
        answered = set()
        while True:
            challenge = self._detect_captcha(resp.text)
            if challenge is None:
                break
            field, params = challenge
            if field in answered:
                # Same challenge again: the token was not accepted.
                break
            answered.add(field)
            token = solve_captcha({**params, "pageurl": url})
            resp = self.session.post(
                url, data={field: token}, timeout=self.REQUEST_TIMEOUT
            )
        return resp.text

    def check_fares(self, routes):
        """Check fares for a list of routes.

        Args:
            routes: Iterable of dicts with ``origin``, ``destination``,
                ``date`` and ``url`` keys.

        Returns:
            One result dict per route that was fetched successfully. A
            route that fails is reported on stdout and skipped. Each result
            is also appended to ``self.history``.
        """
        results = []
        for route in routes:
            try:
                html = self.fetch_with_captcha(route["url"])
                prices = self._extract_prices(html)

                result = {
                    "route": f"{route['origin']}-{route['destination']}",
                    "date": route["date"],
                    "prices": prices,
                    # prices is sorted ascending, so the first is the lowest.
                    "min_price": prices[0] if prices else None,
                    # NOTE: naive UTC timestamp, kept so the report format
                    # does not change.
                    "timestamp": datetime.utcnow().isoformat(),
                }
                results.append(result)
                self.history.append(result)

                if prices:
                    print(f"  {result['route']} ({route['date']}): "
                          f"${prices[0]}-${prices[-1]}")
                else:
                    print(f"  {result['route']}: No prices found")

                time.sleep(3)  # Respectful delay

            except Exception as e:
                # Best-effort batch: one failing route must not abort the
                # remaining checks.
                print(f"  {route.get('origin', '?')}-"
                      f"{route.get('destination', '?')}: ERROR - {e}")

        return results

    def _extract_prices(self, html):
        """Extract prices from travel page HTML.

        Returns:
            The distinct dollar amounts strictly between 20 and 10000,
            sorted ascending, as floats.
        """
        prices = set()
        for match in self._PRICE_RE.finditer(html):
            price = float(match.group(1).replace(",", ""))
            if 20 < price < 10000:  # Filter noise
                prices.add(price)
        return sorted(prices)

    def detect_price_drops(self, threshold_pct=5):
        """Detect significant price drops in history.

        For each route/date pair, the two most recent non-empty minimum
        prices are compared.

        Args:
            threshold_pct: Minimum drop, in percent, that raises an alert.

        Returns:
            A list of alert dicts with ``route``, ``previous``, ``current``
            and ``change`` (formatted percentage) keys.
        """
        route_prices = {}
        for entry in self.history:
            key = f"{entry['route']}_{entry['date']}"
            series = route_prices.setdefault(key, [])
            # Falsy values (None when no price was found, or 0) are skipped,
            # which also keeps the division below safe.
            if entry["min_price"]:
                series.append(entry["min_price"])

        alerts = []
        for key, prices in route_prices.items():
            if len(prices) < 2:
                continue
            prev, current = prices[-2], prices[-1]
            change_pct = ((current - prev) / prev) * 100
            if change_pct < -threshold_pct:
                alerts.append({
                    "route": key,
                    "previous": prev,
                    "current": current,
                    "change": f"{change_pct:.1f}%",
                })

        return alerts

    def export_report(self, filename="fare_report.json"):
        """Export fare history to JSON."""
        # Explicit encoding so the file does not depend on the platform
        # default.
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(self.history, f, indent=2)
        print(f"Exported {len(self.history)} fare checks to {filename}")


# Routes to monitor: one (origin, destination, date, url) row per route,
# expanded into the dict shape that FareMonitor.check_fares() reads.
routes = [
    {"origin": origin, "destination": destination, "date": date, "url": url}
    for origin, destination, date, url in (
        (
            "JFK",
            "LAX",
            "2025-03-15",
            "https://example-airline.com/flights?from=JFK&to=LAX&date=2025-03-15",
        ),
        (
            "SFO",
            "ORD",
            "2025-03-20",
            "https://example-airline.com/flights?from=SFO&to=ORD&date=2025-03-20",
        ),
    )
]

# Run one monitoring pass and write the collected history to the default
# report file.
monitor = FareMonitor()
results = monitor.check_fares(routes)
monitor.export_report()

Scheduling

# Check fares every 4 hours
0 */4 * * * cd /opt/fare-monitor && python fare_monitor.py

Cost Analysis

| Monitoring Level | Routes | Checks/Day | CAPTCHAs/Day | Est. Cost |
| --- | --- | --- | --- | --- |
| Personal | 5 | 6/route | ~30 | $0.50 |
| Small Agency | 50 | 4/route | ~200 | $2-5 |
| Enterprise | 500 | 6/route | ~3,000 | $20-40 |

FAQ

How often should I check fares?

Every 4-6 hours for personal use. Every 1-2 hours for business use. Airlines update prices in batches, so more frequent checks yield diminishing returns.

Can I monitor hotel prices too?

Yes. The same approach works for Booking.com, Expedia, and hotel direct sites. Adjust the price extraction patterns for hotel page formats.

How do I handle dynamic pricing pages?

Some travel sites require JavaScript rendering. Use Selenium or Playwright for the page fetch, then CaptchaAI for CAPTCHA solving.

Full Working Code

Complete runnable examples for this article in Python, Node.js, PHP, Go, Java, C#, Ruby, Rust, Kotlin & Bash.

View on GitHub →

Discussions (0)

No comments yet.