Integrations

Colly + CaptchaAI: Go-Based Scraping with CAPTCHA Solving

Colly is a popular Go web scraping framework. Here's how to integrate CaptchaAI to handle CAPTCHAs in your Go scrapers.


CaptchaAI Client in Go

package captchaai

import (
    "encoding/json"
    "errors"
    "fmt"
    "net/http"
    "net/url"
    "strings"
    "time"
)

// Client talks to the CaptchaAI HTTP API on behalf of a single account.
type Client struct {
    // APIKey is sent as the "key" parameter on every submit and poll request.
    APIKey     string
    // HTTPClient performs all requests; NewClient sets it up with a 30-second timeout.
    HTTPClient *http.Client
}

// apiResponse is the JSON envelope decoded from both the submit (in.php) and
// poll (res.php) responses.
type apiResponse struct {
    // Status is 1 on success; callers treat any other value as "failed" or
    // "not ready yet".
    Status  int    `json:"status"`
    // Request carries the payload: the task ID after a successful submit, the
    // solved token after a successful poll, and otherwise a status/error code
    // string such as "CAPCHA_NOT_READY".
    Request string `json:"request"`
}

// NewClient builds a Client that authenticates with apiKey and sends its
// requests through a dedicated http.Client capped at 30 seconds per request.
func NewClient(apiKey string) *Client {
    client := new(Client)
    client.APIKey = apiKey
    client.HTTPClient = &http.Client{Timeout: 30 * time.Second}
    return client
}

// SolveRecaptchaV2 submits a reCAPTCHA v2 task for the given site key and page
// URL and blocks until the service returns a token or the attempt times out.
//
// Flow: validate the arguments locally, POST the task to in.php, wait 15
// seconds, then poll res.php up to 24 times, 5 seconds apart.
//
// It returns the solved token on success. It returns an error when an argument
// or the API key is blank, the submit request fails or cannot be decoded, the
// service rejects the task, the service reports a terminal error while
// polling, or no token arrives within the polling budget. Network failures and
// undecodable responses during polling are treated as transient and retried.
func (c *Client) SolveRecaptchaV2(sitekey, pageurl string) (string, error) {
    // Fail fast: a blank key or target can never succeed, and every submit is
    // followed by a long blocking wait.
    if strings.TrimSpace(c.APIKey) == "" {
        return "", errors.New("api key is empty")
    }
    if strings.TrimSpace(sitekey) == "" || strings.TrimSpace(pageurl) == "" {
        return "", errors.New("sitekey and pageurl are required")
    }

    // Submit task
    data := url.Values{
        "key":       {c.APIKey},
        "method":    {"userrecaptcha"},
        "googlekey": {sitekey},
        "pageurl":   {pageurl},
        "json":      {"1"},
    }

    resp, err := c.HTTPClient.PostForm("https://ocr.captchaai.com/in.php", data)
    if err != nil {
        return "", fmt.Errorf("submit error: %w", err)
    }

    var result apiResponse
    err = json.NewDecoder(resp.Body).Decode(&result)
    // Close right away rather than deferring: a deferred Close would keep the
    // submit response open for the whole polling period below.
    resp.Body.Close()
    if err != nil {
        return "", fmt.Errorf("decode error: %w", err)
    }

    if result.Status != 1 {
        return "", fmt.Errorf("submit failed: %s", result.Request)
    }

    taskID := result.Request

    // Build the poll URL once, with proper query escaping of key and task ID.
    query := url.Values{
        "key":    {c.APIKey},
        "action": {"get"},
        "id":     {taskID},
        "json":   {"1"},
    }
    pollURL := "https://ocr.captchaai.com/res.php?" + query.Encode()

    // Poll for result
    time.Sleep(15 * time.Second)

    var lastErr error
    for i := 0; i < 24; i++ {
        if i > 0 {
            time.Sleep(5 * time.Second)
        }

        resp, err := c.HTTPClient.Get(pollURL)
        if err != nil {
            // *url.Error embeds the full poll URL, which contains the API key;
            // keep only the underlying cause so the key never reaches logs.
            var urlErr *url.Error
            if errors.As(err, &urlErr) {
                err = urlErr.Err
            }
            lastErr = err
            continue
        }

        var pollResult apiResponse
        err = json.NewDecoder(resp.Body).Decode(&pollResult)
        resp.Body.Close()
        if err != nil {
            // A malformed body (e.g. a gateway error page) is transient; do not
            // abandon the task over it.
            lastErr = fmt.Errorf("decode error: %w", err)
            continue
        }
        lastErr = nil

        if pollResult.Status == 1 {
            return pollResult.Request, nil
        }
        // "CAPCHA_NOT_READY" (sic) is the literal code the API returns while
        // the task is still being solved; anything else is terminal.
        if pollResult.Request != "CAPCHA_NOT_READY" {
            return "", fmt.Errorf("solve error: %s", pollResult.Request)
        }
    }

    if lastErr != nil {
        return "", fmt.Errorf("solve timeout: last poll error: %w", lastErr)
    }
    return "", errors.New("solve timeout")
}

Colly Integration

package main

import (
    "fmt"
    "log"
    "os"
    "strings"

    "github.com/gocolly/colly/v2"
)

// main crawls https://example.com/data with Colly, solving any reCAPTCHA it
// meets through CaptchaAI and printing every table row it finds.
//
// The API key is read from the CAPTCHAAI_API_KEY environment variable; the
// program exits immediately when it is missing or when the initial visit
// cannot be started.
//
// NOTE(review): captchaai is referenced below but is not in this file's import
// block; add the import for the client package using your module path.
func main() {
    apiKey := os.Getenv("CAPTCHAAI_API_KEY")
    if apiKey == "" {
        log.Fatal("CAPTCHAAI_API_KEY is not set")
    }
    solver := captchaai.NewClient(apiKey)

    c := colly.NewCollector(
        colly.AllowedDomains("example.com"),
        colly.MaxDepth(2),
    )

    // Detect CAPTCHA pages
    c.OnHTML("[data-sitekey]", func(e *colly.HTMLElement) {
        sitekey := e.Attr("data-sitekey")
        pageURL := e.Request.URL.String()

        // If the response to our own token POST still shows a CAPTCHA, the
        // token was rejected. Stop here rather than solving and re-posting
        // again, which could loop indefinitely.
        if e.Request.Method == "POST" {
            log.Printf("CAPTCHA still present after submitting token on %s, giving up", pageURL)
            return
        }

        log.Printf("CAPTCHA detected on %s, solving...", pageURL)

        token, err := solver.SolveRecaptchaV2(sitekey, pageURL)
        if err != nil {
            log.Printf("Solve failed: %v", err)
            return
        }

        log.Printf("CAPTCHA solved, token length: %d", len(token))

        // Post form with token
        err = c.Post(pageURL, map[string]string{
            "g-recaptcha-response": token,
        })
        if err != nil {
            log.Printf("Form submit failed: %v", err)
        }
    })

    // Extract data
    c.OnHTML("table tr", func(e *colly.HTMLElement) {
        var cols []string
        e.ForEach("td", func(_ int, td *colly.HTMLElement) {
            cols = append(cols, strings.TrimSpace(td.Text))
        })
        // Rows without <td> cells (e.g. header rows) are skipped.
        if len(cols) > 0 {
            fmt.Printf("Row: %s\n", strings.Join(cols, " | "))
        }
    })

    c.OnError(func(r *colly.Response, err error) {
        log.Printf("Error %s: %v", r.Request.URL, err)
    })

    // Visit reports failures to start the crawl (e.g. a disallowed domain);
    // surface them instead of exiting silently.
    if err := c.Visit("https://example.com/data"); err != nil {
        log.Fatalf("Visit failed: %v", err)
    }
}

Colly with Rate Limiting

package main

import (
    "log"
    "time"

    "github.com/gocolly/colly/v2"
)

// main shows how to throttle a Colly collector before crawling
// https://example.com. It exits if the limit rule is rejected or the initial
// visit cannot be started.
func main() {
    c := colly.NewCollector()

    // Rate limit: one request at a time per matching domain, with a 3-second
    // delay plus up to 2 seconds of random extra delay between requests.
    err := c.Limit(&colly.LimitRule{
        DomainGlob:  "*",
        Parallelism: 1,
        Delay:       3 * time.Second,
        RandomDelay: 2 * time.Second,
    })
    if err != nil {
        log.Fatalf("Limit rule rejected: %v", err)
    }

    // ... CAPTCHA handling as above ...

    if err := c.Visit("https://example.com"); err != nil {
        log.Fatalf("Visit failed: %v", err)
    }
}

Turnstile Solving in Go

// SolveTurnstile submits a Turnstile task for the given site key and page URL
// and blocks until the service returns a token or the attempt times out.
//
// Flow: validate the arguments locally, POST the task to in.php, wait 5
// seconds, then poll res.php up to 20 times, 3 seconds apart.
//
// It returns the solved token on success. It returns an error when an argument
// or the API key is blank, the submit request fails or cannot be decoded, the
// service rejects the task, the service reports a terminal error while
// polling, or no token arrives within the polling budget. Network failures and
// undecodable responses during polling are treated as transient and retried.
func (c *Client) SolveTurnstile(sitekey, pageurl string) (string, error) {
    // Fail fast: a blank key or target can never succeed.
    if strings.TrimSpace(c.APIKey) == "" {
        return "", errors.New("api key is empty")
    }
    if strings.TrimSpace(sitekey) == "" || strings.TrimSpace(pageurl) == "" {
        return "", errors.New("sitekey and pageurl are required")
    }

    data := url.Values{
        "key":     {c.APIKey},
        "method":  {"turnstile"},
        "sitekey": {sitekey},
        "pageurl": {pageurl},
        "json":    {"1"},
    }

    resp, err := c.HTTPClient.PostForm("https://ocr.captchaai.com/in.php", data)
    if err != nil {
        return "", fmt.Errorf("submit error: %w", err)
    }

    var result apiResponse
    err = json.NewDecoder(resp.Body).Decode(&result)
    // Close right away rather than deferring: a deferred Close would keep the
    // submit response open for the whole polling period below.
    resp.Body.Close()
    if err != nil {
        // Without this check a non-JSON reply would surface as an empty
        // "submit failed: " message.
        return "", fmt.Errorf("decode error: %w", err)
    }

    if result.Status != 1 {
        return "", fmt.Errorf("submit failed: %s", result.Request)
    }

    // Build the poll URL once, with proper query escaping of key and task ID.
    query := url.Values{
        "key":    {c.APIKey},
        "action": {"get"},
        "id":     {result.Request},
        "json":   {"1"},
    }
    pollURL := "https://ocr.captchaai.com/res.php?" + query.Encode()

    // Poll for the token.
    time.Sleep(5 * time.Second)

    var lastErr error
    for i := 0; i < 20; i++ {
        if i > 0 {
            time.Sleep(3 * time.Second)
        }

        resp, err := c.HTTPClient.Get(pollURL)
        if err != nil {
            // *url.Error embeds the full poll URL, which contains the API key;
            // keep only the underlying cause so the key never reaches logs.
            var urlErr *url.Error
            if errors.As(err, &urlErr) {
                err = urlErr.Err
            }
            lastErr = err
            continue
        }

        var pr apiResponse
        err = json.NewDecoder(resp.Body).Decode(&pr)
        resp.Body.Close()
        if err != nil {
            // A malformed body (e.g. a gateway error page) is transient; do not
            // abandon the task over it.
            lastErr = fmt.Errorf("decode error: %w", err)
            continue
        }
        lastErr = nil

        if pr.Status == 1 {
            return pr.Request, nil
        }
        // "CAPCHA_NOT_READY" (sic) is the literal code the API returns while
        // the task is still being solved; anything else is terminal.
        if pr.Request != "CAPCHA_NOT_READY" {
            return "", fmt.Errorf("error: %s", pr.Request)
        }
    }

    if lastErr != nil {
        return "", fmt.Errorf("timeout: last poll error: %w", lastErr)
    }
    return "", errors.New("timeout")
}

FAQ

Why use Colly over other Go scrapers?

Colly is the most popular Go scraping framework with built-in caching, rate limiting, and concurrent request handling. It pairs well with CaptchaAI's HTTP API.

Can I use Colly with headless browsers?

Yes. Colly does not execute JavaScript itself, so for pages that require JavaScript rendering, pair it with a headless-browser library such as chromedp or rod: use Colly for static pages and the headless browser for CAPTCHA-protected dynamic pages.

Is CaptchaAI's API compatible with Go?

Yes. CaptchaAI uses standard HTTP endpoints that work with Go's net/http package. No SDK is required.



Add CAPTCHA solving to your Go scrapers — get CaptchaAI.

Full Working Code

Complete runnable examples for this article in Python, Node.js, PHP, Go, Java, C#, Ruby, Rust, Kotlin & Bash.

View on GitHub →

Discussions (0)

No comments yet.