API Tutorials

CAPTCHA Image Preprocessing for Better Solve Rates

Preprocessing CAPTCHA images before submitting to CaptchaAI can improve solve rates on challenging images. These Python techniques clean up noise, improve contrast, and help the solver focus on the text.


When to Preprocess

Most CAPTCHAs solve fine without preprocessing. Consider it when:

  • Solve rate drops below 80% on a specific site
  • CAPTCHA images have heavy background noise
  • Text is very faint or low contrast
  • Background and text colors are similar

Grayscale Conversion

# grayscale.py
from PIL import Image
import base64
import io


def to_grayscale(image_path):
    """Load an image file and return it as an 8-bit grayscale image.

    Grayscale drops color information, which simplifies every later
    analysis step (contrast, thresholding, noise removal).

    Args:
        image_path: Path of the image file to open.

    Returns:
        A new PIL image in mode ``"L"``.
    """
    # Opening through a context manager guarantees the underlying file is
    # released. convert() reads the pixel data and returns an independent
    # image, so the result remains usable after the file is closed.
    with Image.open(image_path) as img:
        return img.convert("L")


def image_to_base64(img):
    """Encode a PIL image as PNG and return the bytes as a base64 string.

    The image is written to an in-memory buffer (nothing touches disk) and
    the resulting PNG bytes are base64-encoded into an ASCII ``str``.
    """
    with io.BytesIO() as png_stream:
        img.save(png_stream, format="PNG")
        png_bytes = png_stream.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode("ascii")

Contrast Enhancement

# contrast.py
from PIL import ImageEnhance


def enhance_contrast(img, factor=2.0):
    """Return a copy of ``img`` with its contrast scaled so text stands out.

    factor: 1.0 returns the original image; values above 1.0 increase
    contrast (2.0 is the default), values below 1.0 reduce it.
    """
    return ImageEnhance.Contrast(img).enhance(factor)


def enhance_sharpness(img, factor=2.0):
    """Return a sharpened copy of ``img`` to crisp up blurred text edges.

    factor: 1.0 returns the original image; larger values sharpen more.
    """
    return ImageEnhance.Sharpness(img).enhance(factor)

Noise Removal

# noise.py
from PIL import ImageFilter


def remove_dots(img, min_neighbors=5, size=3):
    """Remove isolated dot (salt-and-pepper) noise with a median filter.

    Each pixel is replaced by the median of the ``size`` x ``size`` window
    around it, which wipes out specks smaller than about half the window
    while keeping larger strokes.

    Args:
        img: PIL image to filter.
        min_neighbors: Unused. Accepted only so existing calls that pass it
            keep working; it has no effect on the result.
        size: Side length of the median window in pixels. Use an odd value
            (3, 5, 7, ...); increase it when noise survives the default.

    Returns:
        A filtered copy of ``img``.
    """
    return img.filter(ImageFilter.MedianFilter(size=size))


def remove_lines(img, size=3):
    """Reduce thin line noise with a mode (most-frequent-value) filter.

    Each pixel is replaced by the value that occurs most often in the
    ``size`` x ``size`` window around it. Strokes thinner than the window
    tend to be outvoted by the surrounding background, while thicker text
    strokes survive. This is a rank-style filter, not a true morphological
    operation.

    Args:
        img: PIL image to filter.
        size: Side length of the filter window in pixels (default 3).

    Returns:
        A filtered copy of ``img``.
    """
    return img.filter(ImageFilter.ModeFilter(size=size))


def smooth_edges(img):
    """Return a copy of ``img`` passed through Pillow's SMOOTH filter.

    Softens jagged character outlines so the text reads more cleanly.
    """
    smoothing_filter = ImageFilter.SMOOTH
    return img.filter(smoothing_filter)

Binarization (Thresholding)

# binarize.py
from PIL import Image


def binarize_simple(img, threshold=128):
    """Reduce ``img`` to pure black and white using a fixed threshold.

    The image is converted to grayscale first; pixels strictly brighter
    than ``threshold`` become 255 (white), all others become 0 (black).
    """
    def to_black_or_white(level):
        if level > threshold:
            return 255
        return 0

    grayscale = img.convert("L")
    return grayscale.point(to_black_or_white)


def binarize_adaptive(img, block_size=11, offset=10):
    """Adaptive (local-mean) threshold that copes with uneven lighting.

    Every pixel is compared with the mean brightness of the square window
    centred on it: it becomes white (255) when it is brighter than
    ``local_mean - offset`` and black (0) otherwise. Windows are clipped at
    the image border, so edge pixels use a smaller neighbourhood.

    Args:
        img: PIL image; it is converted to grayscale internally.
        block_size: Window side length in pixels. The window actually spans
            ``2 * (block_size // 2) + 1`` pixels, so an even value behaves
            like the next odd one.
        offset: Amount subtracted from the local mean before comparing.
            Larger values turn fewer pixels black.

    Returns:
        A new mode ``"L"`` PIL image containing only 0 and 255.

    Raises:
        ValueError: If ``block_size`` is smaller than 1.
    """
    import numpy as np

    if block_size < 1:
        raise ValueError("block_size must be at least 1")

    arr = np.array(img.convert("L"), dtype=np.float64)
    h, w = arr.shape
    half = block_size // 2

    # Summed-area table with a leading zero row/column:
    # integral[i, j] == arr[:i, :j].sum(). It turns every window sum into
    # four lookups instead of re-adding block_size**2 pixels per pixel.
    integral = np.zeros((h + 1, w + 1), dtype=np.float64)
    integral[1:, 1:] = arr.cumsum(axis=0).cumsum(axis=1)

    # Window bounds per row / per column, clipped to the image.
    ys = np.arange(h)
    xs = np.arange(w)
    y1 = np.maximum(ys - half, 0)[:, None]
    y2 = np.minimum(ys + half + 1, h)[:, None]
    x1 = np.maximum(xs - half, 0)[None, :]
    x2 = np.minimum(xs + half + 1, w)[None, :]

    window_sum = (
        integral[y2, x2]
        - integral[y1, x2]
        - integral[y2, x1]
        + integral[y1, x1]
    )
    window_area = (y2 - y1) * (x2 - x1)
    local_mean = window_sum / window_area

    result = np.where(arr > local_mean - offset, 255, 0).astype(np.uint8)
    return Image.fromarray(result)


def auto_threshold(img):
    """Binarize ``img`` at the threshold selected by Otsu's method.

    Every gray level is tried as a split point between a "background"
    class (pixels at or below the level) and a "foreground" class (pixels
    above it). The level that maximizes the between-class variance is
    passed to ``binarize_simple``.
    """
    import numpy as np

    pixels = np.array(img.convert("L"))
    counts, _ = np.histogram(pixels.ravel(), bins=256, range=(0, 256))

    pixel_count = pixels.size
    intensity_total = sum(level * count for level, count in enumerate(counts))

    otsu_level = 0
    max_between_var = 0

    # Running pixel count and intensity sum of the background class.
    below_count = 0
    below_total = 0

    for level, count in enumerate(counts):
        below_count += count
        if below_count == 0:
            # No pixel is this dark yet; there is nothing to split.
            continue
        above_count = pixel_count - below_count
        if above_count == 0:
            # Every pixel is now background; higher levels cannot split.
            break

        below_total += level * count
        below_mean = below_total / below_count
        above_mean = (intensity_total - below_total) / above_count

        # Between-class variance, up to a constant factor (counts are used
        # in place of probabilities).
        between_var = below_count * above_count * (below_mean - above_mean) ** 2
        if between_var > max_between_var:
            max_between_var, otsu_level = between_var, level

    return binarize_simple(img, otsu_level)

Color Isolation

Extract text by isolating specific color channels:

# color_isolation.py
from PIL import Image
import numpy as np


def isolate_dark_text(img, max_brightness=100):
    """Return a black-and-white image that keeps only the dark pixels.

    Pixels whose grayscale value is below ``max_brightness`` (likely text
    on a light background) become black (0); all others become white (255).
    """
    luminance = np.array(img.convert("L"))
    is_dark = luminance < max_brightness
    black_on_white = np.where(is_dark, 0, 255)
    return Image.fromarray(black_on_white.astype(np.uint8))


def isolate_color_channel(img, channel="red"):
    """Extract a single color channel as a grayscale image.

    Args:
        img: PIL image in any mode; it is converted to RGB first so that
            RGBA, palette, and grayscale inputs work too.
        channel: ``"red"``, ``"green"``, or ``"blue"``. Any other value
            falls back to the red channel.

    Returns:
        The selected band of the RGB image.
    """
    # split() yields one band per channel, so unpacking into exactly three
    # names only works once the image is known to be RGB.
    r, g, b = img.convert("RGB").split()
    channels = {"red": r, "green": g, "blue": b}
    return channels.get(channel, r)


def isolate_colored_text(img, target_rgb, tolerance=50):
    """Return a black-and-white image keeping pixels near a target color.

    The distance to ``target_rgb`` is the sum of the absolute per-channel
    differences. Pixels with a distance below ``tolerance`` become black
    (0); everything else becomes white (255).
    """
    # Work in plain ints so the subtraction cannot wrap around in uint8.
    pixels = np.array(img.convert("RGB")).astype(int)
    wanted = np.array(target_rgb).astype(int)

    distance = np.abs(pixels - wanted).sum(axis=2)
    is_match = distance < tolerance

    black_on_white = np.where(is_match, 0, 255).astype(np.uint8)
    return Image.fromarray(black_on_white)

Complete Preprocessing Pipeline

# pipeline.py
from PIL import Image, ImageEnhance, ImageFilter
import base64
import io


def preprocess_captcha(image_path, config=None):
    """Run the full preprocessing pipeline and return a base64 PNG.

    Steps run in a fixed order: grayscale, denoise, contrast, sharpen,
    binarize. Each step is switched on by a key in ``config``.

    Args:
        image_path: Path of the CAPTCHA image file.
        config: Optional dict of step settings. Recognized keys:
            ``"grayscale"`` (bool) - convert to mode "L" first.
            ``"denoise"`` (bool) - apply a 3x3 median filter.
            ``"contrast"`` (float) - contrast factor; 1.0 skips the step.
                The step always operates on a grayscale copy, so any other
                value produces a grayscale result.
            ``"sharpen"`` (float) - sharpness factor; 1.0 skips the step.
            ``"threshold"`` (int) - pixels brighter than this become
                white, the rest black; ``None`` or ``False`` skips it.
            When ``config`` is ``None`` a default with every step enabled
            is used. Keys missing from a supplied dict disable that step.

    Returns:
        The processed image encoded as PNG, as a base64 ASCII string.
    """
    if config is None:
        config = {
            "grayscale": True,
            "contrast": 2.0,
            "sharpen": 1.5,
            "denoise": True,
            "threshold": 128,
        }

    # Step 1: Load (and optionally grayscale). The context manager releases
    # the file handle; convert()/copy() read the pixel data so the working
    # image stays usable afterwards.
    with Image.open(image_path) as source:
        if config.get("grayscale"):
            img = source.convert("L")
        elif source.mode == "P":
            # Pillow's filters reject palette images, so expand to RGB.
            img = source.convert("RGB")
        else:
            img = source.copy()

    # Step 2: Denoise
    if config.get("denoise"):
        img = img.filter(ImageFilter.MedianFilter(size=3))

    # Step 3: Contrast
    contrast = config.get("contrast", 1.0)
    if contrast != 1.0:
        img = ImageEnhance.Contrast(img.convert("L")).enhance(contrast)

    # Step 4: Sharpen
    sharpen = config.get("sharpen", 1.0)
    if sharpen != 1.0:
        img = ImageEnhance.Sharpness(img).enhance(sharpen)

    # Step 5: Binarize. Explicit None/False checks keep a threshold of 0
    # valid. Converting to "L" first ensures a single-channel black/white
    # result instead of thresholding every color channel separately.
    threshold = config.get("threshold")
    if threshold is not None and threshold is not False:
        img = img.convert("L").point(lambda p: 255 if p > threshold else 0)

    # Encode
    buffer = io.BytesIO()
    img.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode("ascii")


# Usage: stronger contrast and sharpening plus a slightly lower threshold
# than the built-in defaults.
b64 = preprocess_captcha(
    "captcha.png",
    config=dict(
        grayscale=True,
        contrast=2.5,
        sharpen=2.0,
        denoise=True,
        threshold=120,
    ),
)

Troubleshooting

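| Issue | Cause | Fix |
| --- | --- | --- |
| Text disappears after binarization | Threshold is on the wrong side of the text brightness (pixels brighter than the threshold turn white) | Light text on a dark background: threshold too high, lower it (try 90-110). Dark text on a light background: threshold too low, raise it until the text stays black |
| Noise remains after filtering | Filter too weak | Increase MedianFilter size to 5 |
| Characters merge | Contrast too high | Reduce contrast factor |
| Preprocessing makes results worse | Image was already clean | Skip preprocessing for clean CAPTCHAs |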

FAQ

Should I always preprocess?

No. Try without preprocessing first. Only add it when solve rates are below expectations on a specific CAPTCHA type.

Does preprocessing increase API cost?

No. Preprocessing happens locally before submission. The API cost is the same regardless.

Which step has the most impact?

Contrast enhancement and binarization typically have the biggest impact on noisy, low-contrast CAPTCHAs.



Preprocess for better results — start with CaptchaAI.

Full Working Code

Complete runnable examples for this article in Python, Node.js, PHP, Go, Java, C#, Ruby, Rust, Kotlin & Bash.

View on GitHub →

Discussions (0)

No comments yet.