Back to Blog
Engineering

Rate Limiting Your Quiz API: A Practical Guide

Protect your quiz API from abuse with token bucket and sliding window rate limiters. Includes Redis-based implementation and graceful 429 handling.

Bobby Iliev · 2026-04-08 · 7 min read

Why Rate Limiting Is Not Optional

A quiz API without rate limiting is an invitation for abuse. A single misconfigured client can hammer your endpoints, a scraper can download your entire question bank, and a DDoS can take you offline. Rate limiting protects your infrastructure, ensures fair access across users, and keeps your API costs predictable.

This guide covers two proven rate limiting algorithms, a Redis-backed implementation you can deploy today, and client-side strategies for handling 429 responses gracefully.

Prerequisites

  • Node.js 20+
  • Redis 7+
  • Basic understanding of HTTP status codes

Token Bucket Algorithm

The token bucket is the most intuitive rate limiting algorithm. Think of it as a bucket that holds tokens. Each API request costs one token. Tokens refill at a fixed rate. When the bucket is empty, requests are rejected.

import Redis from "ioredis";

// Shared Redis connection for every rate limiter in this module.
// Fall back to localhost so local development works without configuration;
// the original passed `string | undefined`, which fails under strict
// typing and misconnects when REDIS_URL is unset.
const redis = new Redis(process.env.REDIS_URL ?? "redis://localhost:6379");

/** Configuration for the token bucket rate limiter. */
interface TokenBucketConfig {
  maxTokens: number;       // bucket capacity (maximum burst size)
  refillRate: number;      // tokens added per second
  keyPrefix: string;       // Redis key namespace, e.g. "rl:quizzes"
}

/**
 * Token bucket check for `identifier`, performed atomically in Redis.
 *
 * The Lua script reads the bucket state, refills tokens proportionally to
 * elapsed time, and consumes one token if available — all in one atomic
 * step, so two concurrent requests cannot both spend the last token.
 *
 * @param identifier API key or IP address that owns the bucket.
 * @param config     Bucket capacity, refill rate, and key namespace.
 * @returns `allowed`, the remaining whole tokens, and (when denied) an
 *          approximate retry delay in seconds.
 */
async function checkTokenBucket(
  identifier: string,
  config: TokenBucketConfig
): Promise<{ allowed: boolean; remaining: number; retryAfter: number | null }> {
  const key = `${config.keyPrefix}:${identifier}`;
  const now = Date.now();

  const result = await redis.eval(
    `
    local key = KEYS[1]
    local max_tokens = tonumber(ARGV[1])
    local refill_rate = tonumber(ARGV[2])
    local now = tonumber(ARGV[3])

    local bucket = redis.call('HMGET', key, 'tokens', 'last_refill')
    local tokens = tonumber(bucket[1])
    local last_refill = tonumber(bucket[2])

    -- First request for this identifier: start with a full bucket
    if tokens == nil then
      tokens = max_tokens
      last_refill = now
    end

    -- Calculate tokens to add based on elapsed time, capped at capacity
    local elapsed = (now - last_refill) / 1000
    tokens = math.min(max_tokens, tokens + (elapsed * refill_rate))
    last_refill = now

    local allowed = 0
    if tokens >= 1 then
      tokens = tokens - 1
      allowed = 1
    end

    -- Variadic HSET replaces HMSET, deprecated since Redis 4
    redis.call('HSET', key, 'tokens', tokens, 'last_refill', last_refill)
    -- Expire once the bucket would be full again, plus 1s of slack
    redis.call('EXPIRE', key, math.ceil(max_tokens / refill_rate) + 1)

    return {allowed, math.floor(tokens)}
    `,
    1,
    key,
    config.maxTokens,
    config.refillRate,
    now
  ) as [number, number];

  const allowed = result[0] === 1;
  const remaining = result[1];

  return {
    allowed,
    remaining,
    // Time for a single token to refill, rounded up to whole seconds
    retryAfter: allowed ? null : Math.ceil(1 / config.refillRate),
  };
}

Using a Lua script ensures the check-and-decrement is atomic. Without this, two concurrent requests could both read 1 remaining token and both proceed.

Sliding Window Algorithm

The sliding window provides smoother rate limiting than fixed windows. It avoids the burst problem where a user makes all their allowed requests right at the window boundary.

/** Configuration for the sliding window rate limiter. */
interface SlidingWindowConfig {
  windowMs: number;     // window duration in milliseconds
  maxRequests: number;  // max requests per window
  keyPrefix: string;    // Redis key namespace, e.g. "rl:api"
}

/**
 * Sliding window log check for `identifier`, performed atomically in Redis.
 *
 * Stores one sorted-set entry per request, scored by timestamp; the Lua
 * script prunes expired entries and counts the remainder atomically.
 *
 * @returns `allowed`, the remaining quota in the window, and (when denied)
 *          seconds until the oldest request ages out of the window.
 */
async function checkSlidingWindow(
  identifier: string,
  config: SlidingWindowConfig
): Promise<{ allowed: boolean; remaining: number; retryAfter: number | null }> {
  const key = `${config.keyPrefix}:${identifier}`;
  const now = Date.now();
  const windowStart = now - config.windowMs;
  // Generate the unique member here, NOT in Lua: Redis gives scripts a
  // fixed math.random seed per invocation, so an in-script random suffix
  // is identical on every call. Two requests in the same millisecond would
  // then produce the same member, ZADD would overwrite one of them, and
  // the window would undercount.
  const member = `${now}-${Math.random().toString(36).slice(2)}`;

  const result = await redis.eval(
    `
    local key = KEYS[1]
    local now = tonumber(ARGV[1])
    local window_start = tonumber(ARGV[2])
    local max_requests = tonumber(ARGV[3])
    local window_ms = tonumber(ARGV[4])
    local member = ARGV[5]

    -- Remove entries that have aged out of the window
    redis.call('ZREMRANGEBYSCORE', key, '-inf', window_start)

    -- Count current requests in the window
    local current = redis.call('ZCARD', key)

    if current < max_requests then
      redis.call('ZADD', key, now, member)
      redis.call('PEXPIRE', key, window_ms)
      return {1, max_requests - current - 1}
    end

    -- Find the oldest entry to calculate retry time
    local oldest = redis.call('ZRANGE', key, 0, 0, 'WITHSCORES')
    local retry_after = 0
    if #oldest > 0 then
      retry_after = tonumber(oldest[2]) + window_ms - now
    end

    return {0, 0, retry_after}
    `,
    1,
    key,
    now,
    windowStart,
    config.maxRequests,
    config.windowMs,
    member
  ) as number[];

  return {
    allowed: result[0] === 1,
    remaining: result[1],
    // Default to 1s when Lua reported 0/negative; round up to whole seconds
    retryAfter: result[0] === 1 ? null : Math.ceil((result[2] || 1000) / 1000),
  };
}

Express Middleware

Wrap the rate limiter in Express middleware:

import { Request, Response, NextFunction } from "express";

/**
 * Express middleware enforcing a token bucket limit per caller.
 *
 * The caller is identified by the `x-api-key` header when present,
 * otherwise by IP. Sets X-RateLimit-* headers on every response and
 * answers 429 with a Retry-After hint when the bucket is empty.
 */
function rateLimitMiddleware(config: TokenBucketConfig) {
  return async (req: Request, res: Response, next: NextFunction) => {
    // Use API key or IP as identifier
    const identifier =
      req.headers["x-api-key"]?.toString() || req.ip || "anonymous";

    let result: Awaited<ReturnType<typeof checkTokenBucket>>;
    try {
      result = await checkTokenBucket(identifier, config);
    } catch (err) {
      // Fail open: if Redis is unreachable we would rather serve traffic
      // unmetered than fail every request. Express 4 does not catch async
      // errors, so without this the rejection would go unhandled and the
      // request would hang.
      console.error("Rate limiter check failed; allowing request:", err);
      return next();
    }

    // Always set rate limit headers
    res.setHeader("X-RateLimit-Limit", config.maxTokens);
    res.setHeader("X-RateLimit-Remaining", result.remaining);

    if (!result.allowed) {
      res.setHeader("Retry-After", result.retryAfter!);
      return res.status(429).json({
        error: "Too Many Requests",
        message: "Rate limit exceeded. Please retry after the indicated time.",
        retryAfter: result.retryAfter,
      });
    }

    next();
  };
}

// Apply different limits to different endpoints
app.use(
  "/api/v1/quizzes",
  rateLimitMiddleware({
    maxTokens: 100,
    refillRate: 10,    // 10 requests per second refill
    keyPrefix: "rl:quizzes",
  })
);

app.use(
  "/api/v1/questions",
  rateLimitMiddleware({
    maxTokens: 200,
    refillRate: 20,
    keyPrefix: "rl:questions",
  })
);

// Stricter limits for submission endpoints
app.use(
  "/api/v1/quizzes/:id/submit",
  rateLimitMiddleware({
    maxTokens: 10,
    refillRate: 1,
    keyPrefix: "rl:submit",
  })
);

Handling 429 Responses on the Client

Your API clients need to handle rate limits gracefully. Here is a fetch wrapper with exponential backoff:

/**
 * Fetch wrapper that retries on HTTP 429 with exponential backoff.
 *
 * Honors the server's Retry-After header when it is a valid delta-seconds
 * value. The header may also be an HTTP-date (RFC 9110), which parseInt
 * turns into NaN — the original would then wait NaN ms; here we validate
 * and fall back to exponential backoff, capping either path at 30s.
 *
 * @throws Error after `maxRetries` consecutive 429 responses.
 */
async function fetchWithRetry(
  url: string,
  options: RequestInit = {},
  maxRetries = 3
): Promise<Response> {
  const MAX_WAIT_MS = 30000;

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    const response = await fetch(url, options);

    if (response.status !== 429) {
      return response;
    }

    if (attempt === maxRetries) {
      throw new Error("Rate limit exceeded after maximum retries");
    }

    // Use the Retry-After header only when it parses to a finite number
    const retryAfter = response.headers.get("Retry-After");
    const parsedSeconds = retryAfter ? Number.parseInt(retryAfter, 10) : NaN;
    const waitMs = Number.isFinite(parsedSeconds)
      ? Math.min(parsedSeconds * 1000, MAX_WAIT_MS)
      : Math.min(1000 * Math.pow(2, attempt), MAX_WAIT_MS);

    console.warn(
      `Rate limited. Retrying in ${waitMs}ms (attempt ${attempt + 1}/${maxRetries})`
    );

    await new Promise((resolve) => setTimeout(resolve, waitMs));
  }

  throw new Error("Unreachable");
}

Tiered Rate Limits

Different API key tiers should get different limits. Store tier information alongside the API key:

/** A pricing tier's rate limit policy. */
interface RateLimitTier {
  name: string;
  requestsPerMinute: number;
  burstSize: number;
}

/** Builds a tier record; keeps the table below compact. */
const makeTier = (
  name: string,
  requestsPerMinute: number,
  burstSize: number
): RateLimitTier => ({ name, requestsPerMinute, burstSize });

// Known plans, keyed by the tier identifier stored alongside each API key.
const tiers: Record<string, RateLimitTier> = {
  free: makeTier("Free", 60, 10),
  pro: makeTier("Pro", 600, 50),
  enterprise: makeTier("Enterprise", 6000, 200),
};

/**
 * Resolves the rate limit tier for an API key, with a 5-minute Redis cache
 * in front of the database lookup.
 *
 * NOTE(review): the cached JSON is parsed and trusted as-is — assumes only
 * this function writes `tier:*` keys; confirm nothing else can poison them.
 */
async function getTierForApiKey(apiKey: string): Promise<RateLimitTier> {
  const cached = await redis.get(`tier:${apiKey}`);
  if (cached) return JSON.parse(cached);

  // Fall back to database lookup; keys are stored hashed, never raw
  const result = await db.query(
    "SELECT tier FROM api_keys WHERE key_hash = $1",
    [hashApiKey(apiKey)]
  );

  // Default unknown or missing tier names to the free tier. The original
  // indexed `tiers` directly, so an unrecognized name yielded `undefined`,
  // which would then be stringified into the cache and break callers.
  const tierName = result.rows[0]?.tier ?? "free";
  const tier = tiers[tierName] ?? tiers.free;
  await redis.set(`tier:${apiKey}`, JSON.stringify(tier), "EX", 300);

  return tier;
}

Monitoring Rate Limit Events

Track rate limit hits to understand usage patterns:

import { Counter, Histogram } from "prom-client";

// Incremented each time a request is rejected with 429.
// prom-client's option for declaring label keys is `labelNames`;
// `labelValues` is not a valid configuration field, so the original
// metrics would silently register without any labels.
const rateLimitHits = new Counter({
  name: "api_rate_limit_hits_total",
  help: "Total number of rate-limited requests",
  labelNames: ["endpoint", "tier"],
});

// Distribution of remaining quota at request time; mass near the 0 bucket
// means clients are running right at their limit.
const rateLimitRemaining = new Histogram({
  name: "api_rate_limit_remaining",
  help: "Remaining rate limit when request is made",
  buckets: [0, 1, 5, 10, 25, 50, 100],
  labelNames: ["endpoint"],
});

Summary

Rate limiting protects your API, your infrastructure, and your users. The token bucket algorithm works well for most cases, providing burst tolerance with a steady refill rate. The sliding window is better when you need strict per-window limits.

Key points:

  • Use Lua scripts in Redis for atomic check-and-decrement
  • Always return X-RateLimit-Remaining and Retry-After headers
  • Apply different limits to different endpoints based on cost
  • Implement tiered limits based on API key plans
  • Build retry logic with exponential backoff into your client SDKs

Stay Updated

Get the latest tutorials and API tips delivered to your inbox.

No spam, unsubscribe anytime.

Enjoyed this article?

Share it with your team or try our quiz platform.