Back to Blog
Engineering

Rate Limiting Your Quiz API: A Practical Guide

Protect your quiz API from abuse with token bucket and sliding window rate limiters. Includes Redis-based implementation and graceful 429 handling.

Bobby Iliev · 2026-04-08 · 7 min read

Why Rate Limiting Is Not Optional

A quiz API without rate limiting is an invitation for abuse. A single misconfigured client can hammer your endpoints, a scraper can download your entire question bank, and a DDoS can take you offline. Rate limiting protects your infrastructure, ensures fair access across users, and keeps your API costs predictable.

This guide covers two proven rate limiting algorithms, a Redis-backed implementation you can deploy today, and client-side strategies for handling 429 responses gracefully.

Prerequisites

  • Node.js 20+
  • Redis 7+
  • Basic understanding of HTTP status codes

Token Bucket Algorithm

The token bucket is the most intuitive rate limiting algorithm. Think of it as a bucket that holds tokens. Each API request costs one token. Tokens refill at a fixed rate. When the bucket is empty, requests are rejected.

import Redis from "ioredis";

// Shared Redis connection for every rate limiter in this module.
// Fall back to localhost so local development works without configuration;
// the original passed `string | undefined`, which fails under strict
// typing and misconnects when REDIS_URL is unset.
const redis = new Redis(process.env.REDIS_URL ?? "redis://localhost:6379");

/** Configuration for the token bucket rate limiter. */
interface TokenBucketConfig {
  maxTokens: number;       // bucket capacity (maximum burst size)
  refillRate: number;      // tokens added per second
  keyPrefix: string;       // Redis key namespace, e.g. "rl:quizzes"
}

/**
 * Token bucket check for `identifier`, performed atomically in Redis.
 *
 * The Lua script reads the bucket state, refills tokens proportionally to
 * elapsed time, and consumes one token if available — all in one atomic
 * step, so two concurrent requests cannot both spend the last token.
 *
 * @param identifier API key or IP address that owns the bucket.
 * @param config     Bucket capacity, refill rate, and key namespace.
 * @returns `allowed`, the remaining whole tokens, and (when denied) an
 *          approximate retry delay in seconds.
 */
async function checkTokenBucket(
  identifier: string,
  config: TokenBucketConfig
): Promise<{ allowed: boolean; remaining: number; retryAfter: number | null }> {
  const key = `${config.keyPrefix}:${identifier}`;
  const now = Date.now();

  const result = await redis.eval(
    `
    local key = KEYS[1]
    local max_tokens = tonumber(ARGV[1])
    local refill_rate = tonumber(ARGV[2])
    local now = tonumber(ARGV[3])

    local bucket = redis.call('HMGET', key, 'tokens', 'last_refill')
    local tokens = tonumber(bucket[1])
    local last_refill = tonumber(bucket[2])

    -- First request for this identifier: start with a full bucket
    if tokens == nil then
      tokens = max_tokens
      last_refill = now
    end

    -- Calculate tokens to add based on elapsed time, capped at capacity
    local elapsed = (now - last_refill) / 1000
    tokens = math.min(max_tokens, tokens + (elapsed * refill_rate))
    last_refill = now

    local allowed = 0
    if tokens >= 1 then
      tokens = tokens - 1
      allowed = 1
    end

    -- Variadic HSET replaces HMSET, deprecated since Redis 4
    redis.call('HSET', key, 'tokens', tokens, 'last_refill', last_refill)
    -- Expire once the bucket would be full again, plus 1s of slack
    redis.call('EXPIRE', key, math.ceil(max_tokens / refill_rate) + 1)

    return {allowed, math.floor(tokens)}
    `,
    1,
    key,
    config.maxTokens,
    config.refillRate,
    now
  ) as [number, number];

  const allowed = result[0] === 1;
  const remaining = result[1];

  return {
    allowed,
    remaining,
    // Time for a single token to refill, rounded up to whole seconds
    retryAfter: allowed ? null : Math.ceil(1 / config.refillRate),
  };
}

Using a Lua script ensures the check-and-decrement is atomic. Without this, two concurrent requests could both read 1 remaining token and both proceed.

Sliding Window Algorithm

The sliding window provides smoother rate limiting than fixed windows. It avoids the burst problem where a user makes all their allowed requests right at the window boundary.

/** Configuration for the sliding window rate limiter. */
interface SlidingWindowConfig {
  windowMs: number;     // window duration in milliseconds
  maxRequests: number;  // max requests per window
  keyPrefix: string;    // Redis key namespace, e.g. "rl:api"
}

/**
 * Sliding window log check for `identifier`, performed atomically in Redis.
 *
 * Stores one sorted-set entry per request, scored by timestamp; the Lua
 * script prunes expired entries and counts the remainder atomically.
 *
 * @returns `allowed`, the remaining quota in the window, and (when denied)
 *          seconds until the oldest request ages out of the window.
 */
async function checkSlidingWindow(
  identifier: string,
  config: SlidingWindowConfig
): Promise<{ allowed: boolean; remaining: number; retryAfter: number | null }> {
  const key = `${config.keyPrefix}:${identifier}`;
  const now = Date.now();
  const windowStart = now - config.windowMs;
  // Generate the unique member here, NOT in Lua: Redis gives scripts a
  // fixed math.random seed per invocation, so an in-script random suffix
  // is identical on every call. Two requests in the same millisecond would
  // then produce the same member, ZADD would overwrite one of them, and
  // the window would undercount.
  const member = `${now}-${Math.random().toString(36).slice(2)}`;

  const result = await redis.eval(
    `
    local key = KEYS[1]
    local now = tonumber(ARGV[1])
    local window_start = tonumber(ARGV[2])
    local max_requests = tonumber(ARGV[3])
    local window_ms = tonumber(ARGV[4])
    local member = ARGV[5]

    -- Remove entries that have aged out of the window
    redis.call('ZREMRANGEBYSCORE', key, '-inf', window_start)

    -- Count current requests in the window
    local current = redis.call('ZCARD', key)

    if current < max_requests then
      redis.call('ZADD', key, now, member)
      redis.call('PEXPIRE', key, window_ms)
      return {1, max_requests - current - 1}
    end

    -- Find the oldest entry to calculate retry time
    local oldest = redis.call('ZRANGE', key, 0, 0, 'WITHSCORES')
    local retry_after = 0
    if #oldest > 0 then
      retry_after = tonumber(oldest[2]) + window_ms - now
    end

    return {0, 0, retry_after}
    `,
    1,
    key,
    now,
    windowStart,
    config.maxRequests,
    config.windowMs,
    member
  ) as number[];

  return {
    allowed: result[0] === 1,
    remaining: result[1],
    // Default to 1s when Lua reported 0/negative; round up to whole seconds
    retryAfter: result[0] === 1 ? null : Math.ceil((result[2] || 1000) / 1000),
  };
}

Express Middleware

Wrap the rate limiter in Express middleware:

import { Request, Response, NextFunction } from "express";

/**
 * Express middleware enforcing a token bucket limit per caller.
 *
 * The caller is identified by the `x-api-key` header when present,
 * otherwise by IP. Sets X-RateLimit-* headers on every response and
 * answers 429 with a Retry-After hint when the bucket is empty.
 */
function rateLimitMiddleware(config: TokenBucketConfig) {
  return async (req: Request, res: Response, next: NextFunction) => {
    // Use API key or IP as identifier
    const identifier =
      req.headers["x-api-key"]?.toString() || req.ip || "anonymous";

    let result: Awaited<ReturnType<typeof checkTokenBucket>>;
    try {
      result = await checkTokenBucket(identifier, config);
    } catch (err) {
      // Fail open: if Redis is unreachable we would rather serve traffic
      // unmetered than fail every request. Express 4 does not catch async
      // errors, so without this the rejection would go unhandled and the
      // request would hang.
      console.error("Rate limiter check failed; allowing request:", err);
      return next();
    }

    // Always set rate limit headers
    res.setHeader("X-RateLimit-Limit", config.maxTokens);
    res.setHeader("X-RateLimit-Remaining", result.remaining);

    if (!result.allowed) {
      res.setHeader("Retry-After", result.retryAfter!);
      return res.status(429).json({
        error: "Too Many Requests",
        message: "Rate limit exceeded. Please retry after the indicated time.",
        retryAfter: result.retryAfter,
      });
    }

    next();
  };
}

// Apply different limits to different endpoints
app.use(
  "/api/v1/quizzes",
  rateLimitMiddleware({
    maxTokens: 100,
    refillRate: 10,    // 10 requests per second refill
    keyPrefix: "rl:quizzes",
  })
);

app.use(
  "/api/v1/questions",
  rateLimitMiddleware({
    maxTokens: 200,
    refillRate: 20,
    keyPrefix: "rl:questions",
  })
);

// Stricter limits for submission endpoints
app.use(
  "/api/v1/quizzes/:id/submit",
  rateLimitMiddleware({
    maxTokens: 10,
    refillRate: 1,
    keyPrefix: "rl:submit",
  })
);

Handling 429 Responses on the Client

Your API clients need to handle rate limits gracefully. Here is a fetch wrapper with exponential backoff:

/**
 * Fetch wrapper that retries on HTTP 429 with exponential backoff.
 *
 * Honors the server's Retry-After header when it is a valid delta-seconds
 * value. The header may also be an HTTP-date (RFC 9110), which parseInt
 * turns into NaN — the original would then wait NaN ms; here we validate
 * and fall back to exponential backoff, capping either path at 30s.
 *
 * @throws Error after `maxRetries` consecutive 429 responses.
 */
async function fetchWithRetry(
  url: string,
  options: RequestInit = {},
  maxRetries = 3
): Promise<Response> {
  const MAX_WAIT_MS = 30000;

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    const response = await fetch(url, options);

    if (response.status !== 429) {
      return response;
    }

    if (attempt === maxRetries) {
      throw new Error("Rate limit exceeded after maximum retries");
    }

    // Use the Retry-After header only when it parses to a finite number
    const retryAfter = response.headers.get("Retry-After");
    const parsedSeconds = retryAfter ? Number.parseInt(retryAfter, 10) : NaN;
    const waitMs = Number.isFinite(parsedSeconds)
      ? Math.min(parsedSeconds * 1000, MAX_WAIT_MS)
      : Math.min(1000 * Math.pow(2, attempt), MAX_WAIT_MS);

    console.warn(
      `Rate limited. Retrying in ${waitMs}ms (attempt ${attempt + 1}/${maxRetries})`
    );

    await new Promise((resolve) => setTimeout(resolve, waitMs));
  }

  throw new Error("Unreachable");
}

Tiered Rate Limits

Different API key tiers should get different limits. Store tier information alongside the API key:

/** A pricing tier's rate limit policy. */
interface RateLimitTier {
  name: string;
  requestsPerMinute: number;
  burstSize: number;
}

/** Builds a tier record; keeps the table below compact. */
const makeTier = (
  name: string,
  requestsPerMinute: number,
  burstSize: number
): RateLimitTier => ({ name, requestsPerMinute, burstSize });

// Known plans, keyed by the tier identifier stored alongside each API key.
const tiers: Record<string, RateLimitTier> = {
  free: makeTier("Free", 60, 10),
  pro: makeTier("Pro", 600, 50),
  enterprise: makeTier("Enterprise", 6000, 200),
};

/**
 * Resolves the rate limit tier for an API key, with a 5-minute Redis cache
 * in front of the database lookup.
 *
 * NOTE(review): the cached JSON is parsed and trusted as-is — assumes only
 * this function writes `tier:*` keys; confirm nothing else can poison them.
 */
async function getTierForApiKey(apiKey: string): Promise<RateLimitTier> {
  const cached = await redis.get(`tier:${apiKey}`);
  if (cached) return JSON.parse(cached);

  // Fall back to database lookup; keys are stored hashed, never raw
  const result = await db.query(
    "SELECT tier FROM api_keys WHERE key_hash = $1",
    [hashApiKey(apiKey)]
  );

  // Default unknown or missing tier names to the free tier. The original
  // indexed `tiers` directly, so an unrecognized name yielded `undefined`,
  // which would then be stringified into the cache and break callers.
  const tierName = result.rows[0]?.tier ?? "free";
  const tier = tiers[tierName] ?? tiers.free;
  await redis.set(`tier:${apiKey}`, JSON.stringify(tier), "EX", 300);

  return tier;
}

Monitoring Rate Limit Events

Track rate limit hits to understand usage patterns:

import { Counter, Histogram } from "prom-client";

// Incremented each time a request is rejected with 429.
// prom-client's option for declaring label keys is `labelNames`;
// `labelValues` is not a valid configuration field, so the original
// metrics would silently register without any labels.
const rateLimitHits = new Counter({
  name: "api_rate_limit_hits_total",
  help: "Total number of rate-limited requests",
  labelNames: ["endpoint", "tier"],
});

// Distribution of remaining quota at request time; mass near the 0 bucket
// means clients are running right at their limit.
const rateLimitRemaining = new Histogram({
  name: "api_rate_limit_remaining",
  help: "Remaining rate limit when request is made",
  buckets: [0, 1, 5, 10, 25, 50, 100],
  labelNames: ["endpoint"],
});

Summary

Rate limiting protects your API, your infrastructure, and your users. The token bucket algorithm works well for most cases, providing burst tolerance with a steady refill rate. The sliding window is better when you need strict per-window limits.

Key points:

  • Use Lua scripts in Redis for atomic check-and-decrement
  • Always return X-RateLimit-Remaining and Retry-After headers
  • Apply different limits to different endpoints based on cost
  • Implement tiered limits based on API key plans
  • Build retry logic with exponential backoff into your client SDKs

Stay Updated

Get the latest tutorials and API tips delivered to your inbox.

No spam, unsubscribe anytime.

Enjoyed this article?

Share it with your team or try our quiz platform.