Not all endpoints are equal. Your /health endpoint can handle thousands of requests, but your /ai/generate endpoint calls an expensive LLM. This recipe shows how to apply different rate limits per endpoint.

The pattern

// Define limits per endpoint pattern (most specific first)
const ENDPOINT_LIMITS = {
  "/api/ai/*": { limit: 10, duration: "1m" },      // Expensive AI calls
  "/api/export/*": { limit: 5, duration: "1h" },   // Heavy data exports
  "/api/*": { limit: 100, duration: "1m" },        // Default API routes
};

// Convert a glob-style pattern ("/api/ai/*") into a regex and test the path
function matchPath(pattern: string, path: string): boolean {
  const regex = new RegExp(`^${pattern.replace(/\*/g, ".*")}$`);
  return regex.test(path);
}

// Match the request path against the patterns in declaration order
function getLimits(path: string) {
  for (const [pattern, config] of Object.entries(ENDPOINT_LIMITS)) {
    if (matchPath(pattern, path)) return config;
  }
  return { limit: 100, duration: "1m" }; // fallback
}

Full implementation

Next.js Middleware

// middleware.ts
import { Ratelimit } from "@unkey/ratelimit";
import { NextResponse } from "next/server";
import type { NextRequest } from "next/server";

const limiter = new Ratelimit({
  rootKey: process.env.UNKEY_ROOT_KEY!,
  namespace: "api",
  limit: 100,
  duration: "1m",
});

// Define endpoint-specific limits (most specific first)
const ENDPOINT_LIMITS: Array<{
  pattern: RegExp;
  limit: number;
  duration: string;
  namespace: string;
}> = [
  // Expensive AI endpoints - very tight limits
  { 
    pattern: /^\/api\/ai\/.*/,
    limit: 10,
    duration: "1m",
    namespace: "ai",
  },
  // Data export - limit per hour
  { 
    pattern: /^\/api\/export\/.*/,
    limit: 5,
    duration: "1h",
    namespace: "export",
  },
  // Write operations - moderate limits
  { 
    pattern: /^\/api\/.*\/(create|update|delete)/,
    limit: 30,
    duration: "1m",
    namespace: "writes",
  },
  // Default API routes
  { 
    pattern: /^\/api\/.*/,
    limit: 100,
    duration: "1m",
    namespace: "api",
  },
];

function getEndpointConfig(pathname: string) {
  for (const config of ENDPOINT_LIMITS) {
    if (config.pattern.test(pathname)) {
      return config;
    }
  }
  return ENDPOINT_LIMITS[ENDPOINT_LIMITS.length - 1]; // default
}

export async function middleware(request: NextRequest) {
  // Skip non-API routes
  if (!request.nextUrl.pathname.startsWith("/api")) {
    return NextResponse.next();
  }

  const userId = request.headers.get("x-user-id") ?? 
                 request.ip ?? 
                 "anonymous";

  const config = getEndpointConfig(request.nextUrl.pathname);

  // Prefix the identifier with the endpoint group so each group gets its own counter
  const { success, remaining, reset } = await limiter.limit(
    `${config.namespace}:${userId}`,
    {
      limit: config.limit,
      duration: config.duration as any,
    }
  );

  if (!success) {
    return NextResponse.json(
      { 
        error: "Rate limit exceeded",
        endpoint: config.namespace,
        retryAfter: Math.ceil((reset - Date.now()) / 1000),
      },
      { 
        status: 429,
        headers: {
          "X-RateLimit-Limit": config.limit.toString(),
          "X-RateLimit-Remaining": "0",
          "X-RateLimit-Reset": reset.toString(),
          "Retry-After": Math.ceil((reset - Date.now()) / 1000).toString(),
        },
      }
    );
  }

  const response = NextResponse.next();
  response.headers.set("X-RateLimit-Limit", config.limit.toString());
  response.headers.set("X-RateLimit-Remaining", remaining.toString());
  response.headers.set("X-RateLimit-Reset", reset.toString());
  
  return response;
}

export const config = {
  matcher: "/api/:path*",
};

Express with route-specific middleware

// middleware/ratelimit.ts
import { Ratelimit } from "@unkey/ratelimit";
import type { Request, Response, NextFunction } from "express";

const limiter = new Ratelimit({
  rootKey: process.env.UNKEY_ROOT_KEY!,
  namespace: "api",
  limit: 100,
  duration: "1m",
});

interface RateLimitOptions {
  limit: number;
  duration: string;
  namespace?: string;
  identifierFn?: (req: Request) => string;
}

export function rateLimit(options: RateLimitOptions) {
  return async (req: Request, res: Response, next: NextFunction) => {
    const identifier = options.identifierFn?.(req) ?? 
                       req.headers["x-user-id"] as string ?? 
                       req.ip ?? 
                       "anonymous";

    const namespace = options.namespace ?? "api";

    const { success, remaining, reset } = await limiter.limit(
      `${namespace}:${identifier}`,
      {
        limit: options.limit,
        duration: options.duration as any,
      }
    );

    res.set({
      "X-RateLimit-Limit": options.limit.toString(),
      "X-RateLimit-Remaining": remaining.toString(),
      "X-RateLimit-Reset": reset.toString(),
    });

    if (!success) {
      return res.status(429).json({
        error: "Rate limit exceeded",
        retryAfter: Math.ceil((reset - Date.now()) / 1000),
      });
    }

    next();
  };
}

// Usage in routes
import express from "express";
import { rateLimit } from "./middleware/ratelimit";

const app = express();

// Expensive AI endpoint - 10 requests per minute
app.post("/api/ai/generate", 
  rateLimit({ limit: 10, duration: "1m", namespace: "ai" }),
  async (req, res) => {
    // Call your AI provider
  }
);

// Data export - 5 per hour
app.get("/api/export/:type",
  rateLimit({ limit: 5, duration: "1h", namespace: "export" }),
  async (req, res) => {
    // Generate export
  }
);

// Default for all other /api routes - 100 per minute (applies to routes registered after this)
app.use("/api",
  rateLimit({ limit: 100, duration: "1m" })
);

Hono with route groups

import { Hono } from "hono";
import type { Context, Next } from "hono";
import { Ratelimit } from "@unkey/ratelimit";

const app = new Hono();

const limiter = new Ratelimit({
  rootKey: process.env.UNKEY_ROOT_KEY!,
  namespace: "api",
  limit: 100,
  duration: "1m",
});

// Middleware factory for different limits
function rateLimitMiddleware(options: { 
  limit: number; 
  duration: string; 
  namespace: string;
}) {
  return async (c: Context, next: Next) => {
    const identifier = c.req.header("x-user-id") ?? "anonymous";
    
    const { success, remaining, reset } = await limiter.limit(
      `${options.namespace}:${identifier}`,
      { limit: options.limit, duration: options.duration as any }
    );

    c.header("X-RateLimit-Limit", options.limit.toString());
    c.header("X-RateLimit-Remaining", remaining.toString());
    c.header("X-RateLimit-Reset", reset.toString());

    if (!success) {
      return c.json({ error: "Rate limit exceeded" }, 429);
    }

    await next();
  };
}

// AI routes - strict limits
const ai = new Hono();
ai.use("*", rateLimitMiddleware({ limit: 10, duration: "1m", namespace: "ai" }));
ai.post("/generate", (c) => c.json({ result: "..." }));
ai.post("/embed", (c) => c.json({ result: "..." }));

// Export routes - hourly limits
const exportRoutes = new Hono();
exportRoutes.use("*", rateLimitMiddleware({ limit: 5, duration: "1h", namespace: "export" }));
exportRoutes.get("/csv", (c) => c.json({ url: "..." }));
exportRoutes.get("/json", (c) => c.json({ url: "..." }));

// Mount route groups
app.route("/api/ai", ai);
app.route("/api/export", exports);

// Default limit for any /api routes registered after this point
app.use("/api/*", rateLimitMiddleware({ limit: 100, duration: "1m", namespace: "api" }));

export default app;

Cost-based limiting

For endpoints where some operations are more expensive than others, use cost-based limiting:
app.post("/api/ai/generate", async (req, res) => {
  const { model, tokens } = req.body;
  
  // Different models have different costs
  const cost = model === "gpt-4" ? 10 : 
               model === "gpt-3.5" ? 2 : 1;

  const { success } = await limiter.limit(userId, { cost });

  if (!success) {
    return res.status(429).json({ error: "Rate limit exceeded" });
  }

  // Process request...
});
With a limit of 100/minute:
  • 100 cheap model calls, OR
  • 50 gpt-3.5 calls, OR
  • 10 gpt-4 calls

Best practices

Use separate namespaces

Different namespaces mean separate counters. A user can hit their AI limit without affecting their regular API quota.
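
As an alternative to prefixing the identifier (as the middleware above does), you can give each endpoint group its own Ratelimit instance with its own namespace. A minimal sketch, assuming the same root key; userId stands in for whatever identifier you already resolve:
import { Ratelimit } from "@unkey/ratelimit";

// One limiter per endpoint group, each backed by its own Unkey namespace
const aiLimiter = new Ratelimit({
  rootKey: process.env.UNKEY_ROOT_KEY!,
  namespace: "ai",
  limit: 10,
  duration: "1m",
});

const apiLimiter = new Ratelimit({
  rootKey: process.env.UNKEY_ROOT_KEY!,
  namespace: "api",
  limit: 100,
  duration: "1m",
});

const userId = "user_123"; // example identifier

// The same user is counted independently in each namespace
await aiLimiter.limit(userId);  // consumes the AI quota only
await apiLimiter.limit(userId); // consumes the general API quota only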

Order patterns correctly

When matching paths, put more specific patterns first. /api/ai/* should come before /api/*.
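For example, with a first-match loop like getEndpointConfig in the Next.js example above, putting the catch-all first silently shadows the stricter rule:
// Wrong: /api/* matches first, so /api/ai/generate gets the loose 100/min limit
const wrongOrder = [
  { pattern: /^\/api\/.*/, limit: 100, duration: "1m" },
  { pattern: /^\/api\/ai\/.*/, limit: 10, duration: "1m" },
];

// Right: the specific AI pattern is checked before the catch-all
const rightOrder = [
  { pattern: /^\/api\/ai\/.*/, limit: 10, duration: "1m" },
  { pattern: /^\/api\/.*/, limit: 100, duration: "1m" },
];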

Consider the user experience

Tight limits on expensive endpoints are fine, but communicate them clearly in your API docs.
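
Besides the docs, the rate limit headers set by the middleware above communicate limits programmatically. A minimal sketch of a client that reads them and respects Retry-After (the endpoint URL is only an example):
async function callGenerate(payload: unknown) {
  const res = await fetch("/api/ai/generate", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  if (res.status === 429) {
    // Retry-After is in seconds, as set by the middleware above
    const retryAfter = Number(res.headers.get("Retry-After") ?? "60");
    throw new Error(`Rate limited, retry in ${retryAfter}s`);
  }

  return res.json();
}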

Monitor and adjust

Use Unkey analytics to see which endpoints hit limits most often, then adjust accordingly.
