{
  "service": "OpenAI API",
  "version": "v1",
  "provider": "openai",
  "description": "Quotas and pricing description for OpenAI chat and completion models",
  "endpoints": [
    {
      "path": "/v1/chat/completions",
      "method": "POST",
      "identifier": "chat_completions",
      "models": [
        "gpt-4.1",
        "gpt-4.1-mini"
      ],
      "quotas": {
        "rate_limit": {
          "requests_per_unit": 500,
          "tokens_per_unit": 40000,
          "unit": "minute",
          "burst_multiplier": 2,
          "strategy": "token_bucket"
        },
        "pricing": {
          "input_cost_per_1k": 0.005,
          "output_cost_per_1k": 0.015,
          "currency": "USD"
        }
      },
      "error_handling": {
        "rate_limit_status_codes": [
          429
        ],
        "retry_after_header": "Retry-After",
        "backoff_multiplier": 1.5,
        "max_backoff_seconds": 60
      }
    }
  ],
  "metadata": {
    "schema_version": "v1",
    "documentation_url": "https://platform.openai.com/docs/guides/rate-limits",
    "last_updated": "2026-02-09T00:00:00Z"
  }
}


