first commit

2026-04-05 15:07:53 +08:00
commit d1bac85f28
30 changed files with 1071 additions and 0 deletions
.env.example
@@ -0,0 +1,54 @@
# =============================================================================
# LiteLLM Stack — .env.example
# Copy to .env and fill in values
# =============================================================================
# ── Postgres (DB for UI, users, virtual keys, spend) ─────────────────────────
POSTGRES_DB=litellm
POSTGRES_USER=llmproxy
POSTGRES_PASSWORD=changeme
# ── LiteLLM Logging ──────────────────────────────────────────────────────────
LITELLM_LOG=INFO
# ── Admin UI / Security ──────────────────────────────────────────────────────
# Master key used by LiteLLM for admin auth (UI + admin APIs).
# Must start with "sk-" and match general_settings.master_key.
LITELLM_MASTER_KEY=sk-change-me
# Salt key used to encrypt provider API keys & secrets in the DB.
# Generate once, keep secret, NEVER change after first run (or you lose decryption).
LITELLM_SALT_KEY=sk-change-me
# Admin UI credentials (fallback / simple login) per UI quick start docs.
UI_USERNAME=admin
UI_PASSWORD=changeme
# Disable admin UI? (set True if you want API-only mode)
DISABLE_ADMIN_UI=False
# ── Provider API keys ────────────────────────────────────────────────────────
# OpenAI
#OPENAI_API_KEY=sk-...
# Anthropic
# ANTHROPIC_API_KEY=sk-ant-...
# Azure OpenAI
AZURE_API_KEY=your-azure-openai-api-key
AZURE_API_BASE=https://your-resource-name.cognitiveservices.azure.com/
AZURE_API_VERSION=2024-12-01-preview
# Groq
# GROQ_API_KEY=gsk_...
# OpenRouter
# OPENROUTER_API_KEY=sk-or-...
# Google Gemini
# GEMINI_API_KEY=AIza...
# HuggingFace
# HUGGINGFACE_API_KEY=hf_...
# ── Optional: allow Caddy to forward headers, etc. (no special vars needed) ─
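A quick way to mint strong values for LITELLM_MASTER_KEY and LITELLM_SALT_KEY is sketched below using Python's standard secrets module; the script name and make_key helper are illustrative, not part of this stack.

# generate_keys.py - illustrative sketch, not part of this stack.
# Mints strong values for LITELLM_MASTER_KEY and LITELLM_SALT_KEY.
# Both must start with "sk-"; never change the salt key after first run.
import secrets

def make_key(prefix: str = "sk-", nbytes: int = 32) -> str:
    """Return a URL-safe random key with the required "sk-" prefix."""
    return prefix + secrets.token_urlsafe(nbytes)

if __name__ == "__main__":
    print("LITELLM_MASTER_KEY=" + make_key())
    print("LITELLM_SALT_KEY=" + make_key())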
config/litellm_config.yaml
@@ -0,0 +1,120 @@
# =============================================================================
# LiteLLM Proxy Config (DB-backed UI, no Redis)
# - Single-node deployment (1 CPU / 2 GB RAM)
# - Postgres used for:
# - Users / login
# - Virtual keys & teams
# - Spend tracking (optional)
# - No Redis caching (cache: false)
#
# Docs:
# https://docs.litellm.ai/docs/proxy/configs
# https://docs.litellm.ai/docs/proxy/config_settings
# https://docs.litellm.ai/docs/proxy/virtual_keys
# =============================================================================
# -----------------------------------------------------------------------------#
# Models exposed by the proxy
# -----------------------------------------------------------------------------#
model_list:
  # --- OpenAI examples -------------------------------------------------------
  # - model_name: gpt-4o
  #   litellm_params:
  #     model: openai/gpt-4o
  #     api_key: os.environ/OPENAI_API_KEY
  # - model_name: gpt-4o-mini
  #   litellm_params:
  #     model: openai/gpt-4o-mini
  #     api_key: os.environ/OPENAI_API_KEY
  # --- Anthropic example -----------------------------------------------------
  # - model_name: claude-3-5-sonnet
  #   litellm_params:
  #     model: anthropic/claude-3-5-sonnet-20241022
  #     api_key: os.environ/ANTHROPIC_API_KEY
  # --- Groq example ----------------------------------------------------------
  # - model_name: groq-llama-3.3-70b
  #   litellm_params:
  #     model: groq/llama-3.3-70b-versatile
  #     api_key: os.environ/GROQ_API_KEY
  # --- Azure OpenAI example --------------------------------------------------
  # - model_name: azure-gpt-4o
  #   litellm_params:
  #     model: azure/my_azure_deployment
  #     api_base: os.environ/AZURE_API_BASE
  #     api_key: os.environ/AZURE_API_KEY
  #     api_version: "2025-01-01-preview"
  # --- Local Ollama example --------------------------------------------------
  # - model_name: ollama-llama3
  #   litellm_params:
  #     model: ollama/llama3
  #     api_base: http://host.docker.internal:11434
# -----------------------------------------------------------------------------#
# Core LiteLLM behavior (no Redis cache)
# -----------------------------------------------------------------------------#
litellm_settings:
  # Retries & timeouts
  num_retries: 2
  request_timeout: 60  # seconds
  # Disable caching entirely to avoid Redis
  cache: false  # <- IMPORTANT: no Redis in your stack
  # Drop unsupported provider-specific params instead of erroring
  drop_params: true
  # If you later enable caching, add cache_params here with Redis config.
  # cache_params:
  #   type: redis
  #   host: litellm-redis
  #   port: 6379
  #   password: os.environ/REDIS_PASSWORD
# -----------------------------------------------------------------------------#
# Router settings (all in-memory, single-node)
# -----------------------------------------------------------------------------#
router_settings:
  routing_strategy: "simple-shuffle"  # simple-shuffle | least-busy | latency-based-routing
  num_retries: 1
  timeout: 30
  retry_after: 5
  # No Redis routing / transaction buffer on this small single-node setup
  # redis_host: litellm-redis
  # redis_port: 6379
  # redis_password: os.environ/REDIS_PASSWORD
# -----------------------------------------------------------------------------#
# General settings (DB-backed UI, no Redis)
# -----------------------------------------------------------------------------#
general_settings:
  # Admin master key (used for UI + admin API).
  # Must match LITELLM_MASTER_KEY from .env and start with "sk-".
  master_key: os.environ/LITELLM_MASTER_KEY
  # Postgres connection (from DATABASE_URL env var).
  # Required for:
  #   - /ui login and user management
  #   - virtual keys
  #   - spend tracking
  database_url: os.environ/DATABASE_URL
  # Store model definitions in DB (used by the UI)
  store_model_in_db: true
  # Telemetry & metrics
  telemetry: false
  enable_prometheus: true
  # On a small box, you may want to reduce some DB-heavy features.
  # You can still track spend, but disable some advanced cost management.
  disable_end_user_cost_tracking: false
  disable_reset_budget: false
  # No Redis transaction buffer here
  # use_redis_transaction_buffer: false
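For reference, a minimal client-side sketch of calling the proxy once it is running. It assumes the OpenAI Python SDK, the optional 4000:4000 debug port mapping from the compose file, and that a model named azure-gpt-4o has been enabled in model_list; none of these are part of this config.

# client_smoke_test.py - hedged sketch; see assumptions above.
from openai import OpenAI  # pip install openai

client = OpenAI(
    base_url="http://localhost:4000",  # LiteLLM proxy speaks the OpenAI API
    api_key="sk-change-me",            # a virtual key, or the master key
)

resp = client.chat.completions.create(
    model="azure-gpt-4o",  # any model_name exposed in model_list
    messages=[{"role": "user", "content": "ping"}],
)
print(resp.choices[0].message.content)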
@@ -0,0 +1,18 @@
# =============================================================================
# Prometheus Configuration — scrapes LiteLLM /metrics
# =============================================================================
global:
  scrape_interval: 15s      # how often to scrape targets
  evaluation_interval: 15s  # how often to evaluate rules
  scrape_timeout: 10s

scrape_configs:
  - job_name: "litellm"
    static_configs:
      - targets: ["litellm:4000"]  # internal service name + port
    metrics_path: "/metrics"
    scrape_interval: 15s
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]
@@ -0,0 +1,99 @@
name: litellm-stack

services:
  litellm:
    image: ghcr.io/berriai/litellm:main-stable
    container_name: litellm
    restart: unless-stopped
    # Caddy will reverse proxy to this container on caddy_caddy_net.
    # For local debugging without Caddy, you can uncomment:
    # ports:
    #   - "4000:4000"
    command: ["--config", "/app/config.yaml", "--port", "4000", "--num_workers", "1"]
    env_file:
      - .env
    environment:
      # Logging
      LITELLM_LOG: "${LITELLM_LOG:-INFO}"
      # DB connection string used by LiteLLM for UI, users, virtual keys, spend, etc.
      # LiteLLM reads this via general_settings.database_url: os.environ/DATABASE_URL
      DATABASE_URL: "postgresql://${POSTGRES_USER:-llmproxy}:${POSTGRES_PASSWORD:-dbpassword9090}@litellm-db:5432/${POSTGRES_DB:-litellm}"
    volumes:
      - ./config/litellm_config.yaml:/app/config.yaml:ro
      - litellm_logs:/app/logs
    depends_on:
      litellm-db:
        condition: service_healthy
    networks:
      - caddy_caddy_net
    healthcheck:
      # LiteLLM image is minimal (no curl/wget) → use Python
      test: ["CMD-SHELL", "python -c \"import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')\""]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 120s
    logging:
      driver: "json-file"
      options:
        max-size: "20m"
        max-file: "3"

  litellm-db:
    image: postgres:16-alpine
    container_name: litellm-db
    restart: unless-stopped
    environment:
      POSTGRES_DB: "${POSTGRES_DB:-litellm}"
      POSTGRES_USER: "${POSTGRES_USER:-llmproxy}"
      POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-dbpassword9090}"
      PGDATA: /var/lib/postgresql/data/pgdata
    volumes:
      - litellm_postgres_data:/var/lib/postgresql/data
    # No host port mapped → only other containers on the network can reach it.
    # For debugging from the host, you can temporarily uncomment:
    # ports:
    #   - "127.0.0.1:5432:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -d ${POSTGRES_DB:-litellm} -U ${POSTGRES_USER:-llmproxy}"]
      interval: 5s
      timeout: 5s
      retries: 10
      start_period: 10s
    networks:
      - caddy_caddy_net
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

networks:
  # External network created/owned by your Caddy stack
  caddy_caddy_net:
    external: true

volumes:
  litellm_logs:
    name: litellm_logs
    driver: local
  litellm_postgres_data:
    name: litellm_postgres_data
    driver: local
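Once the stack is up (docker compose up -d), the litellm healthcheck can be mirrored from the host with a short sketch, again assuming the optional 4000:4000 mapping has been uncommented.

# stack_smoke_test.py - sketch mirroring the litellm container healthcheck.
import sys
import urllib.request

try:
    with urllib.request.urlopen(
        "http://localhost:4000/health/liveliness", timeout=10
    ) as r:
        print("liveliness:", r.status)
except OSError as err:  # urllib's URLError is a subclass of OSError
    sys.exit(f"proxy not reachable: {err}")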