first commit

2026-04-05 15:07:53 +08:00
commit d1bac85f28
30 changed files with 1071 additions and 0 deletions
.env.example
@@ -0,0 +1,54 @@
# =============================================================================
# LiteLLM Stack — .env.example
# Copy to .env and fill in values
# =============================================================================
# ── Postgres (DB for UI, users, virtual keys, spend) ─────────────────────────
POSTGRES_DB=litellm
POSTGRES_USER=llmproxy
POSTGRES_PASSWORD=changeme
# ── LiteLLM Logging ──────────────────────────────────────────────────────────
LITELLM_LOG=INFO
# ── Admin UI / Security ──────────────────────────────────────────────────────
# Master key used by LiteLLM for admin auth (UI + admin APIs).
# Must start with "sk-" and match general_settings.master_key.
LITELLM_MASTER_KEY=sk-change-me
# Salt key used to encrypt provider API keys & secrets in the DB.
# Generate once, keep secret, NEVER change after first run (or you lose decryption).
LITELLM_SALT_KEY=sk-change-me
# Admin UI credentials (fallback / simple login) per UI quick start docs.
UI_USERNAME=admin
UI_PASSWORD=changeme
# Disable admin UI? (set True if you want API-only mode)
DISABLE_ADMIN_UI=False
# ── Provider API keys ────────────────────────────────────────────────────────
# OpenAI
#OPENAI_API_KEY=sk-...
# Anthropic
# ANTHROPIC_API_KEY=sk-ant-...
# Azure OpenAI
AZURE_API_KEY=your-azure-openai-api-key
AZURE_API_BASE=https://your-resource-name.cognitiveservices.azure.com/
AZURE_API_VERSION=2024-12-01-preview
# Groq
# GROQ_API_KEY=gsk_...
# OpenRouter
# OPENROUTER_API_KEY=sk-or-...
# Google Gemini
# GEMINI_API_KEY=AIza...
# HuggingFace
# HUGGINGFACE_API_KEY=hf_...
# ── Optional: allow Caddy to forward headers, etc. (no special vars needed) ─
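A quick way to mint strong values for LITELLM_MASTER_KEY and LITELLM_SALT_KEY is sketched below using Python's standard secrets module; the script name and make_key helper are illustrative, not part of this stack.

# generate_keys.py - illustrative sketch, not part of this stack.
# Mints strong values for LITELLM_MASTER_KEY and LITELLM_SALT_KEY.
# Both must start with "sk-"; never change the salt key after first run.
import secrets

def make_key(prefix: str = "sk-", nbytes: int = 32) -> str:
    """Return a URL-safe random key with the required "sk-" prefix."""
    return prefix + secrets.token_urlsafe(nbytes)

if __name__ == "__main__":
    print("LITELLM_MASTER_KEY=" + make_key())
    print("LITELLM_SALT_KEY=" + make_key())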
config/litellm_config.yaml
@@ -0,0 +1,120 @@
# =============================================================================
# LiteLLM Proxy Config (DB-backed UI, no Redis)
# - Single-node deployment (1 CPU / 2 GB RAM)
# - Postgres used for:
# - Users / login
# - Virtual keys & teams
# - Spend tracking (optional)
# - No Redis caching (cache: false)
#
# Docs:
# https://docs.litellm.ai/docs/proxy/configs
# https://docs.litellm.ai/docs/proxy/config_settings
# https://docs.litellm.ai/docs/proxy/virtual_keys
# =============================================================================
# -----------------------------------------------------------------------------#
# Models exposed by the proxy
# -----------------------------------------------------------------------------#
model_list:
  # --- OpenAI examples -------------------------------------------------------
  # - model_name: gpt-4o
  #   litellm_params:
  #     model: openai/gpt-4o
  #     api_key: os.environ/OPENAI_API_KEY
  # - model_name: gpt-4o-mini
  #   litellm_params:
  #     model: openai/gpt-4o-mini
  #     api_key: os.environ/OPENAI_API_KEY
  # --- Anthropic example -----------------------------------------------------
  # - model_name: claude-3-5-sonnet
  #   litellm_params:
  #     model: anthropic/claude-3-5-sonnet-20241022
  #     api_key: os.environ/ANTHROPIC_API_KEY
  # --- Groq example ----------------------------------------------------------
  # - model_name: groq-llama-3.3-70b
  #   litellm_params:
  #     model: groq/llama-3.3-70b-versatile
  #     api_key: os.environ/GROQ_API_KEY
  # --- Azure OpenAI example --------------------------------------------------
  # - model_name: azure-gpt-4o
  #   litellm_params:
  #     model: azure/my_azure_deployment
  #     api_base: os.environ/AZURE_API_BASE
  #     api_key: os.environ/AZURE_API_KEY
  #     api_version: "2025-01-01-preview"
  # --- Local Ollama example --------------------------------------------------
  # - model_name: ollama-llama3
  #   litellm_params:
  #     model: ollama/llama3
  #     api_base: http://host.docker.internal:11434
# -----------------------------------------------------------------------------#
# Core LiteLLM behavior (no Redis cache)
# -----------------------------------------------------------------------------#
litellm_settings:
  # Retries & timeouts
  num_retries: 2
  request_timeout: 60  # seconds
  # Disable caching entirely to avoid Redis
  cache: false  # <- IMPORTANT: no Redis in your stack
  # Drop unsupported provider-specific params instead of erroring
  drop_params: true
  # If you later enable caching, add cache_params here with Redis config.
  # cache_params:
  #   type: redis
  #   host: litellm-redis
  #   port: 6379
  #   password: os.environ/REDIS_PASSWORD
# -----------------------------------------------------------------------------#
# Router settings (all in-memory, single-node)
# -----------------------------------------------------------------------------#
router_settings:
  routing_strategy: "simple-shuffle"  # simple-shuffle | least-busy | latency-based-routing
  num_retries: 1
  timeout: 30
  retry_after: 5
  # No Redis routing / transaction buffer on this small single-node setup
  # redis_host: litellm-redis
  # redis_port: 6379
  # redis_password: os.environ/REDIS_PASSWORD
# -----------------------------------------------------------------------------#
# General settings (DB-backed UI, no Redis)
# -----------------------------------------------------------------------------#
general_settings:
  # Admin master key (used for UI + admin API).
  # Must match LITELLM_MASTER_KEY from .env and start with "sk-".
  master_key: os.environ/LITELLM_MASTER_KEY
  # Postgres connection (from DATABASE_URL env var).
  # Required for:
  #   - /ui login and user management
  #   - virtual keys
  #   - spend tracking
  database_url: os.environ/DATABASE_URL
  # Store model definitions in DB (used by the UI)
  store_model_in_db: true
  # Telemetry & metrics
  telemetry: false
  enable_prometheus: true
  # On a small box, you may want to reduce some DB-heavy features.
  # You can still track spend, but disable some advanced cost management.
  disable_end_user_cost_tracking: false
  disable_reset_budget: false
  # No Redis transaction buffer here
  # use_redis_transaction_buffer: false
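For reference, a minimal client-side sketch of calling the proxy once it is running. It assumes the OpenAI Python SDK, the optional 4000:4000 debug port mapping from the compose file, and that a model named azure-gpt-4o has been enabled in model_list; none of these are part of this config.

# client_smoke_test.py - hedged sketch; see assumptions above.
from openai import OpenAI  # pip install openai

client = OpenAI(
    base_url="http://localhost:4000",  # LiteLLM proxy speaks the OpenAI API
    api_key="sk-change-me",            # a virtual key, or the master key
)

resp = client.chat.completions.create(
    model="azure-gpt-4o",  # any model_name exposed in model_list
    messages=[{"role": "user", "content": "ping"}],
)
print(resp.choices[0].message.content)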
@@ -0,0 +1,18 @@
# =============================================================================
# Prometheus Configuration — scrapes LiteLLM /metrics
# =============================================================================
global:
  scrape_interval: 15s      # how often to scrape targets
  evaluation_interval: 15s  # how often to evaluate rules
  scrape_timeout: 10s

scrape_configs:
  - job_name: "litellm"
    static_configs:
      - targets: ["litellm:4000"]  # internal service name + port
    metrics_path: "/metrics"
    scrape_interval: 15s
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]
@@ -0,0 +1,99 @@
name: litellm-stack

services:
  litellm:
    image: ghcr.io/berriai/litellm:main-stable
    container_name: litellm
    restart: unless-stopped
    # Caddy will reverse proxy to this container on caddy_caddy_net.
    # For local debugging without Caddy, you can uncomment:
    # ports:
    #   - "4000:4000"
    command: ["--config", "/app/config.yaml", "--port", "4000", "--num_workers", "1"]
    env_file:
      - .env
    environment:
      # Logging
      LITELLM_LOG: "${LITELLM_LOG:-INFO}"
      # DB connection string used by LiteLLM for UI, users, virtual keys, spend, etc.
      # LiteLLM reads this via general_settings.database_url: os.environ/DATABASE_URL
      DATABASE_URL: "postgresql://${POSTGRES_USER:-llmproxy}:${POSTGRES_PASSWORD:-dbpassword9090}@litellm-db:5432/${POSTGRES_DB:-litellm}"
    volumes:
      - ./config/litellm_config.yaml:/app/config.yaml:ro
      - litellm_logs:/app/logs
    depends_on:
      litellm-db:
        condition: service_healthy
    networks:
      - caddy_caddy_net
    healthcheck:
      # LiteLLM image is minimal (no curl/wget) → use Python
      test: ["CMD-SHELL", "python -c \"import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')\""]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 120s
    logging:
      driver: "json-file"
      options:
        max-size: "20m"
        max-file: "3"

  litellm-db:
    image: postgres:16-alpine
    container_name: litellm-db
    restart: unless-stopped
    environment:
      POSTGRES_DB: "${POSTGRES_DB:-litellm}"
      POSTGRES_USER: "${POSTGRES_USER:-llmproxy}"
      POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-dbpassword9090}"
      PGDATA: /var/lib/postgresql/data/pgdata
    volumes:
      - litellm_postgres_data:/var/lib/postgresql/data
    # No host port mapped → only other containers on the network can reach it.
    # For debugging from the host, you can temporarily uncomment:
    # ports:
    #   - "127.0.0.1:5432:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -d ${POSTGRES_DB:-litellm} -U ${POSTGRES_USER:-llmproxy}"]
      interval: 5s
      timeout: 5s
      retries: 10
      start_period: 10s
    networks:
      - caddy_caddy_net
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

networks:
  # External network created/owned by your Caddy stack
  caddy_caddy_net:
    external: true

volumes:
  litellm_logs:
    name: litellm_logs
    driver: local
  litellm_postgres_data:
    name: litellm_postgres_data
    driver: local
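Once the stack is up (docker compose up -d), the litellm healthcheck can be mirrored from the host with a short sketch, again assuming the optional 4000:4000 mapping has been uncommented.

# stack_smoke_test.py - sketch mirroring the litellm container healthcheck.
import sys
import urllib.request

try:
    with urllib.request.urlopen(
        "http://localhost:4000/health/liveliness", timeout=10
    ) as r:
        print("liveliness:", r.status)
except OSError as err:  # urllib's URLError is a subclass of OSError
    sys.exit(f"proxy not reachable: {err}")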