first commit
This commit is contained in:
@@ -0,0 +1,54 @@
# =============================================================================
# LiteLLM Stack — .env.example
# Copy to .env and fill in values
# =============================================================================

# ── Postgres (DB for UI, users, virtual keys, spend) ─────────────────────────
POSTGRES_DB=litellm
POSTGRES_USER=llmproxy
# Placeholder only — set a strong, unique password in your real .env.
POSTGRES_PASSWORD=change-me

# ── LiteLLM Logging ──────────────────────────────────────────────────────────
LITELLM_LOG=INFO

# ── Admin UI / Security ──────────────────────────────────────────────────────
# Master key used by LiteLLM for admin auth (UI + admin APIs).
# Must start with "sk-" and match general_settings.master_key.
LITELLM_MASTER_KEY=sk-change-me

# Salt key used to encrypt provider API keys & secrets in the DB.
# Generate once, keep secret, NEVER change after first run (or you lose decryption).
# Use a value DIFFERENT from LITELLM_MASTER_KEY.
LITELLM_SALT_KEY=sk-change-me-salt

# Admin UI credentials (fallback / simple login) per UI quick start docs.
UI_USERNAME=admin
UI_PASSWORD=change-me

# Disable admin UI? (set True if you want API-only mode)
DISABLE_ADMIN_UI=False

# ── Provider API keys ────────────────────────────────────────────────────────
# OpenAI
# OPENAI_API_KEY=sk-...

# Anthropic
# ANTHROPIC_API_KEY=sk-ant-...

# Azure OpenAI
# SECURITY: a live Azure key and endpoint were previously committed in this
# example file — rotate that key in the Azure portal; keep only placeholders here.
AZURE_API_KEY=your-azure-api-key
AZURE_API_BASE=https://your-resource.cognitiveservices.azure.com/
AZURE_API_VERSION=2024-12-01-preview

# Groq
# GROQ_API_KEY=gsk_...

# OpenRouter
# OPENROUTER_API_KEY=sk-or-...

# Google Gemini
# GEMINI_API_KEY=AIza...

# HuggingFace
# HUGGINGFACE_API_KEY=hf_...

# ── Optional: allow Caddy to forward headers, etc. (no special vars needed) ─
# =============================================================================
# LiteLLM Proxy Config (DB-backed UI, no Redis)
# - Single-node deployment (1 CPU / 2 GB RAM)
# - Postgres used for:
#     - Users / login
#     - Virtual keys & teams
#     - Spend tracking (optional)
# - No Redis caching (cache: false)
#
# Docs:
#   https://docs.litellm.ai/docs/proxy/configs
#   https://docs.litellm.ai/docs/proxy/config_settings
#   https://docs.litellm.ai/docs/proxy/virtual_keys
# =============================================================================

# -----------------------------------------------------------------------------#
# Models exposed by the proxy
# -----------------------------------------------------------------------------#
# All entries below are commented-out examples; models are managed via the
# admin UI / DB (store_model_in_db: true). Explicit [] avoids a bare
# `model_list:` parsing as null.
model_list: []
  # --- OpenAI examples -------------------------------------------------------
  # - model_name: gpt-4o
  #   litellm_params:
  #     model: openai/gpt-4o
  #     api_key: os.environ/OPENAI_API_KEY

  # - model_name: gpt-4o-mini
  #   litellm_params:
  #     model: openai/gpt-4o-mini
  #     api_key: os.environ/OPENAI_API_KEY

  # --- Anthropic example -----------------------------------------------------
  # - model_name: claude-3-5-sonnet
  #   litellm_params:
  #     model: anthropic/claude-3-5-sonnet-20241022
  #     api_key: os.environ/ANTHROPIC_API_KEY

  # --- Groq example ----------------------------------------------------------
  # - model_name: groq-llama-3.3-70b
  #   litellm_params:
  #     model: groq/llama-3.3-70b-versatile
  #     api_key: os.environ/GROQ_API_KEY

  # --- Azure OpenAI example --------------------------------------------------
  # - model_name: azure-gpt-4o
  #   litellm_params:
  #     model: azure/my_azure_deployment
  #     api_base: os.environ/AZURE_API_BASE
  #     api_key: os.environ/AZURE_API_KEY
  #     api_version: "2025-01-01-preview"

  # --- Local Ollama example --------------------------------------------------
  # - model_name: ollama-llama3
  #   litellm_params:
  #     model: ollama/llama3
  #     api_base: http://host.docker.internal:11434

# -----------------------------------------------------------------------------#
# Core LiteLLM behavior (no Redis cache)
# -----------------------------------------------------------------------------#
litellm_settings:
  # Retries & timeouts
  num_retries: 2
  request_timeout: 60  # seconds

  # Disable caching entirely to avoid Redis
  cache: false  # <- IMPORTANT: no Redis in your stack

  # Drop unsupported provider-specific params instead of erroring
  drop_params: true

  # If you later enable caching, add cache_params here with Redis config.
  # cache_params:
  #   type: redis
  #   host: litellm-redis
  #   port: 6379
  #   password: os.environ/REDIS_PASSWORD

# -----------------------------------------------------------------------------#
# Router settings (all in-memory, single-node)
# -----------------------------------------------------------------------------#
router_settings:
  routing_strategy: "simple-shuffle"  # simple-shuffle | least-busy | latency-based-routing
  num_retries: 1
  timeout: 30
  retry_after: 5

  # No Redis routing / transaction buffer on this small single-node setup
  # redis_host: litellm-redis
  # redis_port: 6379
  # redis_password: os.environ/REDIS_PASSWORD

# -----------------------------------------------------------------------------#
# General settings (DB-backed UI, no Redis)
# -----------------------------------------------------------------------------#
general_settings:
  # Admin master key (used for UI + admin API).
  # Must match LITELLM_MASTER_KEY from .env and start with "sk-".
  master_key: os.environ/LITELLM_MASTER_KEY

  # Postgres connection (from DATABASE_URL env var).
  # Required for:
  #   - /ui login and user management
  #   - virtual keys
  #   - spend tracking
  database_url: os.environ/DATABASE_URL

  # Store model definitions in DB (used by the UI)
  store_model_in_db: true

  # Telemetry & metrics
  telemetry: false
  # NOTE(review): recent LiteLLM versions expose Prometheus via
  # litellm_settings.callbacks: ["prometheus"] — verify this flag is honored
  # by the pinned image version.
  enable_prometheus: true

  # On a small box, you may want to reduce some DB-heavy features.
  # You can still track spend, but disable some advanced cost mgmt.
  disable_end_user_cost_tracking: false
  disable_reset_budget: false

  # No Redis transaction buffer here
  # use_redis_transaction_buffer: false
# =============================================================================
# Prometheus Configuration — scrapes LiteLLM /metrics
# =============================================================================
global:
  scrape_interval: 15s  # how often to scrape targets
  evaluation_interval: 15s  # how often to evaluate rules
  scrape_timeout: 10s

scrape_configs:
  # LiteLLM proxy — reached via the internal Docker service name.
  - job_name: "litellm"
    static_configs:
      - targets: ["litellm:4000"]  # internal service name + port
    metrics_path: "/metrics"
    scrape_interval: 15s

  # Prometheus self-monitoring.
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]
name: litellm-stack

services:
  litellm:
    image: ghcr.io/berriai/litellm:main-stable
    container_name: litellm
    restart: unless-stopped

    # Caddy will reverse proxy to this container on caddy_caddy_net.
    # For local debugging without Caddy, you can uncomment:
    # ports:
    #   - "4000:4000"

    command: ["--config", "/app/config.yaml", "--port", "4000", "--num_workers", "1"]

    env_file:
      - .env

    environment:
      # Logging
      LITELLM_LOG: "${LITELLM_LOG:-INFO}"

      # DB connection string used by LiteLLM for UI, users, virtual keys, spend, etc.
      # LiteLLM reads this via general_settings.database_url: os.environ/DATABASE_URL
      DATABASE_URL: "postgresql://${POSTGRES_USER:-llmproxy}:${POSTGRES_PASSWORD:-dbpassword9090}@litellm-db:5432/${POSTGRES_DB:-litellm}"

    volumes:
      - ./config/litellm_config.yaml:/app/config.yaml:ro
      - litellm_logs:/app/logs

    depends_on:
      litellm-db:
        condition: service_healthy

    networks:
      - caddy_caddy_net

    healthcheck:
      # LiteLLM image is minimal (no curl/wget) → use Python
      test: ["CMD-SHELL", "python -c \"import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')\""]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 120s

    logging:
      driver: "json-file"
      options:
        max-size: "20m"
        max-file: "3"

  litellm-db:
    image: postgres:16-alpine
    container_name: litellm-db
    restart: unless-stopped

    environment:
      POSTGRES_DB: "${POSTGRES_DB:-litellm}"
      POSTGRES_USER: "${POSTGRES_USER:-llmproxy}"
      POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-dbpassword9090}"
      PGDATA: /var/lib/postgresql/data/pgdata

    volumes:
      - litellm_postgres_data:/var/lib/postgresql/data

    # No host port mapped → only other containers on the network can reach it.
    # For debugging from host, you can temporarily uncomment:
    # ports:
    #   - "127.0.0.1:5432:5432"

    healthcheck:
      # ${...} is interpolated by Compose (from .env) at config time, not by the
      # container shell — intentional here so defaults stay in sync.
      test: ["CMD-SHELL", "pg_isready -d ${POSTGRES_DB:-litellm} -U ${POSTGRES_USER:-llmproxy}"]
      interval: 5s
      timeout: 5s
      retries: 10
      start_period: 10s

    networks:
      - caddy_caddy_net

    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

networks:
  # External network created/owned by your Caddy stack
  caddy_caddy_net:
    external: true

volumes:
  litellm_logs:
    name: litellm_logs
    driver: local

  litellm_postgres_data:
    name: litellm_postgres_data
    driver: local
Reference in New Issue
Block a user