---
# Docker Compose stack: a single CPU-constrained Ollama service attached to an
# external Caddy network, with persistent model storage.
services:
  ollama:
    image: ollama/ollama:latest
    container_name: ollama
    restart: unless-stopped
    environment:
      # Ensures Ollama listens on all network interfaces so Caddy can route to it
      - OLLAMA_HOST=0.0.0.0
      # NOTE(review): OLLAMA_NOHIST is not a documented Ollama environment
      # variable, and the original comment's claim ("prevents loading models
      # into VRAM, forcing CPU mode") does not match any known Ollama env var.
      # To force CPU-only inference, hiding GPUs (e.g. CUDA_VISIBLE_DEVICES="")
      # is the documented approach — TODO confirm intent and fix.
      - OLLAMA_NOHIST=1
      # Optional: tune these based on your server's available resources
      - OLLAMA_NUM_PARALLEL=1  # Process one request at a time to prevent CPU thrashing
      - OLLAMA_KEEP_ALIVE=5m  # Keep the model in memory for 5 mins after a request
      - OLLAMA_CONTEXT_LENGTH=1024  # Limit the context length to reduce memory usage
    # Resource caps: half a CPU core, hard memory limit 2500 MB, soft
    # reservation 2 GB (legacy non-swarm limits honored by docker compose).
    cpus: 0.50
    mem_limit: 2500m
    mem_reservation: 2g
    volumes:
      # Persistent storage for downloaded LLM models
      - ollama_data:/root/.ollama
    networks:
      - caddy_caddy_net

networks:
  # Pre-existing network created by the Caddy stack; not managed here.
  caddy_caddy_net:
    external: true

volumes:
  ollama_data:
    name: ollama_data