---
# Docker Compose stack: a CPU-only Ollama instance attached to an external
# Caddy network so Caddy can reverse-proxy to it. No host ports are published;
# all access is expected to go through Caddy on the shared network.
services:
  ollama:
    image: ollama/ollama:latest
    container_name: ollama
    restart: unless-stopped
    environment:
      # Ensures Ollama listens on all network interfaces so Caddy can route to it
      - OLLAMA_HOST=0.0.0.0
      # Prevents Ollama from trying to load models into VRAM, forcing CPU mode
      # NOTE(review): OLLAMA_NOHIST is not a documented Ollama variable — the
      # documented one is OLLAMA_NOHISTORY (CLI readline history), and neither
      # forces CPU mode. Verify against the Ollama env-var docs; CPU-only
      # operation on this image is normally just the absence of a GPU runtime.
      - OLLAMA_NOHIST=1
      # Optional: Tune these based on your server's available resources
      - OLLAMA_NUM_PARALLEL=1  # Process one request at a time to prevent CPU thrashing
      - OLLAMA_KEEP_ALIVE=5m  # Keep the model in memory for 5 mins after a request
      - OLLAMA_CONTEXT_LENGTH=1024  # Limit the context length to reduce memory usage
    # Resource caps: half a CPU, hard 2500 MiB memory limit, 2 GiB soft reservation
    cpus: 0.50
    mem_limit: 2500m
    mem_reservation: 2g
    volumes:
      # Persistent storage for downloaded LLM models
      - ollama_data:/root/.ollama
    networks:
      - caddy_caddy_net

networks:
  # Pre-existing network created by the Caddy stack; must exist before `up`
  caddy_caddy_net:
    external: true

volumes:
  ollama_data:
    name: ollama_data