---
# Docker Compose stack: a CPU-only Ollama instance attached to an external
# Caddy network so Caddy can reverse-proxy to it. No host ports are published;
# all access is expected to go through Caddy on the shared network.
services:
  ollama:
    image: ollama/ollama:latest
    container_name: ollama
    restart: unless-stopped
    environment:
      # Ensures Ollama listens on all network interfaces so Caddy can route to it
      - OLLAMA_HOST=0.0.0.0
      # Prevents Ollama from trying to load models into VRAM, forcing CPU mode
      # NOTE(review): OLLAMA_NOHIST is not a documented Ollama variable — the
      # documented one is OLLAMA_NOHISTORY (CLI readline history), and neither
      # forces CPU mode. Verify against the Ollama env-var docs; CPU-only
      # operation on this image is normally just the absence of a GPU runtime.
      - OLLAMA_NOHIST=1
      # Optional: Tune these based on your server's available resources
      - OLLAMA_NUM_PARALLEL=1  # Process one request at a time to prevent CPU thrashing
      - OLLAMA_KEEP_ALIVE=5m  # Keep the model in memory for 5 mins after a request
      - OLLAMA_CONTEXT_LENGTH=1024  # Limit the context length to reduce memory usage
    # Resource caps: half a CPU, hard 2500 MiB memory limit, 2 GiB soft reservation
    cpus: 0.50
    mem_limit: 2500m
    mem_reservation: 2g
    volumes:
      # Persistent storage for downloaded LLM models
      - ollama_data:/root/.ollama
    networks:
      - caddy_caddy_net

networks:
  # Pre-existing network created by the Caddy stack; must exist before `up`
  caddy_caddy_net:
    external: true

volumes:
  ollama_data:
    name: ollama_data