---
# Docker Compose stack: a single CPU-constrained Ollama service attached to an
# external Caddy network, with persistent model storage.
services:
  ollama:
    image: ollama/ollama:latest
    container_name: ollama
    restart: unless-stopped
    environment:
      # Ensures Ollama listens on all network interfaces so Caddy can route to it
      - OLLAMA_HOST=0.0.0.0
      # NOTE(review): OLLAMA_NOHIST is not a documented Ollama environment
      # variable, and the original comment's claim ("prevents loading models
      # into VRAM, forcing CPU mode") does not match any known Ollama env var.
      # To force CPU-only inference, hiding GPUs (e.g. CUDA_VISIBLE_DEVICES="")
      # is the documented approach — TODO confirm intent and fix.
      - OLLAMA_NOHIST=1
      # Optional: tune these based on your server's available resources
      - OLLAMA_NUM_PARALLEL=1  # Process one request at a time to prevent CPU thrashing
      - OLLAMA_KEEP_ALIVE=5m  # Keep the model in memory for 5 mins after a request
      - OLLAMA_CONTEXT_LENGTH=1024  # Limit the context length to reduce memory usage
    # Resource caps: half a CPU core, hard memory limit 2500 MB, soft
    # reservation 2 GB (legacy non-swarm limits honored by docker compose).
    cpus: 0.50
    mem_limit: 2500m
    mem_reservation: 2g
    volumes:
      # Persistent storage for downloaded LLM models
      - ollama_data:/root/.ollama
    networks:
      - caddy_caddy_net

networks:
  # Pre-existing network created by the Caddy stack; not managed here.
  caddy_caddy_net:
    external: true

volumes:
  ollama_data:
    name: ollama_data