mirror of https://github.com/runyanjake/olomana.git
synced 2026-03-26 05:53:17 -07:00
39 lines · 1.0 KiB · YAML
# llama.cpp inference server (CUDA build) published through Traefik.
services:
  llama-cpp:
    image: ghcr.io/ggml-org/llama.cpp:server-cuda
    container_name: llama-cpp
    restart: unless-stopped
    networks:
      - traefik
    volumes:
      # Host model directory mounted read into the container at /models.
      - /pwspool/software/llama-cpp/models:/models
    # Server options are passed as command-line args (moved here from
    # environment variables) to guarantee they are applied.
    command:
      - "--model"
      - "/models/Qwen3.5-35B-A3B-UD-IQ2_XXS.gguf"
      - "--host"
      - "0.0.0.0"
      - "--port"
      - "8080"
      - "--n-gpu-layers"
      - "99"
      - "--ctx-size"
      - "8192"
    # Reserve all host NVIDIA GPUs for this container (requires the
    # NVIDIA Container Toolkit on the host).
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Traefik routing: TLS via Let's Encrypt, forwarded to the server port.
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.llama.rule=Host(`llm.whitney.rip`)"
      - "traefik.http.routers.llama.entrypoints=websecure"
      - "traefik.http.routers.llama.tls.certresolver=lets-encrypt"
      - "traefik.http.services.llama.loadbalancer.server.port=8080"

# The traefik network is created outside this compose project
# (e.g. by the Traefik stack itself), so it is declared external.
networks:
  traefik:
    external: true