1 Commits

Author SHA1 Message Date
d20647c855 Update renovate/renovate Docker tag to v43.39.2 2026-02-26 00:00:50 +00:00
6 changed files with 29 additions and 213 deletions

View File

@@ -1,6 +1,4 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/mostlygeek/llama-swap/refs/heads/main/config-schema.json
healthCheckTimeout: 600 healthCheckTimeout: 600
logToStdout: "both" # proxy and upstream
models: models:
"DeepSeek-R1-0528-Qwen3-8B-GGUF": "DeepSeek-R1-0528-Qwen3-8B-GGUF":
@@ -8,6 +6,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_K_M -hf unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_K_M
--n-gpu-layers 37
--ctx-size 16384 --ctx-size 16384
--no-warmup --no-warmup
--port ${PORT} --port ${PORT}
@@ -17,6 +16,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3-8B-GGUF:Q4_K_M -hf unsloth/Qwen3-8B-GGUF:Q4_K_M
--n-gpu-layers 37
--ctx-size 16384 --ctx-size 16384
--no-warmup --no-warmup
--port ${PORT} --port ${PORT}
@@ -26,6 +26,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3-8B-GGUF:Q4_K_M -hf unsloth/Qwen3-8B-GGUF:Q4_K_M
--n-gpu-layers 37
--ctx-size 16384 --ctx-size 16384
--jinja --jinja
--chat-template-file /config/qwen_nothink_chat_template.jinja --chat-template-file /config/qwen_nothink_chat_template.jinja
@@ -38,6 +39,7 @@ models:
/app/llama-server /app/llama-server
-hf unsloth/gemma-3n-E4B-it-GGUF:UD-Q4_K_XL -hf unsloth/gemma-3n-E4B-it-GGUF:UD-Q4_K_XL
--ctx-size 16384 --ctx-size 16384
--n-gpu-layers 99
--seed 3407 --seed 3407
--prio 2 --prio 2
--temp 1.0 --temp 1.0
@@ -54,6 +56,7 @@ models:
/app/llama-server /app/llama-server
-hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M -hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M
--ctx-size 16384 --ctx-size 16384
--n-gpu-layers 99
--prio 2 --prio 2
--temp 1.0 --temp 1.0
--repeat-penalty 1.0 --repeat-penalty 1.0
@@ -69,6 +72,7 @@ models:
/app/llama-server /app/llama-server
-hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M -hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M
--ctx-size 16384 --ctx-size 16384
--n-gpu-layers 99
--prio 2 --prio 2
--temp 1.0 --temp 1.0
--repeat-penalty 1.0 --repeat-penalty 1.0
@@ -85,6 +89,7 @@ models:
/app/llama-server /app/llama-server
-hf unsloth/gemma-3-12b-it-GGUF:Q2_K_L -hf unsloth/gemma-3-12b-it-GGUF:Q2_K_L
--ctx-size 16384 --ctx-size 16384
--n-gpu-layers 99
--prio 2 --prio 2
--temp 1.0 --temp 1.0
--repeat-penalty 1.0 --repeat-penalty 1.0
@@ -100,6 +105,7 @@ models:
/app/llama-server /app/llama-server
-hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M -hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M
--ctx-size 16384 --ctx-size 16384
--n-gpu-layers 99
--prio 2 --prio 2
--temp 1.0 --temp 1.0
--repeat-penalty 1.0 --repeat-penalty 1.0
@@ -115,6 +121,7 @@ models:
/app/llama-server /app/llama-server
-hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M -hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M
--ctx-size 16384 --ctx-size 16384
--n-gpu-layers 99
--prio 2 --prio 2
--temp 1.0 --temp 1.0
--repeat-penalty 1.0 --repeat-penalty 1.0
@@ -130,6 +137,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3-4B-Thinking-2507-GGUF:Q4_K_M -hf unsloth/Qwen3-4B-Thinking-2507-GGUF:Q4_K_M
--n-gpu-layers 99
--ctx-size 16384 --ctx-size 16384
--predict 8192 --predict 8192
--temp 0.6 --temp 0.6
@@ -145,6 +153,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3-4B-Thinking-2507-GGUF:Q4_K_M -hf unsloth/Qwen3-4B-Thinking-2507-GGUF:Q4_K_M
--n-gpu-layers 99
--ctx-size 262144 --ctx-size 262144
--predict 81920 --predict 81920
--temp 0.6 --temp 0.6
@@ -163,6 +172,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3-4B-Instruct-2507-GGUF:Q4_K_M -hf unsloth/Qwen3-4B-Instruct-2507-GGUF:Q4_K_M
--n-gpu-layers 99
--ctx-size 16384 --ctx-size 16384
--predict 8192 --predict 8192
--temp 0.7 --temp 0.7
@@ -178,6 +188,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen3-4B-Instruct-2507-GGUF:Q4_K_M -hf unsloth/Qwen3-4B-Instruct-2507-GGUF:Q4_K_M
--n-gpu-layers 99
--ctx-size 262144 --ctx-size 262144
--predict 81920 --predict 81920
--temp 0.7 --temp 0.7
@@ -196,6 +207,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen2.5-VL-32B-Instruct-GGUF:IQ1_S -hf unsloth/Qwen2.5-VL-32B-Instruct-GGUF:IQ1_S
--n-gpu-layers 99
--ctx-size 16384 --ctx-size 16384
--predict 8192 --predict 8192
--temp 0.7 --temp 0.7
@@ -211,6 +223,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen2.5-VL-32B-Instruct-GGUF:Q2_K_L -hf unsloth/Qwen2.5-VL-32B-Instruct-GGUF:Q2_K_L
--n-gpu-layers 99
--ctx-size 16384 --ctx-size 16384
--predict 8192 --predict 8192
--temp 0.7 --temp 0.7
@@ -226,6 +239,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf unsloth/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M -hf unsloth/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M
--n-gpu-layers 37
--ctx-size 16384 --ctx-size 16384
--predict 8192 --predict 8192
--temp 0.7 --temp 0.7
@@ -241,6 +255,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf Qwen/Qwen3-VL-2B-Instruct-GGUF:Q8_0 -hf Qwen/Qwen3-VL-2B-Instruct-GGUF:Q8_0
--n-gpu-layers 99
--ctx-size 12288 --ctx-size 12288
--predict 4096 --predict 4096
--flash-attn auto --flash-attn auto
@@ -260,6 +275,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf Qwen/Qwen3-VL-4B-Instruct-GGUF:Q8_0 -hf Qwen/Qwen3-VL-4B-Instruct-GGUF:Q8_0
--n-gpu-layers 99
--ctx-size 12288 --ctx-size 12288
--predict 4096 --predict 4096
--flash-attn auto --flash-attn auto
@@ -279,6 +295,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf Qwen/Qwen3-VL-8B-Instruct-GGUF:Q4_K_M -hf Qwen/Qwen3-VL-8B-Instruct-GGUF:Q4_K_M
--n-gpu-layers 99
--ctx-size 12288 --ctx-size 12288
--predict 4096 --predict 4096
--flash-attn auto --flash-attn auto
@@ -298,6 +315,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf Qwen/Qwen3-VL-2B-Instruct-GGUF:Q8_0 -hf Qwen/Qwen3-VL-2B-Instruct-GGUF:Q8_0
--n-gpu-layers 99
--ctx-size 12288 --ctx-size 12288
--predict 4096 --predict 4096
--flash-attn auto --flash-attn auto
@@ -317,6 +335,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf Qwen/Qwen3-VL-4B-Instruct-GGUF:Q8_0 -hf Qwen/Qwen3-VL-4B-Instruct-GGUF:Q8_0
--n-gpu-layers 99
--ctx-size 12288 --ctx-size 12288
--predict 4096 --predict 4096
--flash-attn auto --flash-attn auto
@@ -336,6 +355,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf Qwen/Qwen3-VL-8B-Instruct-GGUF:Q4_K_M -hf Qwen/Qwen3-VL-8B-Instruct-GGUF:Q4_K_M
--n-gpu-layers 99
--ctx-size 12288 --ctx-size 12288
--predict 4096 --predict 4096
--flash-attn auto --flash-attn auto
@@ -355,6 +375,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf Qwen/Qwen3-VL-2B-Thinking-GGUF:Q8_0 -hf Qwen/Qwen3-VL-2B-Thinking-GGUF:Q8_0
--n-gpu-layers 99
--ctx-size 12288 --ctx-size 12288
--predict 4096 --predict 4096
--flash-attn auto --flash-attn auto
@@ -373,6 +394,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf Qwen/Qwen3-VL-4B-Thinking-GGUF:Q4_K_M -hf Qwen/Qwen3-VL-4B-Thinking-GGUF:Q4_K_M
--n-gpu-layers 99
--ctx-size 12288 --ctx-size 12288
--predict 4096 --predict 4096
--flash-attn auto --flash-attn auto
@@ -391,6 +413,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf Qwen/Qwen3-VL-8B-Thinking-GGUF:Q4_K_M -hf Qwen/Qwen3-VL-8B-Thinking-GGUF:Q4_K_M
--n-gpu-layers 99
--ctx-size 12288 --ctx-size 12288
--predict 4096 --predict 4096
--flash-attn auto --flash-attn auto
@@ -409,6 +432,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf noctrex/Huihui-Qwen3-VL-8B-Instruct-abliterated-GGUF:Q6_K -hf noctrex/Huihui-Qwen3-VL-8B-Instruct-abliterated-GGUF:Q6_K
--n-gpu-layers 99
--ctx-size 12288 --ctx-size 12288
--predict 4096 --predict 4096
--flash-attn auto --flash-attn auto
@@ -428,6 +452,7 @@ models:
cmd: | cmd: |
/app/llama-server /app/llama-server
-hf noctrex/Huihui-Qwen3-VL-8B-Thinking-abliterated-GGUF:Q6_K -hf noctrex/Huihui-Qwen3-VL-8B-Thinking-abliterated-GGUF:Q6_K
--n-gpu-layers 99
--ctx-size 12288 --ctx-size 12288
--predict 4096 --predict 4096
--flash-attn auto --flash-attn auto

View File

@@ -30,7 +30,7 @@ spec:
protocol: TCP protocol: TCP
volumeMounts: volumeMounts:
- name: models - name: models
mountPath: /root/.cache mountPath: /app/.cache
- mountPath: /dev/kfd - mountPath: /dev/kfd
name: kfd name: kfd
- mountPath: /dev/dri - mountPath: /dev/dri

View File

@@ -15,7 +15,7 @@ spec:
- name: renovate - name: renovate
# Update this to the latest available and then enable Renovate on # Update this to the latest available and then enable Renovate on
# the manifest # the manifest
image: renovate/renovate:43.43.0-full image: renovate/renovate:43.39.2-full
envFrom: envFrom:
- secretRef: - secretRef:
name: renovate-gitea-token name: renovate-gitea-token

View File

@@ -1,200 +0,0 @@
---
# Identity for the CoreDNS pods. The Deployment in this file references it via
# serviceAccountName, and the system:coredns ClusterRoleBinding lists it as its
# subject.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: coredns
  namespace: kube-system
  labels:
    k8s-app: kube-dns
---
# Cluster-wide, read-only (list/watch) access to the objects named in the rules
# below. No write verbs are granted.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: system:coredns
  labels:
    k8s-app: kube-dns
rules:
  # Core API group ("").
  - apiGroups: [""]
    resources:
      - endpoints
      - services
      - pods
      - namespaces
    verbs: ["list", "watch"]
  # EndpointSlices live in their own API group.
  - apiGroups: ["discovery.k8s.io"]
    resources: ["endpointslices"]
    verbs: ["list", "watch"]
---
# Grants the system:coredns ClusterRole to the coredns ServiceAccount in
# kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: system:coredns
  labels:
    k8s-app: kube-dns
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:coredns
subjects:
  - kind: ServiceAccount
    name: coredns
    namespace: kube-system
---
# CoreDNS configuration. The Deployment in this file mounts the Corefile key at
# /etc/coredns/Corefile and starts CoreDNS with `-conf` pointing at that path.
apiVersion: v1
kind: ConfigMap
metadata:
  name: coredns
  namespace: kube-system
data:
  # `|-` keeps the Corefile's newlines and strips the trailing one. Everything
  # below this key is Corefile syntax, not YAML; `#` lines inside it are
  # Corefile comments and are part of the stored value.
  Corefile: |-
    .:53 {
        errors
        health {
            lameduck 5s
        }
        ready
        log . {
            class all
        }
        prometheus :9153
        # Return NODATA for AAAA on selected domains to force IPv4.
        template IN AAAA {
            match "(^|\.)huggingface\.co\.$"
            rcode NOERROR
            fallthrough
        }
        kubernetes homelab.lumpiasty.xyz cluster.local in-addr.arpa ip6.arpa {
            pods insecure
            fallthrough in-addr.arpa ip6.arpa
            ttl 30
        }
        forward . /etc/resolv.conf {
            max_concurrent 1000
        }
        cache 30 {
            disable success cluster.local
            disable denial cluster.local
        }
        loop
        reload
        loadbalance
    }
---
# Runs CoreDNS in kube-system using the Corefile from the coredns ConfigMap.
# The pod label k8s-app=kube-dns is what the kube-dns Service in this file
# selects on.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: coredns
  namespace: kube-system
  labels:
    k8s-app: kube-dns
    kubernetes.io/name: CoreDNS
spec:
  replicas: 1
  selector:
    matchLabels:
      k8s-app: kube-dns
  template:
    metadata:
      labels:
        k8s-app: kube-dns
    spec:
      priorityClassName: system-cluster-critical
      serviceAccountName: coredns
      # Allow scheduling onto control-plane nodes despite their NoSchedule taint.
      tolerations:
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
      nodeSelector:
        kubernetes.io/os: linux
      containers:
        - name: coredns
          image: registry.k8s.io/coredns/coredns:v1.14.1
          imagePullPolicy: IfNotPresent
          # Path must match the config-volume mount plus the Corefile item path.
          args: ["-conf", "/etc/coredns/Corefile"]
          ports:
            - containerPort: 53
              name: dns
              protocol: UDP
            - containerPort: 53
              name: dns-tcp
              protocol: TCP
            # Same port as `prometheus :9153` in the Corefile.
            - containerPort: 9153
              name: metrics
              protocol: TCP
          # NOTE(review): the Corefile enables `health` and `ready` without an
          # explicit address, so 8080 and 8181 are presumably those plugins'
          # default ports; confirm against the CoreDNS plugin docs.
          livenessProbe:
            httpGet:
              path: /health
              port: 8080
              scheme: HTTP
            initialDelaySeconds: 60
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 5
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /ready
              port: 8181
              scheme: HTTP
            timeoutSeconds: 1
            successThreshold: 1
            failureThreshold: 3
            periodSeconds: 10
          resources:
            limits:
              memory: 170Mi
            requests:
              cpu: 0
              memory: 70Mi
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              # All capabilities are dropped except the one needed to bind
              # port 53.
              add:
                - NET_BIND_SERVICE
              drop:
                - ALL
            readOnlyRootFilesystem: true
          volumeMounts:
            - name: config-volume
              mountPath: /etc/coredns
              readOnly: true
      # Default = inherit the node's resolver configuration instead of using
      # the cluster DNS service; the Corefile forwards to /etc/resolv.conf.
      dnsPolicy: Default
      volumes:
        - name: config-volume
          configMap:
            name: coredns
            items:
              - key: Corefile
                path: Corefile
---
# Dual-stack ClusterIP Service in front of the CoreDNS pods (selected by
# k8s-app=kube-dns). The addresses are pinned rather than auto-assigned.
apiVersion: v1
kind: Service
metadata:
  name: kube-dns
  namespace: kube-system
  labels:
    k8s-app: kube-dns
    kubernetes.io/name: CoreDNS
spec:
  type: ClusterIP
  # clusterIP repeats the first entry of clusterIPs.
  clusterIP: 10.43.0.10
  clusterIPs:
    - 10.43.0.10
    # Quoted so no parser can misread the colon-separated literal.
    - "2001:470:61a3:300::a"
  ipFamilyPolicy: RequireDualStack
  ipFamilies:
    - IPv4
    - IPv6
  selector:
    k8s-app: kube-dns
  ports:
    - name: dns
      port: 53
      protocol: UDP
      targetPort: 53
    - name: dns-tcp
      port: 53
      protocol: TCP
      targetPort: 53

View File

@@ -4,7 +4,6 @@ resources:
- controllers/k8up-crd-4.8.3.yaml - controllers/k8up-crd-4.8.3.yaml
- controllers/cilium.yaml - controllers/cilium.yaml
- controllers/nginx-ingress.yaml - controllers/nginx-ingress.yaml
- controllers/coredns.yaml
- controllers/dns-public.yaml - controllers/dns-public.yaml
- controllers/cert-manager.yaml - controllers/cert-manager.yaml
- controllers/cert-manager-webhook-ovh.yaml - controllers/cert-manager-webhook-ovh.yaml

View File

@@ -14,15 +14,7 @@ machine:
hostDNS: hostDNS:
forwardKubeDNSToHost: false forwardKubeDNSToHost: false
kubelet:
clusterDNS:
- 10.43.0.10
- 2001:470:61a3:300::a
cluster: cluster:
# We're configuring CoreDNS ourselves, so disable the default one
coreDNS:
disabled: true
network: network:
# Likely redundant, we use Cilium as IPAM with their CRDs # Likely redundant, we use Cilium as IPAM with their CRDs
podSubnets: podSubnets: