Compare commits
7 Commits
d20647c855
...
afc01e19d6
| Author | SHA1 | Date | |
|---|---|---|---|
| | afc01e19d6 | | |
| | 1adabe92a3 | | |
| | 08473fdeae | | |
| | c14257842a | | |
| | d053342234 | | |
| | 2dbd964c28 | | |
| | 7712aac0f5 | | |
@@ -1,4 +1,6 @@
|
||||
# yaml-language-server: $schema=https://raw.githubusercontent.com/mostlygeek/llama-swap/refs/heads/main/config-schema.json
|
||||
healthCheckTimeout: 600
|
||||
logToStdout: "both" # proxy and upstream
|
||||
|
||||
models:
|
||||
"DeepSeek-R1-0528-Qwen3-8B-GGUF":
|
||||
@@ -6,7 +8,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_K_M
|
||||
--n-gpu-layers 37
|
||||
--ctx-size 16384
|
||||
--no-warmup
|
||||
--port ${PORT}
|
||||
@@ -16,7 +17,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen3-8B-GGUF:Q4_K_M
|
||||
--n-gpu-layers 37
|
||||
--ctx-size 16384
|
||||
--no-warmup
|
||||
--port ${PORT}
|
||||
@@ -26,7 +26,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen3-8B-GGUF:Q4_K_M
|
||||
--n-gpu-layers 37
|
||||
--ctx-size 16384
|
||||
--jinja
|
||||
--chat-template-file /config/qwen_nothink_chat_template.jinja
|
||||
@@ -39,7 +38,6 @@ models:
|
||||
/app/llama-server
|
||||
-hf unsloth/gemma-3n-E4B-it-GGUF:UD-Q4_K_XL
|
||||
--ctx-size 16384
|
||||
--n-gpu-layers 99
|
||||
--seed 3407
|
||||
--prio 2
|
||||
--temp 1.0
|
||||
@@ -56,7 +54,6 @@ models:
|
||||
/app/llama-server
|
||||
-hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M
|
||||
--ctx-size 16384
|
||||
--n-gpu-layers 99
|
||||
--prio 2
|
||||
--temp 1.0
|
||||
--repeat-penalty 1.0
|
||||
@@ -72,7 +69,6 @@ models:
|
||||
/app/llama-server
|
||||
-hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M
|
||||
--ctx-size 16384
|
||||
--n-gpu-layers 99
|
||||
--prio 2
|
||||
--temp 1.0
|
||||
--repeat-penalty 1.0
|
||||
@@ -89,7 +85,6 @@ models:
|
||||
/app/llama-server
|
||||
-hf unsloth/gemma-3-12b-it-GGUF:Q2_K_L
|
||||
--ctx-size 16384
|
||||
--n-gpu-layers 99
|
||||
--prio 2
|
||||
--temp 1.0
|
||||
--repeat-penalty 1.0
|
||||
@@ -105,7 +100,6 @@ models:
|
||||
/app/llama-server
|
||||
-hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M
|
||||
--ctx-size 16384
|
||||
--n-gpu-layers 99
|
||||
--prio 2
|
||||
--temp 1.0
|
||||
--repeat-penalty 1.0
|
||||
@@ -121,7 +115,6 @@ models:
|
||||
/app/llama-server
|
||||
-hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M
|
||||
--ctx-size 16384
|
||||
--n-gpu-layers 99
|
||||
--prio 2
|
||||
--temp 1.0
|
||||
--repeat-penalty 1.0
|
||||
@@ -137,7 +130,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen3-4B-Thinking-2507-GGUF:Q4_K_M
|
||||
--n-gpu-layers 99
|
||||
--ctx-size 16384
|
||||
--predict 8192
|
||||
--temp 0.6
|
||||
@@ -153,7 +145,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen3-4B-Thinking-2507-GGUF:Q4_K_M
|
||||
--n-gpu-layers 99
|
||||
--ctx-size 262144
|
||||
--predict 81920
|
||||
--temp 0.6
|
||||
@@ -172,7 +163,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen3-4B-Instruct-2507-GGUF:Q4_K_M
|
||||
--n-gpu-layers 99
|
||||
--ctx-size 16384
|
||||
--predict 8192
|
||||
--temp 0.7
|
||||
@@ -188,7 +178,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen3-4B-Instruct-2507-GGUF:Q4_K_M
|
||||
--n-gpu-layers 99
|
||||
--ctx-size 262144
|
||||
--predict 81920
|
||||
--temp 0.7
|
||||
@@ -207,7 +196,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen2.5-VL-32B-Instruct-GGUF:IQ1_S
|
||||
--n-gpu-layers 99
|
||||
--ctx-size 16384
|
||||
--predict 8192
|
||||
--temp 0.7
|
||||
@@ -223,7 +211,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen2.5-VL-32B-Instruct-GGUF:Q2_K_L
|
||||
--n-gpu-layers 99
|
||||
--ctx-size 16384
|
||||
--predict 8192
|
||||
--temp 0.7
|
||||
@@ -239,7 +226,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf unsloth/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M
|
||||
--n-gpu-layers 37
|
||||
--ctx-size 16384
|
||||
--predict 8192
|
||||
--temp 0.7
|
||||
@@ -255,7 +241,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf Qwen/Qwen3-VL-2B-Instruct-GGUF:Q8_0
|
||||
--n-gpu-layers 99
|
||||
--ctx-size 12288
|
||||
--predict 4096
|
||||
--flash-attn auto
|
||||
@@ -275,7 +260,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf Qwen/Qwen3-VL-4B-Instruct-GGUF:Q8_0
|
||||
--n-gpu-layers 99
|
||||
--ctx-size 12288
|
||||
--predict 4096
|
||||
--flash-attn auto
|
||||
@@ -295,7 +279,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf Qwen/Qwen3-VL-8B-Instruct-GGUF:Q4_K_M
|
||||
--n-gpu-layers 99
|
||||
--ctx-size 12288
|
||||
--predict 4096
|
||||
--flash-attn auto
|
||||
@@ -315,7 +298,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf Qwen/Qwen3-VL-2B-Instruct-GGUF:Q8_0
|
||||
--n-gpu-layers 99
|
||||
--ctx-size 12288
|
||||
--predict 4096
|
||||
--flash-attn auto
|
||||
@@ -335,7 +317,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf Qwen/Qwen3-VL-4B-Instruct-GGUF:Q8_0
|
||||
--n-gpu-layers 99
|
||||
--ctx-size 12288
|
||||
--predict 4096
|
||||
--flash-attn auto
|
||||
@@ -355,7 +336,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf Qwen/Qwen3-VL-8B-Instruct-GGUF:Q4_K_M
|
||||
--n-gpu-layers 99
|
||||
--ctx-size 12288
|
||||
--predict 4096
|
||||
--flash-attn auto
|
||||
@@ -375,7 +355,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf Qwen/Qwen3-VL-2B-Thinking-GGUF:Q8_0
|
||||
--n-gpu-layers 99
|
||||
--ctx-size 12288
|
||||
--predict 4096
|
||||
--flash-attn auto
|
||||
@@ -394,7 +373,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf Qwen/Qwen3-VL-4B-Thinking-GGUF:Q4_K_M
|
||||
--n-gpu-layers 99
|
||||
--ctx-size 12288
|
||||
--predict 4096
|
||||
--flash-attn auto
|
||||
@@ -413,7 +391,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf Qwen/Qwen3-VL-8B-Thinking-GGUF:Q4_K_M
|
||||
--n-gpu-layers 99
|
||||
--ctx-size 12288
|
||||
--predict 4096
|
||||
--flash-attn auto
|
||||
@@ -432,7 +409,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf noctrex/Huihui-Qwen3-VL-8B-Instruct-abliterated-GGUF:Q6_K
|
||||
--n-gpu-layers 99
|
||||
--ctx-size 12288
|
||||
--predict 4096
|
||||
--flash-attn auto
|
||||
@@ -452,7 +428,6 @@ models:
|
||||
cmd: |
|
||||
/app/llama-server
|
||||
-hf noctrex/Huihui-Qwen3-VL-8B-Thinking-abliterated-GGUF:Q6_K
|
||||
--n-gpu-layers 99
|
||||
--ctx-size 12288
|
||||
--predict 4096
|
||||
--flash-attn auto
|
||||
|
||||
@@ -30,7 +30,7 @@ spec:
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- name: models
|
||||
mountPath: /app/.cache
|
||||
mountPath: /root/.cache
|
||||
- mountPath: /dev/kfd
|
||||
name: kfd
|
||||
- mountPath: /dev/dri
|
||||
|
||||
@@ -15,7 +15,7 @@ spec:
|
||||
- name: renovate
|
||||
# Update this to the latest available and then enable Renovate on
|
||||
# the manifest
|
||||
image: renovate/renovate:43.39.2-full
|
||||
image: renovate/renovate:43.43.0-full
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: renovate-gitea-token
|
||||
|
||||
200
infra/controllers/coredns.yaml
Normal file
200
infra/controllers/coredns.yaml
Normal file
@@ -0,0 +1,200 @@
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: coredns
|
||||
namespace: kube-system
|
||||
labels:
|
||||
k8s-app: kube-dns
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: system:coredns
|
||||
labels:
|
||||
k8s-app: kube-dns
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- endpoints
|
||||
- services
|
||||
- pods
|
||||
- namespaces
|
||||
verbs: ["list", "watch"]
|
||||
- apiGroups: ["discovery.k8s.io"]
|
||||
resources: ["endpointslices"]
|
||||
verbs: ["list", "watch"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: system:coredns
|
||||
labels:
|
||||
k8s-app: kube-dns
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: system:coredns
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: coredns
|
||||
namespace: kube-system
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: coredns
|
||||
namespace: kube-system
|
||||
data:
|
||||
Corefile: |-
|
||||
.:53 {
|
||||
errors
|
||||
health {
|
||||
lameduck 5s
|
||||
}
|
||||
ready
|
||||
log . {
|
||||
class all
|
||||
}
|
||||
prometheus :9153
|
||||
|
||||
# Return NODATA for AAAA on selected domains to force IPv4.
|
||||
template IN AAAA {
|
||||
match "(^|\.)huggingface\.co\.$"
|
||||
rcode NOERROR
|
||||
fallthrough
|
||||
}
|
||||
|
||||
kubernetes homelab.lumpiasty.xyz cluster.local in-addr.arpa ip6.arpa {
|
||||
pods insecure
|
||||
fallthrough in-addr.arpa ip6.arpa
|
||||
ttl 30
|
||||
}
|
||||
forward . /etc/resolv.conf {
|
||||
max_concurrent 1000
|
||||
}
|
||||
cache 30 {
|
||||
disable success cluster.local
|
||||
disable denial cluster.local
|
||||
}
|
||||
loop
|
||||
reload
|
||||
loadbalance
|
||||
}
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: coredns
|
||||
namespace: kube-system
|
||||
labels:
|
||||
k8s-app: kube-dns
|
||||
kubernetes.io/name: CoreDNS
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
k8s-app: kube-dns
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: kube-dns
|
||||
spec:
|
||||
priorityClassName: system-cluster-critical
|
||||
serviceAccountName: coredns
|
||||
tolerations:
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
containers:
|
||||
- name: coredns
|
||||
image: registry.k8s.io/coredns/coredns:v1.14.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
args: ["-conf", "/etc/coredns/Corefile"]
|
||||
ports:
|
||||
- containerPort: 53
|
||||
name: dns
|
||||
protocol: UDP
|
||||
- containerPort: 53
|
||||
name: dns-tcp
|
||||
protocol: TCP
|
||||
- containerPort: 9153
|
||||
name: metrics
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: 8080
|
||||
scheme: HTTP
|
||||
initialDelaySeconds: 60
|
||||
timeoutSeconds: 5
|
||||
successThreshold: 1
|
||||
failureThreshold: 5
|
||||
periodSeconds: 10
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /ready
|
||||
port: 8181
|
||||
scheme: HTTP
|
||||
timeoutSeconds: 1
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
periodSeconds: 10
|
||||
resources:
|
||||
limits:
|
||||
memory: 170Mi
|
||||
requests:
|
||||
cpu: 0
|
||||
memory: 70Mi
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
add:
|
||||
- NET_BIND_SERVICE
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
volumeMounts:
|
||||
- name: config-volume
|
||||
mountPath: /etc/coredns
|
||||
readOnly: true
|
||||
dnsPolicy: Default
|
||||
volumes:
|
||||
- name: config-volume
|
||||
configMap:
|
||||
name: coredns
|
||||
items:
|
||||
- key: Corefile
|
||||
path: Corefile
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: kube-dns
|
||||
namespace: kube-system
|
||||
labels:
|
||||
k8s-app: kube-dns
|
||||
kubernetes.io/name: CoreDNS
|
||||
spec:
|
||||
type: ClusterIP
|
||||
clusterIP: 10.43.0.10
|
||||
clusterIPs:
|
||||
- 10.43.0.10
|
||||
- 2001:470:61a3:300::a
|
||||
ipFamilyPolicy: RequireDualStack
|
||||
ipFamilies:
|
||||
- IPv4
|
||||
- IPv6
|
||||
selector:
|
||||
k8s-app: kube-dns
|
||||
ports:
|
||||
- name: dns
|
||||
port: 53
|
||||
protocol: UDP
|
||||
targetPort: 53
|
||||
- name: dns-tcp
|
||||
port: 53
|
||||
protocol: TCP
|
||||
targetPort: 53
|
||||
@@ -4,6 +4,7 @@ resources:
|
||||
- controllers/k8up-crd-4.8.3.yaml
|
||||
- controllers/cilium.yaml
|
||||
- controllers/nginx-ingress.yaml
|
||||
- controllers/coredns.yaml
|
||||
- controllers/dns-public.yaml
|
||||
- controllers/cert-manager.yaml
|
||||
- controllers/cert-manager-webhook-ovh.yaml
|
||||
|
||||
@@ -14,7 +14,15 @@ machine:
|
||||
hostDNS:
|
||||
forwardKubeDNSToHost: false
|
||||
|
||||
kubelet:
|
||||
clusterDNS:
|
||||
- 10.43.0.10
|
||||
- 2001:470:61a3:300::a
|
||||
|
||||
cluster:
|
||||
# We're configuring CoreDNS ourselves, so disable the default one
|
||||
coreDNS:
|
||||
disabled: true
|
||||
network:
|
||||
# Likely redundant, since we use Cilium for IPAM via its own CRDs
|
||||
podSubnets:
|
||||
|
||||
Reference in New Issue
Block a user