# Flux CD manifests for LibreChat: a HelmRepository pointing at the
# dynomite567 community chart repo, plus the HelmRelease that installs
# LibreChat into the "librechat" namespace.
---
# Source definition: the Helm repository that hosts the LibreChat chart.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: dynomite567-charts
  namespace: librechat
spec:
  interval: 24h  # how often Flux re-fetches the repository index
  url: https://dynomite567.github.io/helm-charts/
---
# HelmRelease installing the LibreChat chart from the repository above.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: librechat
  namespace: librechat
spec:
  interval: 30m  # reconciliation interval for the release
  chart:
    spec:
      chart: librechat
      version: "1.9.1"  # quoted: keeps version strings unambiguous for YAML tooling
      sourceRef:
        kind: HelmRepository
        name: dynomite567-charts
  # NOTE(review): indentation was lost in the pasted source; imageVolume,
  # ingress, mongodb and meilisearch are placed at the top level of `values`
  # per the chart's values schema — verify against the chart's values.yaml.
  values:
    global:
      librechat:
        existingSecretName: librechat  # shared secret (CREDS_KEY, JWT secrets, …)

    # LibreChat application configuration.
    librechat:
      configEnv:
        PLUGIN_MODELS: null
        ALLOW_REGISTRATION: "false"
        TRUST_PROXY: "1"
        DOMAIN_CLIENT: https://librechat.lumpiasty.xyz
        SEARCH: "true"
      existingSecretName: librechat
      # Embedded librechat.yaml (parsed by LibreChat itself, not by Helm).
      configYamlContent: |
        version: 1.0.3

        endpoints:
          custom:
            - name: "Llama.cpp"
              apiKey: "llama"
              baseURL: "http://llama.llama.svc.cluster.local:11434/v1"
              models:
                default:
                  - "DeepSeek-R1-0528-Qwen3-8B-GGUF"
                  - "Qwen3-8B-GGUF"
                  - "Qwen3-8B-GGUF-no-thinking"
                  - "gemma3n-e4b"
                  - "gemma3-12b"
                  - "gemma3-12b-q2"
                  - "gemma3-12b-novision"
                  - "gemma3-4b"
                  - "gemma3-4b-novision"
                  - "Qwen3-4B-Thinking-2507"
                  - "Qwen3-4B-Thinking-2507-long-ctx"
                  - "Qwen2.5-VL-7B-Instruct-GGUF"
                  - "Qwen2.5-VL-32B-Instruct-GGUF-IQ1_S"
                  - "Qwen2.5-VL-32B-Instruct-GGUF-Q2_K_L"
                  - "Qwen3-VL-2B-Instruct-GGUF"
                  - "Qwen3-VL-2B-Instruct-GGUF-unslothish"
                  - "Qwen3-VL-2B-Thinking-GGUF"
                  - "Qwen3-VL-4B-Instruct-GGUF"
                  - "Qwen3-VL-4B-Instruct-GGUF-unslothish"
                  - "Qwen3-VL-4B-Thinking-GGUF"
                  - "Qwen3-VL-8B-Instruct-GGUF"
                  - "Qwen3-VL-8B-Instruct-GGUF-unslothish"
                  - "Qwen3-VL-8B-Thinking-GGUF"
                  - "Huihui-Qwen3-VL-8B-Instruct-abliterated-GGUF"
                  - "Huihui-Qwen3-VL-8B-Thinking-abliterated-GGUF"
              titleConvo: true
              titleModel: "gemma3-4b-novision"
              summarize: false
              summaryModel: "gemma3-4b-novision"
              forcePrompt: false
              modelDisplayLabel: "Llama.cpp"

              # ✨ IMPORTANT: let llama-swap/llama-server own all these
              dropParams:
                - "temperature"
                - "top_p"
                - "top_k"
                - "presence_penalty"
                - "frequency_penalty"
                - "stop"
                - "max_tokens"

    # Persistent volume for uploaded images.
    imageVolume:
      enabled: true
      size: 10G
      accessModes: ReadWriteOnce
      storageClassName: mayastor-single-hdd

    ingress:
      enabled: true
      className: nginx-ingress
      annotations:
        cert-manager.io/cluster-issuer: letsencrypt
        nginx.ingress.kubernetes.io/proxy-body-size: "0"
        nginx.ingress.kubernetes.io/proxy-buffering: "false"
        # ingress-nginx expects plain seconds here (no "m" suffix); the
        # original "30m" is rejected and silently falls back to the 60s
        # default. 1800 s == 30 minutes.
        nginx.ingress.kubernetes.io/proxy-read-timeout: "1800"
      hosts:
        - host: librechat.lumpiasty.xyz
          paths:
            - path: /
              pathType: ImplementationSpecific
      tls:
        - hosts:
            - librechat.lumpiasty.xyz
          secretName: librechat-ingress

    # MongoDB subchart: pin the storage class for its PVC.
    mongodb:
      persistence:
        storageClass: mayastor-single-hdd

    # Meilisearch subchart: storage class + master key from the shared secret.
    meilisearch:
      persistence:
        storageClass: mayastor-single-hdd
      auth:
        existingMasterKeySecret: librechat