From de3ef465f04ab9cc522b17cf21cba8a43bd08c59 Mon Sep 17 00:00:00 2001 From: Lumpiasty Date: Wed, 23 Jul 2025 22:45:26 +0200 Subject: [PATCH] add qwen3 no thinking --- apps/librechat/release.yaml | 2 +- apps/llama/configmap.yaml | 26 ----- apps/llama/configs/config.yaml | 20 ++++ .../configs/qwen_nothink_chat_template.jinja | 101 ++++++++++++++++++ apps/llama/kustomization.yaml | 7 +- 5 files changed, 128 insertions(+), 28 deletions(-) delete mode 100644 apps/llama/configmap.yaml create mode 100644 apps/llama/configs/config.yaml create mode 100644 apps/llama/configs/qwen_nothink_chat_template.jinja diff --git a/apps/librechat/release.yaml b/apps/librechat/release.yaml index 640aceb..65eade1 100644 --- a/apps/librechat/release.yaml +++ b/apps/librechat/release.yaml @@ -66,7 +66,7 @@ spec: default: [ "DeepSeek-R1-0528-Qwen3-8B-GGUF", "Qwen3-8B-GGUF", - "Qwen3-8B-GGUF-Q6_K" + "Qwen3-8B-GGUF-no-thinking" ] titleConvo: true titleModel: "current_model" diff --git a/apps/llama/configmap.yaml b/apps/llama/configmap.yaml deleted file mode 100644 index 4dd8b0d..0000000 --- a/apps/llama/configmap.yaml +++ /dev/null @@ -1,26 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: llama - name: llama-swap -data: - config.yaml: | - models: - "DeepSeek-R1-0528-Qwen3-8B-GGUF": - cmd: | - /app/llama-server - -hf unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_K_M - -ngl 37 -c 16384 - --port ${PORT} - "Qwen3-8B-GGUF": - cmd: | - /app/llama-server - -hf unsloth/Qwen3-8B-GGUF:Q4_K_M - -ngl 37 -c 16384 - --port ${PORT} - "Qwen3-8B-GGUF-Q6_K": - cmd: | - /app/llama-server - -hf unsloth/Qwen3-8B-GGUF:Q6_K - -ngl 37 -c 16384 - --port ${PORT} diff --git a/apps/llama/configs/config.yaml b/apps/llama/configs/config.yaml new file mode 100644 index 0000000..6a4ff68 --- /dev/null +++ b/apps/llama/configs/config.yaml @@ -0,0 +1,20 @@ +models: + "DeepSeek-R1-0528-Qwen3-8B-GGUF": + cmd: | + /app/llama-server + -hf unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_K_M + -ngl 37 -c 16384 + --port ${PORT} 
+ "Qwen3-8B-GGUF": + cmd: | + /app/llama-server + -hf unsloth/Qwen3-8B-GGUF:Q4_K_M + -ngl 37 -c 16384 + --port ${PORT} + "Qwen3-8B-GGUF-no-thinking": + cmd: | + /app/llama-server + -hf unsloth/Qwen3-8B-GGUF:Q4_K_M + -ngl 37 -c 16384 + --jinja --chat-template-file /config/qwen_nothink_chat_template.jinja + --port ${PORT} diff --git a/apps/llama/configs/qwen_nothink_chat_template.jinja b/apps/llama/configs/qwen_nothink_chat_template.jinja new file mode 100644 index 0000000..fada20e --- /dev/null +++ b/apps/llama/configs/qwen_nothink_chat_template.jinja @@ -0,0 +1,101 @@ +{%- if not add_generation_prompt is defined %} + {%- set add_generation_prompt = false %} +{%- endif %} +{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %} +{%- for message in messages %} + {%- if message['role'] == 'system' %} + {%- if ns.is_first_sp %} + {%- set ns.system_prompt = ns.system_prompt + message['content'] %} + {%- set ns.is_first_sp = false %} + {%- else %} + {%- set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %} + {%- endif %} + {%- endif %} +{%- endfor %} + +{#- Adapted from https://github.com/sgl-project/sglang/blob/main/examples/chat_template/tool_chat_template_deepseekr1.jinja -- NOTE(review): the role and tool markers used below look like the DeepSeek-R1 ones; confirm they are the markers the Qwen3 GGUF served with this file expects. #} +{%- if tools is defined and tools is not none %} + {%- set tool_ns = namespace(text='You are a helpful assistant with tool calling capabilities. ' + 'When a tool call is needed, you MUST use the following format to issue the call:\n' + '<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>FUNCTION_NAME\n' + '```json\n{"param1": "value1", "param2": "value2"}\n```<|tool▁call▁end|><|tool▁calls▁end|>\n\n' + 'Make sure the JSON is valid.' 
+ '## Tools\n\n### Function\n\nYou have the following functions available:\n\n') %} + {%- for tool in tools %} + {%- set tool_ns.text = tool_ns.text + '\n```json\n' + (tool | tojson) + '\n```\n' %} + {%- endfor %} + {%- if ns.system_prompt|length != 0 %} + {%- set ns.system_prompt = ns.system_prompt + '\n\n' + tool_ns.text %} + {%- else %} + {%- set ns.system_prompt = tool_ns.text %} + {%- endif %} +{%- endif %} +{{- bos_token }} +{{- '/no_think' + ns.system_prompt }} +{%- set last_index = (messages|length - 1) %} +{%- for message in messages %} + {%- set content = message['content'] %} + {%- if message['role'] == 'user' %} + {%- set ns.is_tool = false -%} + {%- set ns.is_first = false -%} + {%- set ns.is_last_user = true -%} + {%- if loop.index0 == last_index %} + {{- '<|User|>' + content }} + {%- else %} + {{- '<|User|>' + content + '<|Assistant|>'}} + {%- endif %} + {%- endif %} + {#- Keep only the text after the last '</think>' of an assistant message; content may be none on tool-call turns, so guard before searching it. #} + {%- if message['role'] == 'assistant' %} + {%- if content is not none and '</think>' in content %} + {%- set content = (content.split('</think>')|last) %} + {%- endif %} + {%- endif %} + {%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %} + {%- set ns.is_last_user = false -%} + {%- if ns.is_tool %} + {{- '<|tool▁outputs▁end|>'}} + {%- endif %} + {%- set ns.is_first = false %} + {%- set ns.is_tool = false -%} + {%- set ns.is_output_first = true %} + {%- for tool in message['tool_calls'] %} + {%- set arguments = tool['function']['arguments'] %} + {%- if arguments is not string %} + {%- set arguments = arguments|tojson %} + {%- endif %} + {%- if not ns.is_first %} + {%- if content is none %} + {{- '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + '<|tool▁call▁end|>'}} + {%- else %} + {{- content + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + 
'<|tool▁call▁end|>'}} + {%- endif %} + {%- set ns.is_first = true -%} + {%- else %} + {{- '\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + '<|tool▁call▁end|>'}} + {%- endif %} + {%- endfor %} + {{- '<|tool▁calls▁end|><|end▁of▁sentence|>'}} + {%- endif %} + {%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %} + {%- set ns.is_last_user = false -%} + {%- if ns.is_tool %} + {{- '<|tool▁outputs▁end|>' + content + '<|end▁of▁sentence|>'}} + {%- set ns.is_tool = false -%} + {%- else %} + {{- content + '<|end▁of▁sentence|>'}} + {%- endif %} + {%- endif %} + {%- if message['role'] == 'tool' %} + {%- set ns.is_last_user = false -%} + {%- set ns.is_tool = true -%} + {%- if ns.is_output_first %} + {{- '<|tool▁outputs▁begin|><|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}} + {%- set ns.is_output_first = false %} + {%- else %} + {{- '\n<|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}} + {%- endif %} + {%- endif %} +{%- endfor -%} +{%- if ns.is_tool %} + {{- '<|tool▁outputs▁end|>'}} +{%- endif %} +{#- Previous condition: "add_generation_prompt and not ns.is_last_user and not ns.is_tool". The is_last_user check is presumably dropped because a trailing user turn is written above without '<|Assistant|>', so the marker has to be added here. #} +{%- if add_generation_prompt and not ns.is_tool %} + {{- '<|Assistant|>'}} +{%- endif %} diff --git a/apps/llama/kustomization.yaml b/apps/llama/kustomization.yaml index aeebaaf..a44da8c 100644 --- a/apps/llama/kustomization.yaml +++ b/apps/llama/kustomization.yaml @@ -7,4 +7,9 @@ resources: - ingress.yaml - pvc.yaml - deployment.yaml - - configmap.yaml +configMapGenerator: + - name: llama-swap + namespace: llama + files: + - config.yaml=configs/config.yaml + - qwen_nothink_chat_template.jinja=configs/qwen_nothink_chat_template.jinja