Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
b41342be01
|
|||
|
25a7b6c242
|
|||
|
d3434a4102
|
@@ -75,7 +75,6 @@
|
|||||||
option device 'eth0.1'
|
option device 'eth0.1'
|
||||||
option proto 'static'
|
option proto 'static'
|
||||||
option ipaddr '{{ openwrt_mgmt_ip }}/{{ openwrt_mgmt_prefix }}'
|
option ipaddr '{{ openwrt_mgmt_ip }}/{{ openwrt_mgmt_prefix }}'
|
||||||
option dns '{{ openwrt_dns_servers | join(" ") }}'
|
|
||||||
|
|
||||||
# Policy routing for mgmt interface.
|
# Policy routing for mgmt interface.
|
||||||
#
|
#
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ hooks:
|
|||||||
matrix:
|
matrix:
|
||||||
vars:
|
vars:
|
||||||
q8: "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"
|
q8: "Qwen3.5-0.8B-GGUF-nothink:Q4_K_XL"
|
||||||
flux: "flux1-dev:Q4_K_S"
|
flux: "flux2-klein-4b:Q4_K_M"
|
||||||
coder: "Qwen3-Coder-Next-GGUF:Q4_K_M"
|
coder: "Qwen3-Coder-Next-GGUF:Q4_K_M"
|
||||||
q35t: "Qwen3.5-35B-A3B-GGUF:Q4_K_M"
|
q35t: "Qwen3.5-35B-A3B-GGUF:Q4_K_M"
|
||||||
q35nt: "Qwen3.5-35B-A3B-GGUF-nothink:Q4_K_M"
|
q35nt: "Qwen3.5-35B-A3B-GGUF-nothink:Q4_K_M"
|
||||||
@@ -234,19 +234,20 @@ models:
|
|||||||
|
|
||||||
# Image generation via stable-diffusion.cpp (sd-server)
|
# Image generation via stable-diffusion.cpp (sd-server)
|
||||||
# Models must be pre-downloaded to /root/.cache/sd/
|
# Models must be pre-downloaded to /root/.cache/sd/
|
||||||
# FLUX.1-dev: state-of-the-art open-weight text-to-image model by Black Forest Labs
|
# FLUX.2-klein-4B: fast unified text-to-image and image editing model (Apache 2.0)
|
||||||
# Download: huggingface-cli download lllyasviel/FLUX.1-dev-gguf flux1-dev-Q4_K_S.gguf --local-dir /root/.cache/sd
|
# Download: uv run --with huggingface_hub hf download unsloth/FLUX.2-klein-4B-GGUF flux-2-klein-4b-Q4_K_M.gguf --local-dir /root/.cache/sd
|
||||||
# Download VAE: huggingface-cli download black-forest-labs/FLUX.1-dev ae.safetensors --local-dir /root/.cache/sd
|
# Download VAE: uv run --with huggingface_hub hf download Comfy-Org/flux2-klein-4B split_files/vae/flux2-vae.safetensors --local-dir /root/.cache/sd/flux2-klein && cp /root/.cache/sd/flux2-klein/split_files/vae/flux2-vae.safetensors /root/.cache/sd/
|
||||||
# Download text encoders: huggingface-cli download comfyanonymous/flux_text_encoders clip_l.safetensors t5xxl_fp16.safetensors --local-dir /root/.cache/sd
|
# Download LLM: uv run --with huggingface_hub hf download unsloth/Qwen3-4B-GGUF Qwen3-4B-Q4_K_M.gguf --local-dir /root/.cache/sd
|
||||||
"flux1-dev:Q4_K_S":
|
"flux2-klein-4b:Q4_K_M":
|
||||||
checkEndpoint: "/"
|
checkEndpoint: "/"
|
||||||
cmd: |
|
cmd: |
|
||||||
sd-server
|
sd-server
|
||||||
--listen-port ${PORT}
|
--listen-port ${PORT}
|
||||||
--diffusion-model /root/.cache/sd/flux1-dev-Q4_K_S.gguf
|
--diffusion-model /root/.cache/sd/flux-2-klein-4b-Q4_K_M.gguf
|
||||||
--vae /root/.cache/sd/ae.safetensors
|
--vae /root/.cache/sd/flux2-vae.safetensors
|
||||||
--clip_l /root/.cache/sd/clip_l.safetensors
|
--llm /root/.cache/sd/Qwen3-4B-Q4_K_M.gguf
|
||||||
--t5xxl /root/.cache/sd/t5xxl_fp16.safetensors
|
|
||||||
--cfg-scale 1.0
|
--cfg-scale 1.0
|
||||||
--sampling-method euler
|
--sampling-method euler
|
||||||
--steps 20
|
--steps 4
|
||||||
|
--diffusion-fa
|
||||||
|
--offload-to-cpu
|
||||||
|
|||||||
@@ -1,101 +0,0 @@
|
|||||||
{%- if not add_generation_prompt is defined %}
|
|
||||||
{%- set add_generation_prompt = false %}
|
|
||||||
{%- endif %}
|
|
||||||
{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}
|
|
||||||
{%- for message in messages %}
|
|
||||||
{%- if message['role'] == 'system' %}
|
|
||||||
{%- if ns.is_first_sp %}
|
|
||||||
{%- set ns.system_prompt = ns.system_prompt + message['content'] %}
|
|
||||||
{%- set ns.is_first_sp = false %}
|
|
||||||
{%- else %}
|
|
||||||
{%- set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}
|
|
||||||
{%- endif %}
|
|
||||||
{%- endif %}
|
|
||||||
{%- endfor %}
|
|
||||||
|
|
||||||
{#- Adapted from https://github.com/sgl-project/sglang/blob/main/examples/chat_template/tool_chat_template_deepseekr1.jinja #}
|
|
||||||
{%- if tools is defined and tools is not none %}
|
|
||||||
{%- set tool_ns = namespace(text='You are a helpful assistant with tool calling capabilities. ' + 'When a tool call is needed, you MUST use the following format to issue the call:\n' + '<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>FUNCTION_NAME\n' + '```json\n{"param1": "value1", "param2": "value2"}\n```<|tool▁call▁end|><|tool▁calls▁end|>\n\n' + 'Make sure the JSON is valid.' + '## Tools\n\n### Function\n\nYou have the following functions available:\n\n') %}
|
|
||||||
{%- for tool in tools %}
|
|
||||||
{%- set tool_ns.text = tool_ns.text + '\n```json\n' + (tool | tojson) + '\n```\n' %}
|
|
||||||
{%- endfor %}
|
|
||||||
{%- if ns.system_prompt|length != 0 %}
|
|
||||||
{%- set ns.system_prompt = ns.system_prompt + '\n\n' + tool_ns.text %}
|
|
||||||
{%- else %}
|
|
||||||
{%- set ns.system_prompt = tool_ns.text %}
|
|
||||||
{%- endif %}
|
|
||||||
{%- endif %}
|
|
||||||
{{- bos_token }}
|
|
||||||
{{- '/no_think' + ns.system_prompt }}
|
|
||||||
{%- set last_index = (messages|length - 1) %}
|
|
||||||
{%- for message in messages %}
|
|
||||||
{%- set content = message['content'] %}
|
|
||||||
{%- if message['role'] == 'user' %}
|
|
||||||
{%- set ns.is_tool = false -%}
|
|
||||||
{%- set ns.is_first = false -%}
|
|
||||||
{%- set ns.is_last_user = true -%}
|
|
||||||
{%- if loop.index0 == last_index %}
|
|
||||||
{{- '<|User|>' + content }}
|
|
||||||
{%- else %}
|
|
||||||
{{- '<|User|>' + content + '<|Assistant|>'}}
|
|
||||||
{%- endif %}
|
|
||||||
{%- endif %}
|
|
||||||
{%- if message['role'] == 'assistant' %}
|
|
||||||
{%- if '</think>' in content %}
|
|
||||||
{%- set content = (content.split('</think>')|last) %}
|
|
||||||
{%- endif %}
|
|
||||||
{%- endif %}
|
|
||||||
{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
|
|
||||||
{%- set ns.is_last_user = false -%}
|
|
||||||
{%- if ns.is_tool %}
|
|
||||||
{{- '<|tool▁outputs▁end|>'}}
|
|
||||||
{%- endif %}
|
|
||||||
{%- set ns.is_first = false %}
|
|
||||||
{%- set ns.is_tool = false -%}
|
|
||||||
{%- set ns.is_output_first = true %}
|
|
||||||
{%- for tool in message['tool_calls'] %}
|
|
||||||
{%- set arguments = tool['function']['arguments'] %}
|
|
||||||
{%- if arguments is not string %}
|
|
||||||
{%- set arguments = arguments|tojson %}
|
|
||||||
{%- endif %}
|
|
||||||
{%- if not ns.is_first %}
|
|
||||||
{%- if content is none %}
|
|
||||||
{{- '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + '<|tool▁call▁end|>'}}
|
|
||||||
}
|
|
||||||
{%- else %}
|
|
||||||
{{- content + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + '<|tool▁call▁end|>'}}
|
|
||||||
{%- endif %}
|
|
||||||
{%- set ns.is_first = true -%}
|
|
||||||
{%- else %}
|
|
||||||
{{- '\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + '<|tool▁call▁end|>'}}
|
|
||||||
{%- endif %}
|
|
||||||
{%- endfor %}
|
|
||||||
{{- '<|tool▁calls▁end|><|end▁of▁sentence|>'}}
|
|
||||||
{%- endif %}
|
|
||||||
{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}
|
|
||||||
{%- set ns.is_last_user = false -%}
|
|
||||||
{%- if ns.is_tool %}
|
|
||||||
{{- '<|tool▁outputs▁end|>' + content + '<|end▁of▁sentence|>'}}
|
|
||||||
{%- set ns.is_tool = false -%}
|
|
||||||
{%- else %}
|
|
||||||
{{- content + '<|end▁of▁sentence|>'}}
|
|
||||||
{%- endif %}
|
|
||||||
{%- endif %}
|
|
||||||
{%- if message['role'] == 'tool' %}
|
|
||||||
{%- set ns.is_last_user = false -%}
|
|
||||||
{%- set ns.is_tool = true -%}
|
|
||||||
{%- if ns.is_output_first %}
|
|
||||||
{{- '<|tool▁outputs▁begin|><|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}
|
|
||||||
{%- set ns.is_output_first = false %}
|
|
||||||
{%- else %}
|
|
||||||
{{- '\n<|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}
|
|
||||||
{%- endif %}
|
|
||||||
{%- endif %}
|
|
||||||
{%- endfor -%}
|
|
||||||
{%- if ns.is_tool %}
|
|
||||||
{{- '<|tool▁outputs▁end|>'}}
|
|
||||||
{%- endif %}
|
|
||||||
{#- if add_generation_prompt and not ns.is_last_user and not ns.is_tool #}
|
|
||||||
{%- if add_generation_prompt and not ns.is_tool %}
|
|
||||||
{{- '<|Assistant|>'}}
|
|
||||||
{%- endif %}
|
|
||||||
@@ -12,4 +12,3 @@ configMapGenerator:
|
|||||||
namespace: llama
|
namespace: llama
|
||||||
files:
|
files:
|
||||||
- config.yaml=configs/config.yaml
|
- config.yaml=configs/config.yaml
|
||||||
- qwen_nothink_chat_template.jinja=configs/qwen_nothink_chat_template.jinja
|
|
||||||
|
|||||||
@@ -84,7 +84,7 @@ spec:
|
|||||||
- name: IMAGES_OPENAI_API_KEY
|
- name: IMAGES_OPENAI_API_KEY
|
||||||
value: "ignored"
|
value: "ignored"
|
||||||
- name: IMAGE_GENERATION_MODEL
|
- name: IMAGE_GENERATION_MODEL
|
||||||
value: "flux1-dev:Q4_K_S"
|
value: "flux2-klein-4b:Q4_K_M"
|
||||||
- name: IMAGE_SIZE
|
- name: IMAGE_SIZE
|
||||||
value: "512x512"
|
value: "512x512"
|
||||||
# Image editing via llama-swap sd-server (/v1/images/edits)
|
# Image editing via llama-swap sd-server (/v1/images/edits)
|
||||||
@@ -97,6 +97,6 @@ spec:
|
|||||||
- name: IMAGES_EDIT_OPENAI_API_KEY
|
- name: IMAGES_EDIT_OPENAI_API_KEY
|
||||||
value: "ignored"
|
value: "ignored"
|
||||||
- name: IMAGE_EDIT_MODEL
|
- name: IMAGE_EDIT_MODEL
|
||||||
value: "flux1-dev:Q4_K_S"
|
value: "flux2-klein-4b:Q4_K_M"
|
||||||
- name: IMAGE_EDIT_SIZE
|
- name: IMAGE_EDIT_SIZE
|
||||||
value: "512x512"
|
value: "512x512"
|
||||||
|
|||||||
Reference in New Issue
Block a user