{#-
  Chat template: turns one user prompt (plus an optional assistant reply) into
  a task-specific reserved-token sequence.

  Accepted `messages` shapes:
    - a single string (used as the user text; no assistant turn)
    - a list of 1-2 chat dicts: user[, assistant]
        * user.content is a string OR a list with exactly one {type:text}
          part and one {type:image} part
        * assistant.content is a string OR a list of parts; the `text` of
          every {type:text} part is concatenated

  Routing on the trimmed user text (prefix match is case-insensitive):
    ''                              -> describe / normal
    'caption:' short|normal|long    -> describe / that length
    'reason:' q                     -> query q, followed by reserved token 3
    'query:' q                      -> query q
    'detect:' q                     -> det, q preceded by one space
    'point:' q                      -> point, q preceded by one space
    anything else                   -> query with the whole text

  Raises (via raise_exception) on a wrong message count, wrong roles, a
  malformed multimodal user content list, or an unknown caption length.

  Every tag and comment strips its surrounding whitespace explicitly, so the
  rendered prompt contains no stray whitespace whether or not the environment
  enables trim_blocks / lstrip_blocks.
-#}
{%- if messages is string -%}
  {%- set text = messages | trim -%}
  {%- set has_assistant = false -%}
{%- else -%}
  {%- if messages | length < 1 or messages | length > 2 -%}
    {{- raise_exception("Provide a single string or 1-2 messages (user[, assistant]).") -}}
  {%- endif -%}
  {%- if (messages[0].get('role') | default('')) != 'user' -%}
    {{- raise_exception("First message must have role 'user'.") -}}
  {%- endif -%}
  {%- if messages | length == 2 and (messages[1].get('role') | default('')) != 'assistant' -%}
    {{- raise_exception("Second message, if present, must have role 'assistant'.") -}}
  {%- endif -%}

  {#- Extract user text, supporting multimodal content. -#}
  {%- set ucontent = messages[0]['content'] | default('', true) -%}
  {%- if ucontent is string -%}
    {%- set text = ucontent | trim -%}
  {%- else -%}
    {%- if ucontent | length != 2 -%}
      {{- raise_exception("User content list must have exactly two parts: one text and one image.") -}}
    {%- endif -%}
    {%- set text_parts = ucontent | selectattr('type', 'equalto', 'text') | list -%}
    {%- set image_parts = ucontent | selectattr('type', 'equalto', 'image') | list -%}
    {%- if (text_parts | length) != 1 or (image_parts | length) != 1 -%}
      {{- raise_exception("User content must include exactly one text and one image part.") -}}
    {%- endif -%}
    {#-
      .get('text') yields None (not undefined) when the key is absent, and a
      plain default('') lets None through, after which trim would produce the
      literal string "None". The boolean form maps None/empty to ''.
    -#}
    {%- set text = (text_parts[0].get('text') | default('', true)) | trim -%}
  {%- endif -%}

  {#- Extract assistant text if present (string or list of parts). -#}
  {%- set has_assistant = (messages | length == 2) -%}
  {%- if has_assistant -%}
    {%- set acontent = messages[1]['content'] | default('', true) -%}
    {%- if acontent is string -%}
      {%- set assistant_text = acontent -%}
    {%- else -%}
      {#- A text part whose `text` is missing or None contributes nothing rather than the literal "None". -#}
      {%- set atexts = acontent | selectattr('type', 'equalto', 'text') | map(attribute='text') | map('default', '', true) | list -%}
      {%- set assistant_text = (atexts | join('')) -%}
    {%- endif -%}
  {%- endif -%}
{%- endif -%}

{#- Routing: prefixes are matched on the lowercased text, payloads are sliced from the original text. -#}
{%- set lower = text | lower -%}
{%- if text == '' -%}
  <|md_reserved_0|>describe<|md_reserved_1|>normal<|md_reserved_2|>
{%- elif lower.startswith('caption:') -%}
  {%- set caption_length = (text[8:] | trim | lower) -%}
  {%- if caption_length not in ['short', 'normal', 'long'] -%}
    {{- raise_exception("caption length must be one of: short, normal, long.") -}}
  {%- endif -%}
  <|md_reserved_0|>describe<|md_reserved_1|>{{ caption_length }}<|md_reserved_2|>
{%- elif lower.startswith('reason:') -%}
  {%- set q = text[7:] | trim -%}
  <|md_reserved_0|>query<|md_reserved_1|>{{ q }}<|md_reserved_2|><|md_reserved_3|>
{%- elif lower.startswith('query:') -%}
  {%- set q = text[6:] | trim -%}
  <|md_reserved_0|>query<|md_reserved_1|>{{ q }}<|md_reserved_2|>
{%- elif lower.startswith('detect:') -%}
  {%- set q = text[7:] | trim -%}
  {#- The single space before the payload is part of the emitted prompt. -#}
  <|md_reserved_0|>det<|md_reserved_1|> {{ q }}<|md_reserved_2|>
{%- elif lower.startswith('point:') -%}
  {%- set q = text[6:] | trim -%}
  {#- The single space before the payload is part of the emitted prompt. -#}
  <|md_reserved_0|>point<|md_reserved_1|> {{ q }}<|md_reserved_2|>
{%- else -%}
  {%- set q = text -%}
  <|md_reserved_0|>query<|md_reserved_1|>{{ q }}<|md_reserved_2|>
{%- endif -%}

{#- Assistant reply (if any) followed by EOS, wrapped in the generation block. -#}
{%- generation -%}
  {%- if has_assistant -%}
    {{- assistant_text -}}{{- eos_token -}}
  {%- endif -%}
{%- endgeneration -%}