{# Accepts: 
   - messages as a single string
   - messages as 1-2 chat dicts: user[, assistant]
   - user.content may be a string OR a list with exactly one {type:text} and one {type:image}
#}
{%- if messages is string -%}
  {#- Bare-string form: the trimmed string is the user prompt and there is no assistant turn. -#}
  {% set text = messages | trim -%}
  {% set has_assistant = false -%}
{%- else -%}
  {#- Chat form: enforce 1-2 messages ordered user[, assistant] before reading any content. -#}
  {% if messages | length < 1 or messages | length > 2 -%}
    {{ raise_exception("Provide a single string or 1-2 messages (user[, assistant]).") }}
  {%- endif -%}
  {#- .get() on a dict yields None (never undefined) for a missing key, so the role is compared directly. -#}
  {% if messages[0].get('role') != 'user' -%}
    {{ raise_exception("First message must have role 'user'.") }}
  {%- endif -%}
  {% if messages | length == 2 and messages[1].get('role') != 'assistant' -%}
    {{ raise_exception("Second message, if present, must have role 'assistant'.") }}
  {%- endif -%}

  {#- Extract user text, supporting multimodal content.
      default('', true) maps any falsy content (None, missing, empty string/list) to an empty prompt. -#}
  {% set ucontent = messages[0]['content'] | default('', true) -%}
  {% if ucontent is string -%}
    {% set text = ucontent | trim -%}
  {%- else -%}
    {% if ucontent | length != 2 -%}
      {{ raise_exception("User content list must have exactly two parts: one text and one image.") }}
    {%- endif -%}
    {% set text_parts = ucontent | selectattr('type','equalto','text') | list -%}
    {% set image_parts = ucontent | selectattr('type','equalto','image') | list -%}
    {% if (text_parts | length) != 1 or (image_parts | length) != 1 -%}
      {{ raise_exception("User content must include exactly one text and one image part.") }}
    {%- endif -%}
    {#- The boolean flag is required: .get('text') returns None for a null/missing value, and
        without it `None | trim` would stringify to the literal prompt "None". -#}
    {% set text = (text_parts[0].get('text') | default('', true)) | trim -%}
  {%- endif -%}

  {#- Extract assistant text if present (string or list of parts). -#}
  {% set has_assistant = (messages | length == 2) -%}
  {% if has_assistant -%}
    {% set acontent = messages[1]['content'] | default('', true) -%}
    {% if acontent is string -%}
      {% set assistant_text = acontent -%}
    {%- else -%}
      {#- Concatenate the text of every {type:text} part; None texts are dropped so they
          do not get joined in as the literal string "None". -#}
      {% set atexts = acontent | selectattr('type','equalto','text') | map(attribute='text') | reject('none') | list -%}
      {% set assistant_text = (atexts | join('')) -%}
    {%- endif -%}
  {%- endif -%}
{%- endif -%}

{% set lowered = text | lower -%}

{# Dispatch on a case-insensitive prompt prefix; each branch emits its control tokens with no surrounding whitespace #}
{% if not text -%}
<|md_reserved_0|>describe<|md_reserved_1|>normal<|md_reserved_2|>
{%- elif lowered[:8] == 'caption:' -%}
  {% set caption_len = (text[8:] | trim | lower) -%}
  {% if not (caption_len in ('short', 'normal', 'long')) -%}
    {{ raise_exception("caption length must be one of: short, normal, long.") }}
  {%- endif -%}
<|md_reserved_0|>describe<|md_reserved_1|>{{ caption_len }}<|md_reserved_2|>
{%- elif lowered[:7] == 'reason:' -%}
<|md_reserved_0|>query<|md_reserved_1|>{{ text[7:] | trim }}<|md_reserved_2|><|md_reserved_3|>
{%- elif lowered[:6] == 'query:' -%}
<|md_reserved_0|>query<|md_reserved_1|>{{ text[6:] | trim }}<|md_reserved_2|>
{%- elif lowered[:7] == 'detect:' -%}
<|md_reserved_0|>det<|md_reserved_1|> {{ text[7:] | trim }}<|md_reserved_2|>
{%- elif lowered[:6] == 'point:' -%}
<|md_reserved_0|>point<|md_reserved_1|> {{ text[6:] | trim }}<|md_reserved_2|>
{%- else -%}
<|md_reserved_0|>query<|md_reserved_1|>{{ text }}<|md_reserved_2|>
{%- endif -%}
{#- Inside the generation block, emit the assistant reply followed by eos_token, but only when an assistant message was supplied. -#}
{%- generation -%}
  {%- if has_assistant -%}
    {{- assistant_text ~ eos_token -}}
  {%- endif -%}
{%- endgeneration -%}