nathanrchn committed on
Commit
f89f5ef
·
verified ·
1 Parent(s): 5df61a7

Add support for images, and fix an issue where the end-of-assistant token was not added when the last message was an assistant message.

Browse files
Files changed (3) hide show
  1. chat_template.jinja +11 -1
  2. tokenizer.json +2 -2
  3. tokenizer_config.json +1 -1
chat_template.jinja CHANGED
@@ -150,8 +150,9 @@
150
  {%- set outer_token = '<|inner_suffix|>' -%}
151
  {%- set tool_calls_token = '<|tools_prefix|>' -%}
152
  {%- set end_tool_calls_token = '<|tools_suffix|>' -%}
 
153
 
154
- {%- set ns = namespace(in_assistant=false, in_tool=false, in_inner=false, assistant_format=none) -%}
155
 
156
  {%- if messages and messages[0].role == 'system' -%}
157
  {%- if "content" in messages[0] -%}
@@ -204,6 +205,8 @@
204
  {%- for part in parts -%}
205
  {%- if part.type == "text" -%}
206
  {{ part.text }}
 
 
207
  {%- else -%}
208
  {{- raise_exception("Invalid user part: " + part.type) -}}
209
  {%- endif -%}
@@ -257,6 +260,7 @@
257
  {%- endif -%}
258
  {%- endfor -%}
259
  {{ ']' + end_tool_calls_token }}
 
260
  {%- elif block.type == 'tool_outputs' -%}
261
  {%- if ns.in_tool -%}
262
  {{- raise_exception("Cannot have both tool outputs as separate messages and tool outputs as blocks") -}}
@@ -269,6 +273,7 @@
269
  {%- endif -%}
270
  {%- endfor -%}
271
  {{- ']' }}
 
272
  {%- elif block.type == 'response' -%}
273
  {%- if ns.in_tool -%}
274
  {{ ']' }}
@@ -303,6 +308,7 @@
303
  {%- endif -%}
304
  {%- endfor -%}
305
  {{ ']' + end_tool_calls_token }}
 
306
  {%- endif -%}
307
  {%- elif message.role == 'tool' -%}
308
  {%- if not ns.in_assistant -%}
@@ -315,6 +321,7 @@
315
  {{ ", "}}
316
  {%- endif -%}
317
  {{ message.content }}
 
318
  {%- else -%}
319
  {{- raise_exception("Invalid message role") -}}
320
  {%- endif -%}
@@ -322,6 +329,9 @@
322
  {%- if ns.in_tool -%}
323
  {{ ']' }}
324
  {%- endif -%}
 
 
 
325
  {%- if add_generation_prompt -%}
326
  {{ assistant_token }}
327
  {%- endif -%}
 
150
  {%- set outer_token = '<|inner_suffix|>' -%}
151
  {%- set tool_calls_token = '<|tools_prefix|>' -%}
152
  {%- set end_tool_calls_token = '<|tools_suffix|>' -%}
153
+ {%- set image_token = '<|image|>' -%}
154
 
155
+ {%- set ns = namespace(in_assistant=false, in_tool=false, in_inner=false, waiting_for_tool_outputs=false, assistant_format=none) -%}
156
 
157
  {%- if messages and messages[0].role == 'system' -%}
158
  {%- if "content" in messages[0] -%}
 
205
  {%- for part in parts -%}
206
  {%- if part.type == "text" -%}
207
  {{ part.text }}
208
+ {%- elif part.type == "image" -%}
209
+ {{ image_token }}
210
  {%- else -%}
211
  {{- raise_exception("Invalid user part: " + part.type) -}}
212
  {%- endif -%}
 
260
  {%- endif -%}
261
  {%- endfor -%}
262
  {{ ']' + end_tool_calls_token }}
263
+ {%- set ns.waiting_for_tool_outputs = true -%}
264
  {%- elif block.type == 'tool_outputs' -%}
265
  {%- if ns.in_tool -%}
266
  {{- raise_exception("Cannot have both tool outputs as separate messages and tool outputs as blocks") -}}
 
273
  {%- endif -%}
274
  {%- endfor -%}
275
  {{- ']' }}
276
+ {%- set ns.waiting_for_tool_outputs = false -%}
277
  {%- elif block.type == 'response' -%}
278
  {%- if ns.in_tool -%}
279
  {{ ']' }}
 
308
  {%- endif -%}
309
  {%- endfor -%}
310
  {{ ']' + end_tool_calls_token }}
311
+ {%- set ns.waiting_for_tool_outputs = true -%}
312
  {%- endif -%}
313
  {%- elif message.role == 'tool' -%}
314
  {%- if not ns.in_assistant -%}
 
321
  {{ ", "}}
322
  {%- endif -%}
323
  {{ message.content }}
324
+ {%- set ns.waiting_for_tool_outputs = false -%}
325
  {%- else -%}
326
  {{- raise_exception("Invalid message role") -}}
327
  {%- endif -%}
 
329
  {%- if ns.in_tool -%}
330
  {{ ']' }}
331
  {%- endif -%}
332
+ {%- if ns.in_assistant and not (continue_assistant_message is defined and continue_assistant_message) and not ns.waiting_for_tool_outputs -%}
333
+ {{ end_assistant_token }}
334
+ {%- endif -%}
335
  {%- if add_generation_prompt -%}
336
  {{ assistant_token }}
337
  {%- endif -%}
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb201fb226cde11f66c3cf51c5344fb37b1611f00c21e75c324546d854eff2e1
3
- size 17078480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:010095abf1dc6f52d4867584e7b3f0d4eece854593ae310220ec7782dd1b0a66
3
+ size 17078474
tokenizer_config.json CHANGED
@@ -588,7 +588,7 @@
588
  "special": true
589
  },
590
  "73": {
591
- "content": "<SPECIAL_73>",
592
  "lstrip": false,
593
  "normalized": false,
594
  "rstrip": false,
 
588
  "special": true
589
  },
590
  "73": {
591
+ "content": "<|image|>",
592
  "lstrip": false,
593
  "normalized": false,
594
  "rstrip": false,