aimeri committed on
Commit
b95d16f
·
verified ·
1 Parent(s): 720de47

Update tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +46 -220
tokenizer_config.json CHANGED
@@ -7,7 +7,7 @@
7
  "eoa_token": "<audio|>",
8
  "eoc_token": "<channel|>",
9
  "eoi_token": "<image|>",
10
- "eos_token": "<turn|>",
11
  "eot_token": "<turn|>",
12
  "escape_token": "<|\"|>",
13
  "etc_token": "<tool_call|>",
@@ -17,32 +17,52 @@
17
  "<|video|>"
18
  ],
19
  "image_token": "<|image|>",
20
- "is_local": false,
21
  "mask_token": "<mask>",
22
- "model_max_length": 262144,
23
- "model_specific_special_tokens": {
24
- "audio_token": "<|audio|>",
25
- "boa_token": "<|audio>",
26
- "boi_token": "<|image>",
27
- "eoa_token": "<audio|>",
28
- "eoc_token": "<channel|>",
29
- "eoi_token": "<image|>",
30
- "eot_token": "<turn|>",
31
- "escape_token": "<|\"|>",
32
- "etc_token": "<tool_call|>",
33
- "etd_token": "<tool|>",
34
- "etr_token": "<tool_response|>",
35
- "image_token": "<|image|>",
36
- "soc_token": "<|channel>",
37
- "sot_token": "<|turn>",
38
- "stc_token": "<|tool_call>",
39
- "std_token": "<|tool>",
40
- "str_token": "<|tool_response>",
41
- "think_token": "<|think|>"
42
- },
43
  "pad_token": "<pad>",
44
- "padding_side": "right",
45
  "processor_class": "Gemma4Processor",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  "soc_token": "<|channel>",
47
  "sot_token": "<|turn>",
48
  "stc_token": "<|tool_call>",
@@ -50,199 +70,5 @@
50
  "str_token": "<|tool_response>",
51
  "think_token": "<|think|>",
52
  "tokenizer_class": "GemmaTokenizer",
53
- "unk_token": "<unk>",
54
- "added_tokens_decoder": {
55
- "0": {
56
- "content": "<pad>",
57
- "single_word": false,
58
- "lstrip": false,
59
- "rstrip": false,
60
- "normalized": false,
61
- "special": true
62
- },
63
- "1": {
64
- "content": "<eos>",
65
- "single_word": false,
66
- "lstrip": false,
67
- "rstrip": false,
68
- "normalized": false,
69
- "special": true
70
- },
71
- "2": {
72
- "content": "<bos>",
73
- "single_word": false,
74
- "lstrip": false,
75
- "rstrip": false,
76
- "normalized": false,
77
- "special": true
78
- },
79
- "3": {
80
- "content": "<unk>",
81
- "single_word": false,
82
- "lstrip": false,
83
- "rstrip": false,
84
- "normalized": false,
85
- "special": true
86
- },
87
- "4": {
88
- "content": "<mask>",
89
- "single_word": false,
90
- "lstrip": false,
91
- "rstrip": false,
92
- "normalized": false,
93
- "special": true
94
- },
95
- "46": {
96
- "content": "<|tool>",
97
- "single_word": false,
98
- "lstrip": false,
99
- "rstrip": false,
100
- "normalized": false,
101
- "special": true
102
- },
103
- "47": {
104
- "content": "<tool|>",
105
- "single_word": false,
106
- "lstrip": false,
107
- "rstrip": false,
108
- "normalized": false,
109
- "special": true
110
- },
111
- "48": {
112
- "content": "<|tool_call>",
113
- "single_word": false,
114
- "lstrip": false,
115
- "rstrip": false,
116
- "normalized": false,
117
- "special": true
118
- },
119
- "49": {
120
- "content": "<tool_call|>",
121
- "single_word": false,
122
- "lstrip": false,
123
- "rstrip": false,
124
- "normalized": false,
125
- "special": true
126
- },
127
- "50": {
128
- "content": "<|tool_response>",
129
- "single_word": false,
130
- "lstrip": false,
131
- "rstrip": false,
132
- "normalized": false,
133
- "special": true
134
- },
135
- "51": {
136
- "content": "<tool_response|>",
137
- "single_word": false,
138
- "lstrip": false,
139
- "rstrip": false,
140
- "normalized": false,
141
- "special": true
142
- },
143
- "52": {
144
- "content": "<|\"|>",
145
- "single_word": false,
146
- "lstrip": false,
147
- "rstrip": false,
148
- "normalized": false,
149
- "special": true
150
- },
151
- "98": {
152
- "content": "<|think|>",
153
- "single_word": false,
154
- "lstrip": false,
155
- "rstrip": false,
156
- "normalized": false,
157
- "special": true
158
- },
159
- "100": {
160
- "content": "<|channel>",
161
- "single_word": false,
162
- "lstrip": false,
163
- "rstrip": false,
164
- "normalized": false,
165
- "special": true
166
- },
167
- "101": {
168
- "content": "<channel|>",
169
- "single_word": false,
170
- "lstrip": false,
171
- "rstrip": false,
172
- "normalized": false,
173
- "special": true
174
- },
175
- "105": {
176
- "content": "<|turn>",
177
- "single_word": false,
178
- "lstrip": false,
179
- "rstrip": false,
180
- "normalized": false,
181
- "special": true
182
- },
183
- "106": {
184
- "content": "<turn|>",
185
- "single_word": false,
186
- "lstrip": false,
187
- "rstrip": false,
188
- "normalized": false,
189
- "special": true
190
- },
191
- "255999": {
192
- "content": "<|image>",
193
- "single_word": false,
194
- "lstrip": false,
195
- "rstrip": false,
196
- "normalized": false,
197
- "special": true
198
- },
199
- "256000": {
200
- "content": "<|audio>",
201
- "single_word": false,
202
- "lstrip": false,
203
- "rstrip": false,
204
- "normalized": false,
205
- "special": true
206
- },
207
- "258880": {
208
- "content": "<|image|>",
209
- "single_word": false,
210
- "lstrip": false,
211
- "rstrip": false,
212
- "normalized": false,
213
- "special": true
214
- },
215
- "258881": {
216
- "content": "<|audio|>",
217
- "single_word": false,
218
- "lstrip": false,
219
- "rstrip": false,
220
- "normalized": false,
221
- "special": true
222
- },
223
- "258882": {
224
- "content": "<image|>",
225
- "single_word": false,
226
- "lstrip": false,
227
- "rstrip": false,
228
- "normalized": false,
229
- "special": true
230
- },
231
- "258883": {
232
- "content": "<audio|>",
233
- "single_word": false,
234
- "lstrip": false,
235
- "rstrip": false,
236
- "normalized": false,
237
- "special": true
238
- },
239
- "258884": {
240
- "content": "<|video|>",
241
- "single_word": false,
242
- "lstrip": false,
243
- "rstrip": false,
244
- "normalized": false,
245
- "special": true
246
- }
247
- }
248
- }
 
7
  "eoa_token": "<audio|>",
8
  "eoc_token": "<channel|>",
9
  "eoi_token": "<image|>",
10
+ "eos_token": "<eos>",
11
  "eot_token": "<turn|>",
12
  "escape_token": "<|\"|>",
13
  "etc_token": "<tool_call|>",
 
17
  "<|video|>"
18
  ],
19
  "image_token": "<|image|>",
 
20
  "mask_token": "<mask>",
21
+ "model_max_length": 1000000000000000019884624838656,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  "pad_token": "<pad>",
23
+ "padding_side": "left",
24
  "processor_class": "Gemma4Processor",
25
+ "response_schema": {
26
+ "type": "object",
27
+ "properties": {
28
+ "role": {
29
+ "const": "assistant"
30
+ },
31
+ "thinking": {
32
+ "type": "string"
33
+ },
34
+ "content": {
35
+ "type": "string"
36
+ },
37
+ "tool_calls": {
38
+ "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>",
39
+ "type": "array",
40
+ "items": {
41
+ "type": "object",
42
+ "properties": {
43
+ "type": {
44
+ "const": "function"
45
+ },
46
+ "function": {
47
+ "type": "object",
48
+ "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})",
49
+ "properties": {
50
+ "name": {
51
+ "type": "string"
52
+ },
53
+ "arguments": {
54
+ "type": "object",
55
+ "x-parser": "gemma4-tool-call",
56
+ "additionalProperties": {}
57
+ }
58
+ }
59
+ }
60
+ }
61
+ }
62
+ }
63
+ },
64
+ "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?P<content>(?:(?!\\<turn\\|\\>)(?!\\<\\|tool_response\\>).)+)?(?:\\<turn\\|\\>|\\<\\|tool_response\\>)?"
65
+ },
66
  "soc_token": "<|channel>",
67
  "sot_token": "<|turn>",
68
  "stc_token": "<|tool_call>",
 
70
  "str_token": "<|tool_response>",
71
  "think_token": "<|think|>",
72
  "tokenizer_class": "GemmaTokenizer",
73
+ "unk_token": "<unk>"
74
+ }