-
/**
 * Reads tokenizer output (JSON) from the clipboard and logs the tokens to the
 * console in fixed-size rows, each row prefixed with the index of its first
 * token.
 *
 * The clipboard text must parse to an array whose first element has a
 * `tokens` array of strings.
 *
 * @param {number} [tokensPerLine=20] - How many tokens to print per row.
 * @returns {Promise<void>} Resolves once every row has been logged.
 * @throws {RangeError} If `tokensPerLine` is not a positive integer.
 * @throws {SyntaxError} If the clipboard text is not valid JSON.
 * @throws {TypeError} If the parsed JSON has no `[0].tokens` array.
 */
async function printTokens(tokensPerLine = 20) {
  // A zero or negative step would make the chunking loop below spin forever.
  if (!Number.isInteger(tokensPerLine) || tokensPerLine <= 0) {
    throw new RangeError(
      `tokensPerLine must be a positive integer, got ${tokensPerLine}`,
    );
  }

  const output = await navigator.clipboard.readText();
  const rawTokens = JSON.parse(output)?.[0]?.tokens;
  if (!Array.isArray(rawTokens)) {
    throw new TypeError(
      'Clipboard JSON must be an array whose first element has a "tokens" array',
    );
  }

  // CLIP marks a word break with </w>, T5 with ▁, GLM with Ġ.
  // </w> and Ġ become spaces here; ▁ is handled after joining (below).
  const tokens = rawTokens.map((s) => s.replace(/(?:<\/w>|Ġ)/g, ' '));

  for (let index = 0; index < tokens.length; index += tokensPerLine) {
    // Join with "‧" so sub-word splits stay visible, then collapse any
    // separator that sits next to a word break (or a ▁ marker) into one space.
    const row = tokens
      .slice(index, index + tokensPerLine)
      .join('‧')
      .replace(/(?:\s‧|‧\s|‧▁|^▁)/g, ' ');
    console.log(`${index} ${row}`);
  }
}
Or do all the tokenizing in the browser: https://sd-tokenizer.rocker.boo/
Please register or sign in to comment