add tokenizer
b104e87 | { |
| "[PAD]": 51, |
| "[UNK]": 50, |
| "|": 44, |
| "ء": 3, |
| "آ": 5, |
| "أ": 32, |
| "ؤ": 35, |
| "ئ": 38, |
| "ا": 46, |
| "ب": 25, |
| "ت": 19, |
| "ث": 2, |
| "ج": 49, |
| "ح": 39, |
| "خ": 31, |
| "د": 27, |
| "ذ": 10, |
| "ر": 0, |
| "ز": 33, |
| "س": 28, |
| "ش": 42, |
| "ص": 43, |
| "ض": 26, |
| "ط": 45, |
| "ظ": 15, |
| "ع": 4, |
| "غ": 11, |
| "ف": 8, |
| "ق": 7, |
| "ل": 41, |
| "م": 37, |
| "ن": 36, |
| "ه": 21, |
| "و": 48, |
| "ى": 17, |
| "ي": 22, |
| "ٹ": 9, |
| "پ": 30, |
| "چ": 40, |
| "ڈ": 16, |
| "ڑ": 14, |
| "ژ": 47, |
| "ک": 1, |
| "گ": 13, |
| "ں": 23, |
| "ھ": 24, |
| "ہ": 34, |
| "ۂ": 29, |
| "ۃ": 20, |
| "ی": 6, |
| "ے": 18, |
| "ۓ": 12 |
| } |
|
|