This tutorial shows you how to remove diacritic characters from a String in Dart by replacing each diacritic character with ASCII character(s).
A diacritic is a mark added to a letter to distinguish its pronunciation. A letter can have several diacritic variants, each with a different code point. In some situations, you may need the ASCII version of the letter, which doesn't have the diacritic mark. For example, if you have a search feature, users usually don't type the diacritic version of a letter. Therefore, it's necessary to convert the diacritic characters into ASCII characters. Below is an example of how to do it in Dart, which also works in Flutter or any other Dart framework.
Remove Diacritics From a String
At the time this post was written, Dart didn't provide any built-in method to remove diacritic characters. However, the idea behind the solution is quite simple. You need to define a Map for mapping each diacritic character to ASCII character(s). Then, replace each character of the String if it's found in the Map. In addition, we can also create an extension on the String class by adding a getter that returns the value without diacritic characters.
Below is a class that provides the Map. The Map's keys are the diacritic characters, while the values are the ASCII characters. To make it easier to see the list of diacritic characters for each ASCII character(s) (and to make the code more readable), I didn't put the entries directly into the Map. Instead, I created a _register method that accepts the ASCII character(s) as the first argument and the list of diacritic characters as the second argument. The _register method is responsible for registering each diacritic character in the Map. You can see the mapping in the _init method below, which makes calls to the _register method.
As you can see from the code below, the character 'A' has several diacritic variants such as 'Á', 'Ă', 'Ắ', 'Ặ', 'Ằ' and so on. Each of the diacritic variants will be mapped to 'A'. There are some diacritic characters that map to more than one letter. For example, 'Æ' will be mapped to 'AE'.
/// Lookup table from accented or otherwise decorated Latin characters to
/// their plain ASCII replacement (one or more ASCII letters).
///
/// The table is stored in a static field and filled the first time a
/// [Diacritics] instance is constructed; later instances reuse it.
///
/// Keys are meant to be single characters, because consumers look the input
/// up one character at a time. Ligatures and digraph letters are therefore
/// written as `\u` escapes so that the key is guaranteed to be the single
/// code point rather than a look-alike sequence of ASCII letters.
class Diacritics {
  /// Shared table: special character -> ASCII replacement.
  static final _map = <String, String>{};

  /// Fills the shared table on first use.
  Diacritics() {
    if (_map.isEmpty) {
      _init();
    }
  }

  /// Registers every known mapping.
  ///
  /// Some lists contain the ASCII letter itself or the same character twice;
  /// these entries are harmless because they write the same value again.
  void _init() {
    _register('A', ['A', 'Á', 'Ă', 'Ắ', 'Ặ', 'Ằ', 'Ẳ', 'Ẵ', 'Ǎ', 'Â', 'Ấ', 'Ậ', 'Ầ', 'Ẩ', 'Ẫ', 'Ä', 'Ạ', 'À', 'Ả', 'Ā', 'Ą', 'Å', 'Ǻ', 'Ã', 'Ⓐ', 'A', 'Ȧ', 'Ǡ', 'Ǟ', 'Ȁ', 'Ȃ', 'Ḁ', 'Ⱥ', 'Ɐ']);
    _register('B', ['B', 'Ḅ', 'Ɓ', 'Ⓑ', 'B', 'Ḃ', 'Ḇ', 'Ƀ', 'Ƃ']);
    _register('C', ['C', 'Ć', 'Č', 'Ç', 'Ĉ', 'Ċ', 'ʗ', 'Ⓒ', 'C', 'Ḉ', 'Ƈ', 'Ȼ', 'Ꜿ']);
    _register('D', ['D', 'Ď', 'Ḓ', 'Ḍ', 'Ɗ', 'Ḏ', 'Đ', 'Ð', 'Ⓓ', 'D', 'Ḋ', 'Ḑ', 'Ƌ', 'Ɖ', 'Ꝺ']);
    _register('E', ['E', 'É', 'Ĕ', 'Ě', 'Ê', 'Ế', 'Ệ', 'Ề', 'Ể', 'Ễ', 'Ë', 'Ė', 'Ẹ', 'È', 'Ẻ', 'Ē', 'Ę', 'Ẽ', 'Ɛ', 'Ə', 'Ⓔ', 'E', 'Ḕ', 'Ḗ', 'Ȅ', 'Ȇ', 'Ȩ', 'Ḝ', 'Ḙ', 'Ḛ', 'Ǝ']);
    _register('F', ['F', 'Ƒ', 'Ⓕ', 'F', 'Ḟ', 'Ꝼ']);
    // Fixed: the first two entries used to be fused into one junk key by a
    // quote typo.
    _register('G', ['G', 'Ǵ', 'Ğ', 'Ǧ', 'Ģ', 'Ĝ', 'Ġ', 'Ḡ', 'ʛ', 'G', 'Ⓖ', 'G', 'Ǵ', 'Ǥ', 'Ɠ', 'Ꞡ', 'Ᵹ', 'Ꝿ']);
    _register('H', ['H', 'Ḫ', 'Ĥ', 'Ḥ', 'Ħ', 'Ⓗ', 'H', 'Ḣ', 'Ḧ', 'Ȟ', 'Ḩ', 'Ⱨ', 'Ⱶ', 'Ɥ']);
    _register('I', ['I', 'Í', 'Ĭ', 'Ǐ', 'Î', 'Ï', 'İ', 'Ị', 'Ì', 'Ỉ', 'Ī', 'Į', 'Ĩ', 'Ⓘ', 'I', 'Ḯ', 'Ȉ', 'Ȋ', 'Ḭ', 'Ɨ']);
    _register('J', ['J', 'Ĵ', 'Ⓙ', 'J', 'Ɉ']);
    _register('K', ['K', 'Ķ', 'Ḳ', 'Ƙ', 'Ḵ', 'Ⓚ', 'K', 'Ḱ', 'Ǩ', 'Ⱪ', 'Ꝁ', 'Ꝃ', 'Ꝅ', 'Ꞣ']);
    _register('L', ['L', 'Ĺ', 'Ƚ', 'Ľ', 'Ļ', 'Ḽ', 'Ḷ', 'Ḹ', 'Ḻ', 'Ŀ', 'Ł', 'Ⓛ', 'L', 'Ɫ', 'Ⱡ', 'Ꝉ', 'Ꝇ', 'Ꞁ']);
    _register('M', ['M', 'Ḿ', 'Ṁ', 'Ṃ', 'Ⓜ', 'M', 'Ɱ', 'Ɯ']);
    // 'Ŋ' (capital eng) belongs here; it used to be listed under lowercase 'n'.
    _register('N', ['N', 'Ń', 'Ň', 'Ņ', 'Ṋ', 'Ṅ', 'Ṇ', 'Ǹ', 'Ɲ', 'Ṉ', 'Ñ', 'Ⓝ', 'N', 'Ƞ', 'Ꞑ', 'Ꞥ', 'Ŋ']);
    // A stray multi-character entry (' - O') was dropped: it could never
    // match a single input character.
    _register('O', ['O', 'Ó', 'Ŏ', 'Ǒ', 'Ô', 'Ố', 'Ộ', 'Ồ', 'Ổ', 'Ỗ', 'Ö', 'Ọ', 'Ő', 'Ò', 'Ỏ', 'Ơ', 'Ớ', 'Ợ', 'Ờ', 'Ở', 'Ỡ', 'Ō', 'Ɵ', 'Ǫ', 'Ø', 'Ǿ', 'Õ', 'Ⓞ', 'O', 'Ṍ', 'Ȭ', 'Ṏ', 'Ṑ', 'Ṓ', 'Ȯ', 'Ȱ', 'Ȫ', 'Ȍ', 'Ȏ', 'Ǭ', 'Ꝋ', 'Ꝍ', 'Ɔ']);
    _register('P', ['P', 'Ⓟ', 'P', 'Ṕ', 'Ṗ', 'Ƥ', 'Ᵽ', 'Ꝑ', 'Ꝓ', 'Ꝕ']);
    _register('Q', ['Q', 'Ⓠ', 'Q', 'Ꝗ', 'Ꝙ', 'Ɋ']);
    _register('R', ['R', 'Ŕ', 'Ř', 'Ŗ', 'Ṙ', 'Ṛ', 'Ṝ', 'Ṟ', 'ʁ', 'Ⓡ', 'R', 'Ȑ', 'Ȓ', 'Ɍ', 'Ɽ', 'Ꝛ', 'Ꞧ', 'Ꞃ']);
    _register('S', ['S', 'Ś', 'Š', 'Ş', 'Ŝ', 'Ș', 'Ṡ', 'Ṣ', 'ẞ', 'Ⓢ', 'S', 'Ṥ', 'Ṧ', 'Ṩ', 'Ȿ', 'Ꞩ', 'Ꞅ']);
    _register('T', ['T', 'Ť', 'Ţ', 'Ṱ', 'Ț', 'Ṭ', 'Ṯ', 'Ŧ', 'Ⓣ', 'T', 'Ṫ', 'Ƭ', 'Ʈ', 'Ⱦ', 'Ꞇ']);
    _register('U', ['U', 'Ú', 'Ŭ', 'Ǔ', 'Û', 'Ü', 'Ǘ', 'Ǚ', 'Ǜ', 'Ǖ', 'Ụ', 'Ű', 'Ù', 'Ủ', 'Ư', 'Ứ', 'Ự', 'Ừ', 'Ử', 'Ữ', 'Ū', 'Ų', 'Ů', 'Ũ', 'Ⓤ', 'U', 'Ṹ', 'Ṻ', 'Ȕ', 'Ȗ', 'Ṳ', 'Ṷ', 'Ṵ', 'Ʉ']);
    _register('V', ['V', 'Ⓥ', 'V', 'Ṽ', 'Ṿ', 'Ʋ', 'Ꝟ', 'Ʌ']);
    _register('W', ['W', 'Ẃ', 'Ŵ', 'Ẅ', 'Ẁ', 'Ⓦ', 'W', 'Ẇ', 'Ẉ', 'Ⱳ']);
    _register('X', ['X', 'Ⓧ', 'X', 'Ẋ', 'Ẍ']);
    _register('Y', ['Y', 'Ý', 'Ŷ', 'Ÿ', 'Ẏ', 'Ỵ', 'Ỳ', 'Ƴ', 'Ỷ', 'Ȳ', 'Ỹ', 'Ⓨ', 'Y', 'Ɏ', 'Ỿ']);
    _register('Z', ['Z', 'Ź', 'Ž', 'Ż', 'Ẓ', 'Ẕ', 'Ƶ', 'Ⓩ', 'Z', 'Ẑ', 'Ȥ', 'Ɀ', 'Ⱬ', 'Ꝣ', 'ʒ', 'ǯ', 'ʓ']);
    _register('a', ['a', 'á', 'ă', 'ắ', 'ặ', 'ằ', 'ẳ', 'ẵ', 'ǎ', 'â', 'ấ', 'ậ', 'ầ', 'ẩ', 'ẫ', 'ä', 'ạ', 'à', 'ả', 'ā', 'ą', 'å', 'ǻ', 'ã', 'ɑ', 'ɐ', 'ɒ', 'ⓐ', 'a', 'ẚ', 'ȧ', 'ǡ', 'ǟ', 'ȁ', 'ȃ', 'ḁ', 'ⱥ', 'ə', 'ɚ']);
    _register('b', ['ḅ', 'ɓ', 'b', 'ⓑ', 'b', 'ḃ', 'ḇ', 'ƀ', 'ƃ']);
    _register('c', ['ć', 'č', 'ç', 'ĉ', 'ɕ', 'ċ', 'c', 'ⓒ', 'c', 'ḉ', 'ƈ', 'ȼ', 'ꜿ', 'ↄ']);
    _register('d', ['ď', 'ḓ', 'ḍ', 'ɗ', 'ḏ', 'đ', 'ɖ', 'ð', 'd', 'ⓓ', 'd', 'ḋ', 'ḑ', 'ƌ', 'ꝺ']);
    _register('e', ['é', 'ĕ', 'ě', 'ê', 'ế', 'ệ', 'ề', 'ể', 'ễ', 'ë', 'ė', 'ẹ', 'è', 'ẻ', 'ē', 'ę', 'ẽ', 'ɘ', 'ɜ', 'ɝ', 'ʚ', 'ɞ', 'e', 'ⓔ', 'e', 'ḕ', 'ḗ', 'ȅ', 'ȇ', 'ȩ', 'ḝ', 'ḙ', 'ḛ', 'ɇ', 'ǝ', 'ɛ']);
    _register('f', ['ƒ', 'f', 'ⓕ', 'f', 'ḟ', 'ꝼ']);
    _register('g', ['ǵ', 'ğ', 'ǧ', 'ģ', 'ĝ', 'ġ', 'ɠ', 'ḡ', 'ɡ', 'ɣ', 'g', 'ⓖ', 'g', 'ǥ', 'ꞡ', 'ᵹ', 'ꝿ']);
    _register('h', ['ḫ', 'ĥ', 'ḥ', 'ɦ', 'ẖ', 'ħ', 'ɧ', 'ɥ', 'ʮ', 'ʯ', 'h', 'ⓗ', 'h', 'ḣ', 'ḧ', 'ȟ', 'ḩ', 'ⱨ', 'ⱶ']);
    _register('i', ['í', 'ĭ', 'ǐ', 'î', 'ï', 'ị', 'ì', 'ỉ', 'ī', 'į', 'ɨ', 'ĩ', 'ɩ', 'ı', 'i', 'ⓘ', 'i', 'ḯ', 'ȉ', 'ȋ', 'ḭ']);
    _register('j', ['ǰ', 'ĵ', 'ʝ', 'ȷ', 'ɟ', 'ʄ', 'j', 'ⓙ', 'j', 'ɉ']);
    _register('k', ['ķ', 'ḳ', 'ƙ', 'ḵ', 'ĸ', 'ʞ', 'k', 'ⓚ', 'k', 'ḱ', 'ǩ', 'ⱪ', 'ꝁ', 'ꝃ', 'ꝅ', 'ꞣ']);
    _register('l', ['ĺ', 'ƚ', 'ɬ', 'ľ', 'ļ', 'ḽ', 'ḷ', 'ḹ', 'ḻ', 'ŀ', 'ɫ', 'ɭ', 'ł', 'ƛ', 'l', 'ⓛ', 'l', 'ⱡ', 'ꝉ', 'ꞁ', 'ꝇ']);
    _register('m', ['ḿ', 'ṁ', 'ṃ', 'ɱ', 'ɯ', 'ɰ', 'm', 'ⓜ', 'm']);
    // '\u0149' is the single-code-point "n preceded by apostrophe". A stray
    // two-character entry ('ꝕⓝ') was dropped; 'ⓝ' is still listed on its own.
    // NOTE(review): the Cyrillic entries 'л' and 'ԉ' are kept as found, but
    // mapping them to 'n' looks questionable — confirm the intent.
    _register('n', ['\u0149', 'ń', 'ň', 'ņ', 'ṋ', 'ṅ', 'ṇ', 'ǹ', 'ɲ', 'ṉ', 'ɳ', 'ñ', 'ŋ', 'n', 'n', 'ƞ', 'ꞑ', 'ꞥ', 'л', 'ԉ', 'ⓝ']);
    _register('o', ['ó', 'ŏ', 'ǒ', 'ô', 'ố', 'ộ', 'ồ', 'ổ', 'ỗ', 'ö', 'ọ', 'ő', 'ò', 'ỏ', 'ơ', 'ớ', 'ợ', 'ờ', 'ở', 'ỡ', 'ō', 'ǫ', 'ø', 'ǿ', 'õ', 'ɔ', 'ɵ', 'ʘ', '߀', 'o', 'ⓞ', 'o', 'ṍ', 'ȭ', 'ṏ', 'ṑ', 'ṓ', 'ȯ', 'ȱ', 'ȫ', 'ȍ', 'ȏ', 'ǭ', 'ꝋ', 'ꝍ']);
    _register('p', ['ɸ', 'p', 'ⓟ', 'p', 'ṕ', 'ṗ', 'ƥ', 'ᵽ', 'ꝑ', 'ꝓ', 'ꝕ']);
    _register('q', ['ʠ', 'q', 'ⓠ', 'q', 'ɋ', 'ꝗ', 'ꝙ']);
    _register('r', ['ŕ', 'ř', 'ŗ', 'ṙ', 'ṛ', 'ṝ', 'ɾ', 'ṟ', 'ɼ', 'ɽ', 'ɿ', 'ɹ', 'ɻ', 'ɺ', 'r', 'ⓡ', 'r', 'ȑ', 'ȓ', 'ɍ', 'ꝛ', 'ꞧ', 'ꞃ']);
    _register('s', ['ś', 'š', 'ş', 'ŝ', 'ș', 'ṡ', 'ṣ', 'ʂ', 'ſ', 'ʃ', 'ʆ', 'ʅ', 's', 'ⓢ', 's', 'ṥ', 'ṧ', 'ṩ', 'ȿ', 'ꞩ', 'ꞅ', 'ẛ']);
    _register('t', ['ť', 'ţ', 'ṱ', 'ț', 'ẗ', 'ṭ', 'ṯ', 'ʈ', 'ŧ', 'ʇ', 't', 'ⓣ', 't', 'ṫ', 'ƭ', 'ⱦ', 'ꞇ']);
    _register('u', ['ʉ', 'ú', 'ŭ', 'ǔ', 'û', 'ü', 'ǘ', 'ǚ', 'ǜ', 'ǖ', 'ụ', 'ű', 'ù', 'ủ', 'ư', 'ứ', 'ự', 'ừ', 'ử', 'ữ', 'ū', 'ų', 'ů', 'ũ', 'ʊ', 'u', 'ⓤ', 'u', 'ṹ', 'ṻ', 'ȕ', 'ȗ', 'ṳ', 'ṷ', 'ṵ']);
    _register('v', ['ʋ', 'ʌ', 'v', 'ⓥ', 'v', 'ṽ', 'ṿ', 'ꝟ']);
    _register('w', ['ẃ', 'ŵ', 'ẅ', 'ẁ', 'ʍ', 'w', 'ⓦ', 'w', 'ẇ', 'ẘ', 'ẉ', 'ⱳ']);
    _register('x', ['x', 'ⓧ', 'x', 'ẋ', 'ẍ']);
    _register('y', ['ý', 'ŷ', 'ÿ', 'ẏ', 'ỵ', 'ỳ', 'ƴ', 'ỷ', 'ȳ', 'ỹ', 'ʎ', 'y', 'ⓨ', 'y', 'ẙ', 'ɏ', 'ỿ']);
    _register('z', ['ź', 'ž', 'ʑ', 'ż', 'ẓ', 'ẕ', 'ʐ', 'ƶ', 'z', 'ⓩ', 'z', 'ẑ', 'ȥ', 'ɀ', 'ⱬ', 'ꝣ']);

    // Characters that expand to more than one ASCII letter.
    _register('AA', ['Ꜳ']);
    _register('AE', ['Æ', 'Ǽ', 'Ǣ']);
    _register('AO', ['Ꜵ']);
    _register('AU', ['Ꜷ']);
    _register('AV', ['Ꜹ', 'Ꜻ']);
    _register('AY', ['Ꜽ']);
    _register('DZ', ['\u01F1', '\u01C4']); // DZ digraph, DZ-with-caron digraph
    _register('Dz', ['\u01F2', '\u01C5']); // title-case forms of the above
    _register('IJ', ['\u0132']); // IJ ligature (replacement was misspelled 'IJJ')
    _register('LJ', ['\u01C7']);
    _register('Lj', ['\u01C8']);
    _register('NJ', ['\u01CA']);
    _register('Nj', ['\u01CB']);
    // Uppercase 'Œ' now yields 'OE'; it used to be registered under 'oe'.
    _register('OE', ['Œ', 'ɶ']);
    _register('OI', ['Ƣ']);
    _register('OO', ['Ꝏ']);
    _register('OU', ['Ȣ']);
    _register('Th', ['Þ']);
    _register('TZ', ['Ꜩ']);
    _register('VY', ['Ꝡ']);
    _register('WW', ['ʬ']);
    _register('aa', ['ꜳ']);
    _register('ae', ['æ', 'ǽ', 'ǣ']);
    _register('ao', ['ꜵ']);
    _register('au', ['ꜷ']);
    _register('av', ['ꜹ', 'ꜻ']);
    _register('ay', ['ꜽ']);
    _register('dZ', ['ʤ', 'ʣ', 'ʥ']);
    _register('dz', ['\u01F3', '\u01C6']); // dz digraph, dz-with-caron digraph
    _register('ff', ['\uFB00']); // Latin small ligatures ff / fi / fl / ffi / ffl
    _register('fi', ['\uFB01']);
    _register('fl', ['\uFB02']);
    _register('ffi', ['\uFB03']);
    _register('ffl', ['\uFB04']);
    _register('fN', ['ʩ']);
    _register('hv', ['ƕ']);
    _register('ij', ['\u0133']); // ij ligature
    _register('lj', ['\u01C9']);
    _register('lZ', ['ɮ']);
    _register('ls', ['ʪ']);
    _register('lz', ['ʫ']);
    _register('nj', ['\u01CC']);
    _register('oe', ['œ']);
    _register('oi', ['ƣ']);
    _register('ou', ['ȣ']);
    _register('oo', ['ꝏ']);
    _register('ss', ['ß']);
    _register('tC', ['ʨ']);
    _register('tS', ['ʧ']);
    _register('th', ['þ']);
    _register('ts', ['ʦ']);
    _register('tz', ['ꜩ']);
    _register('vy', ['ꝡ']);
  }

  /// Maps every entry of [diacriticChars] to [asciiChar] in the shared table.
  ///
  /// A character registered more than once keeps the replacement from the
  /// last call.
  void _register(String asciiChar, List<String> diacriticChars) {
    for (final diacriticChar in diacriticChars) {
      _map[diacriticChar] = asciiChar;
    }
  }

  /// The shared table (the same mutable map instance on every call).
  ///
  /// Declared without a return type, so its static type is `dynamic`.
  /// Tightening it to `Map<String, String>` would be a source-breaking change
  /// for callers that use the result of `map[key]` as a non-nullable `String`.
  get map => _map;
}
Once we have the Map, we can create an extension on the String class to add a getter that returns the value without diacritics.
/// Adds diacritic removal to [String].
extension DiacriticsAwareString on String {
  /// This string with every character that has an entry in the [Diacritics]
  /// table replaced by its ASCII replacement.
  ///
  /// Characters without an entry are kept unchanged. A replacement may be
  /// longer than one letter (for example 'Æ' becomes 'AE'), so the result can
  /// be longer than the receiver.
  String get withoutDiacritics {
    // `Diacritics.map` has no declared type; pin it down here so the lookups
    // below are statically checked instead of dispatched dynamically.
    final Map<String, String> replacements = Diacritics().map;
    return splitMapJoin(
      // An empty pattern makes `onNonMatch` receive the string piece by
      // piece, which gives us a per-character lookup.
      '',
      // One lookup instead of containsKey + []; the table holds no null
      // values, so falling back on a miss is equivalent.
      onNonMatch: (piece) => replacements[piece] ?? piece,
    );
  }
}
Below is the usage example with the output.
// Accented letters are replaced through the Diacritics table; characters
// without an entry (such as '.') are printed as they are.
print('Ŵǒôƚĥá.čőm'.withoutDiacritics); // Woolha.com
Summary
Removing diacritic or accent characters in Dart can be done by creating a mapping from each diacritic character to the ASCII character(s). Keep in mind that the mapping in this tutorial may be incomplete. If you have to handle different characters, just add or change the mapping. Furthermore, some diacritic characters can have different mappings. If you try multiple online diacritics removal tools, you may get different results.