mightyscape-1.2/extensions/fablabchemnitz/braille_l18n/braille_l18n.py

504 lines
12 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
import inkex
# ---------------------------------
# UTILITIES
# Common standards
# Input characters that stand for an explicit "uppercase" marker, mapped to
# the code point of the Braille cell emitted for them.
UPPERCASE_PREFIXES = {
    chr(15): 0x2828,  # uppercase prefix: https://codepoints.net/U+000F
}

# Louis Braille's numbers prefix: the cell written in front of a digit.
LOUIS_BRAILLE_NUMBERS_PREFIX = 0x283C

# Louis Braille's original numbers codification: digit character -> code
# point of the Braille cell that follows the numbers prefix.
LOUIS_BRAILLE_NUMBERS = {
    "0": 0x281A,
    "1": 0x2801,
    "2": 0x2803,
    "3": 0x2809,
    "4": 0x2819,
    "5": 0x2811,
    "6": 0x280B,
    "7": 0x281B,
    "8": 0x2813,
    "9": 0x280A,
}
# ---------------------
# English based locales
# Braille ASCII table: the index of a character in this 64-character string
# is the offset of its Braille cell from U+2800. Written as four rows of 16
# characters that are concatenated into one string.
EN_ASCII = (
    " A1B'K2L@CIF/MSP"
    "\"E3H9O6R^DJG>NTQ"
    ",*5<-U8V.%[$+X!&"
    ";:4\\0Z7(_?W]#Y)="
)
# Spanish based locales
# Spanish alphabet (the basic Latin letters plus "Ñ"): uppercase letter ->
# code point of its Braille cell.
ES_LETTERS = {
    # A - J
    "A": 0x2801,
    "B": 0x2803,
    "C": 0x2809,
    "D": 0x2819,
    "E": 0x2811,
    "F": 0x280B,
    "G": 0x281B,
    "H": 0x2813,
    "I": 0x280A,
    "J": 0x281A,
    # K - T (with "Ñ" in its alphabetical position)
    "K": 0x2805,
    "L": 0x2807,
    "M": 0x280D,
    "N": 0x281D,
    "Ñ": 0x283B,
    "O": 0x2815,
    "P": 0x280F,
    "Q": 0x281F,
    "R": 0x2817,
    "S": 0x280E,
    "T": 0x281E,
    # U - Z
    "U": 0x2825,
    "V": 0x2827,
    "W": 0x283A,
    "X": 0x282D,
    "Y": 0x283D,
    "Z": 0x2835,
}
# Punctuation and math signs for the Spanish-based locales that take a single
# Braille cell: character -> code point of the cell.
ES_SIGNS = {
    " ": 0x2800,  # braille space
    "ª": 0x2801,  # ordinal (feminine) -> same as A
    "º": 0x2815,  # ordinal (masculine) -> same as O
    "&": 0x282F,
    # punctuation; opening and closing marks share one cell
    ".": 0x2804,
    ",": 0x2802,
    ":": 0x2812,
    ";": 0x2806,
    "¿": 0x2822,
    "?": 0x2822,
    "¡": 0x2816,
    "!": 0x2816,
    '"': 0x2826,
    "(": 0x2823,
    ")": 0x281C,
    # "[" (0x2837) and "]" (0x283e) are deliberately left out: their cells
    # collide with "Á" and "Ú" (Spanish and Catalan).
    "*": 0x2814,
    # math
    "-": 0x2824,
    "=": 0x2836,
    "×": 0x2826,  # multiplication
    "÷": 0x2832,  # division
    "+": 0x2816,
    "@": 0x2810,
}
# Spanish accented vowels and "Ü": uppercase character -> code point of its
# single Braille cell.
ES_ACCENT_MARKS = {
    "Á": 0x2837,
    "É": 0x282E,
    "Í": 0x280C,
    "Ó": 0x282C,
    "Ú": 0x283E,
    "Ü": 0x2833,
}
# Characters that the Spanish-based locales write with more than one Braille
# cell: character -> tuple of cell code points, in output order.
#
# NOTE(review): several keys below are the empty string "". They look like
# non-ASCII characters that were lost before this file reached review (the
# trailing comments mention "per mile" and "unicode dashes") -- confirm
# against the upstream file and restore them. As written, a dict literal keeps
# only the last value for a repeated key, so "" ends up mapped to
# (0x2838, 0x283d) and the earlier "" rows have no effect.
ES_COMBINATIONS = {
    # signs
    "%": (0x2838, 0x2834),
    "": (0x2838, 0x2834, 0x2834),  # per mille -- NOTE(review): key is empty
    "/": (0x2820, 0x2802),
    "\\": (0x2810, 0x2804),
    "<": (0x2810, 0x2805),
    ">": (0x2828, 0x2802),
    "|": (0x2838, 0x2807),
    "{": (0x2810, 0x2807),
    "}": (0x2838, 0x2802),
    "": (0x2824, 0x2824),  # two different unicode dashes -- NOTE(review): keys are empty
    "": (0x2824, 0x2824),
    "": (0x2804, 0x2804, 0x2804),  # NOTE(review): key is empty
    # legal
    "©": (0x2823, 0x2828, 0x2809, 0x281c),  # copyright
    "®": (0x2823, 0x2828, 0x2817, 0x281c),  # registered
    "": (0x2823, 0x2828, 0x280f, 0x281c),  # NOTE(review): key is empty
    "🄯": (0x2823, 0x2828, 0x2807, 0x281c),
    # currencies
    "": (0x2838, 0x2811),  # NOTE(review): key is empty
    "$": (0x2838, 0x280e),
    "¢": (0x2818, 0x2809),
    "£": (0x2810, 0x282e),
    "¥": (0x2838, 0x283d),
    "": (0x2838, 0x283d),  # NOTE(review): key is empty; this is the value "" keeps
}
# Catalan/Valencian accented characters: uppercase character -> code point of
# its single Braille cell.
CA_ACCENT_MARKS = {
    # acute accents
    "É": 0x283F,
    "Í": 0x280C,
    "Ó": 0x282A,
    "Ú": 0x283E,
    # grave accents
    "À": 0x2837,
    "È": 0x282E,
    "Ò": 0x282C,
    # diaeresis and cedilla
    "Ï": 0x283B,
    "Ü": 0x2833,
    "Ç": 0x282F,
}
# French based locales
# Basic Latin alphabet for the French-based locales: uppercase letter -> code
# point of its Braille cell.
FR_LETTERS = {
    # A - J
    "A": 0x2801,
    "B": 0x2803,
    "C": 0x2809,
    "D": 0x2819,
    "E": 0x2811,
    "F": 0x280B,
    "G": 0x281B,
    "H": 0x2813,
    "I": 0x280A,
    "J": 0x281A,
    # K - T
    "K": 0x2805,
    "L": 0x2807,
    "M": 0x280D,
    "N": 0x281D,
    "O": 0x2815,
    "P": 0x280F,
    "Q": 0x281F,
    "R": 0x2817,
    "S": 0x280E,
    "T": 0x281E,
    # U - Z
    "U": 0x2825,
    "V": 0x2827,
    "W": 0x283A,
    "X": 0x282D,
    "Y": 0x283D,
    "Z": 0x2835,
}
# French accented characters and the "Œ" ligature: uppercase character ->
# code point of its single Braille cell.
FR_ACCENT_MARKS = {
    # acute and grave accents
    "É": 0x283F,
    "À": 0x2837,
    "È": 0x282E,
    "Ù": 0x283E,
    # circumflex
    "Â": 0x2821,
    "Ê": 0x2823,
    "Î": 0x2829,
    "Ô": 0x2839,
    "Û": 0x2831,
    # diaeresis
    "Ë": 0x282B,
    "Ï": 0x283B,
    "Ü": 0x2833,
    # cedilla and ligature
    "Ç": 0x282F,
    "Œ": 0x282A,  # oe ligature
}
# Punctuation and math signs for the French-based locales that take a single
# Braille cell: character -> code point of the cell.
#
# NOTE(review): four keys below are the empty string "" and the apostrophe
# key "'" appears twice. They look like typographic quote characters that
# were lost or normalised before this file reached review -- confirm against
# the upstream file. The repeated keys all carry the same value here, so the
# resulting dict is unaffected by which one wins.
FR_SIGNS = {
    " ": 0x2800,  # braille space
    ",": 0x2802,
    ";": 0x2806,
    ":": 0x2812,
    ".": 0x2832,
    "?": 0x2822,
    "!": 0x2816,
    # every quotation mark maps to the same cell
    "«": 0x2836,
    "»": 0x2836,
    "": 0x2836,  # NOTE(review): key is empty
    "": 0x2836,  # NOTE(review): key is empty
    '"': 0x2836,
    "": 0x2836,  # NOTE(review): key is empty
    "": 0x2836,  # NOTE(review): key is empty
    "(": 0x2826,
    ")": 0x2834,
    "'": 0x2804,
    "'": 0x2804,  # NOTE(review): duplicate of the previous key
    "/": 0x280c,
    "@": 0x281c,
    "^": 0x2808,  # elevation exponent
    # math
    "-": 0x2824,
    "+": 0x2816,
    "×": 0x2814,  # multiplication
    "÷": 0x2812,  # division
    "=": 0x2836,
}
# Characters that the French-based locales write with more than one Braille
# cell: character -> tuple of cell code points, in output order.
#
# NOTE(review): many keys below are the empty string "". They look like
# non-ASCII characters that were lost before this file reached review (the
# trailing comments mention arrows, an ellipsis, "per mile", a trademark
# sign) -- confirm against the upstream file and restore them. As written, a
# dict literal keeps only the last value for a repeated key, so "" ends up
# mapped to (0x2818, 0x283d) and the earlier "" rows have no effect.
FR_COMBINATIONS = {
    "": (0x282a, 0x2812, 0x2815),  # bidirectional arrow -- NOTE(review): key is empty
    "": (0x282a, 0x2812, 0x2812),  # left arrow -- NOTE(review): key is empty
    "": (0x2812, 0x2812, 0x2815),  # right arrow -- NOTE(review): key is empty
    "": (0x2832, 0x2832, 0x2832),  # unicode ellipsis -- NOTE(review): key is empty
    "": (0x2824, 0x2824),  # NOTE(review): key is empty
    "": (0x2824, 0x2824),  # NOTE(review): key is empty
    "_": (0x2810, 0x2824),
    "[": (0x2818, 0x2826),
    "]": (0x2834, 0x2803),
    "°": (0x2810, 0x2815),  # degrees
    "§": (0x2810, 0x280f),  # paragraph/section symbol
    "&": (0x2810, 0x283f),
    "\\": (0x2810, 0x280c),
    "#": (0x2810, 0x283c),
    "{": (0x2820, 0x2820, 0x2826),
    "}": (0x2834, 0x2804, 0x2804),
    # math
    "µ": (0x2818, 0x280d),  # micron
    "π": (0x2818, 0x280f),
    "": (0x2818, 0x2823),  # NOTE(review): key is empty
    "": (0x2818, 0x281c),  # NOTE(review): key is empty
    "<": (0x2810, 0x2823),
    ">": (0x2810, 0x281c),
    "~": (0x2810, 0x2822),
    "*": (0x2810, 0x2814),
    "%": (0x2810, 0x282c),
    "": (0x2810, 0x282c, 0x282c),  # per mille -- NOTE(review): key is empty
    # legal
    "©": (0x2810, 0x2809),  # copyright
    "®": (0x2810, 0x2817),  # registered
    "": (0x2810, 0x281e),  # trademark -- NOTE(review): key is empty
    # currencies
    "¢": (0x2818, 0x2809),
    "": (0x2818, 0x2811),  # NOTE(review): key is empty
    "£": (0x2818, 0x2807),
    "$": (0x2818, 0x280e),
    "¥": (0x2818, 0x283d),
    "": (0x2818, 0x283d),  # NOTE(review): key is empty; this is the value "" keeps
}
# German based locales
# German umlauts: uppercase character -> code point of its single Braille
# cell.
DE_ACCENT_MARKS = {
    "Ä": 0x281C,
    "Ö": 0x282A,
    "Ü": 0x2833,
}
# Punctuation for the German locale that takes a single Braille cell:
# character -> code point of the cell.
#
# NOTE(review): four keys below are the empty string "". They look like
# non-ASCII characters that were lost before this file reached review --
# confirm against the upstream file and restore them. As written, a dict
# literal keeps only the last value for a repeated key, so "" ends up mapped
# to 0x2820 and the earlier "" rows have no effect.
DE_SIGNS = {
    " ": 0x2800,  # braille space
    ",": 0x2802,
    ";": 0x2806,
    ":": 0x2812,
    "?": 0x2822,
    "!": 0x2816,
    "": 0x2826,  # NOTE(review): key is empty
    "": 0x2834,  # NOTE(review): key is empty
    "§": 0x282c,
    ".": 0x2804,
    "": 0x2824,  # NOTE(review): key is empty
    "": 0x2820,  # NOTE(review): key is empty; this is the value "" keeps
}
# Characters that the German locale writes with more than one Braille cell
# (plus "ß", see below): character -> tuple of cell code points, in output
# order.
#
# NOTE(review): several keys below are the empty string "". They look like
# non-ASCII characters that were lost before this file reached review --
# confirm against the upstream file and restore them. As written, a dict
# literal keeps only the last value for a repeated key, so "" ends up mapped
# to (0x2808, 0x2811) and the earlier "" rows have no effect.
#
# NOTE(review): "/" is defined twice with different values; only the second
# one, (0x2810, 0x2802), is effective. One of the two keys was presumably
# meant to be a different character -- verify which.
DE_COMBINATIONS = {
    # signs
    "ß": (0x282e,),  # converted to 'SS' if uppercased, so defined in combinations
    "|": (0x2810, 0x2824),
    "[": (0x2818, 0x2837),
    "]": (0x2818, 0x283e),
    "/": (0x2818, 0x280c),  # NOTE(review): overridden by the second "/" below
    "`": (0x2820, 0x2826),
    "´": (0x2820, 0x2834),
    "/": (0x2810, 0x2802),  # NOTE(review): duplicate key; this value wins
    "&": (0x2810, 0x2825),
    "*": (0x2820, 0x2814),
    "": (0x2812, 0x2812, 0x2815),  # NOTE(review): key is empty
    "": (0x282a, 0x2812, 0x2812),  # NOTE(review): key is empty
    "": (0x282a, 0x2812, 0x2812, 0x2815),  # NOTE(review): key is empty
    "%": (0x283c, 0x281a, 0x2834),
    "": (0x283c, 0x281a, 0x2834, 0x2834),  # NOTE(review): key is empty
    "°": (0x2808, 0x2834),
    "": (0x2808, 0x2814),  # NOTE(review): key is empty
    "": (0x2808, 0x2814, 0x2814),  # NOTE(review): key is empty
    "@": (0x2808, 0x281c),
    "_": (0x2808, 0x2838),
    "#": (0x2808, 0x283c),
    # currencies
    "": (0x2808, 0x2811),  # NOTE(review): key is empty; this is the value "" keeps
    "$": (0x2808, 0x280e),
    "¢": (0x2808, 0x2809),
    "£": (0x2808, 0x2807),
    # legal
    "©": (0x2836, 0x2818, 0x2809, 0x2836),
    "®": (0x2836, 0x2818, 0x2817, 0x2836),
}
# END: UTILITIES
# ---------------------------------
# LOCALE FUNCTIONS
def en_char_map(char):
    """Map one character to its English (Braille ASCII) cell.

    The upper-cased character is searched in ``EN_ASCII``; its position is
    the offset of the Braille cell from U+2800. A character that is not
    found is returned unchanged.

    Source: https://en.wikipedia.org/wiki/Braille_ASCII#Braille_ASCII_values
    """
    offset = EN_ASCII.find(char.upper())
    if offset < 0:
        # Not part of the Braille ASCII table: pass it through untouched.
        return char
    return chr(0x2800 + offset)
def numbers_singleuppers_combinations_factory(
    numbers_map,
    singleuppers_map,
    combinations_map,  # also individual characters that are modified if uppercased
    number_prefix,
    uppercase_prefix,
):
    """Build a one-character Braille mapper from a locale's tables.

    Args:
        numbers_map: digit character -> code point of its Braille cell.
        singleuppers_map: uppercase character -> code point of the single
            Braille cell used for it (letters, accented letters, signs).
        combinations_map: character -> tuple of code points, for characters
            written with several cells or that change when uppercased.
        number_prefix: code point of the cell written before a digit.
        uppercase_prefix: code point of the cell written before an
            uppercase letter.

    Returns:
        A function taking one character and returning its Braille
        representation as a string, or the character itself when no table
        knows it.
    """

    def char_mapper(char):
        # ``str.isnumeric()`` is also true for characters such as "½", "²"
        # or non-Latin digits that the locale table does not define. Only
        # treat the character as a number when the table can translate it;
        # otherwise fall through to the regular lookups instead of raising
        # KeyError and aborting the whole conversion.
        if char.isnumeric() and char in numbers_map:
            # numeric prefix + number
            return "".join([chr(number_prefix), chr(numbers_map[char])])
        try:
            bcharint = singleuppers_map[char.upper()]
        except KeyError:
            try:
                # combinations
                return "".join([chr(num) for num in combinations_map[char]])
            except KeyError:
                # Unknown to every table: leave the character as it is.
                return char
        # if uppercase, add uppercase prefix before letter
        if char.isupper():
            return "".join([chr(uppercase_prefix), chr(bcharint)])
        return chr(bcharint)

    return char_mapper
def es_char_map_loader():
    """Build the Spanish/Galician character mapper.

    Source: https://sid.usal.es/idocs/F8/FDO12069/signografiabasica.pdf
    """
    single_cells = {}
    # Merge in this order so that later tables win on key collisions.
    for table in (ES_LETTERS, ES_ACCENT_MARKS, ES_SIGNS, UPPERCASE_PREFIXES):
        single_cells.update(table)
    return numbers_singleuppers_combinations_factory(
        numbers_map=LOUIS_BRAILLE_NUMBERS,
        singleuppers_map=single_cells,
        combinations_map=ES_COMBINATIONS,
        number_prefix=0x283C,
        uppercase_prefix=0x2828,
    )
def eu_char_map_loader():
    """Build the Euskera character mapper.

    Uses the same implementation as Spanish but without accent marks.

    Source: https://sid.usal.es/idocs/F8/FDO12069/signografiabasica.pdf
    """
    single_cells = {}
    # Merge in this order so that later tables win on key collisions.
    for table in (ES_LETTERS, ES_SIGNS, UPPERCASE_PREFIXES):
        single_cells.update(table)
    return numbers_singleuppers_combinations_factory(
        numbers_map=LOUIS_BRAILLE_NUMBERS,
        singleuppers_map=single_cells,
        combinations_map=ES_COMBINATIONS,
        number_prefix=0x283C,
        uppercase_prefix=0x2828,
    )
def ca_char_map_loader():
    """Build the Catalan/Valencian character mapper.

    Uses the same implementation as Spanish but with the Catalan accent
    marks table.

    Source: https://sid.usal.es/idocs/F8/FDO12069/signografiabasica.pdf
    """
    single_cells = {}
    # Merge in this order so that later tables win on key collisions.
    for table in (ES_LETTERS, CA_ACCENT_MARKS, ES_SIGNS, UPPERCASE_PREFIXES):
        single_cells.update(table)
    return numbers_singleuppers_combinations_factory(
        numbers_map=LOUIS_BRAILLE_NUMBERS,
        singleuppers_map=single_cells,
        combinations_map=ES_COMBINATIONS,
        number_prefix=0x283C,
        uppercase_prefix=0x2828,
    )
def fr_char_map_loader():
    """Build the French character mapper.

    Source: https://sid.usal.es/idocs/F8/FDO12069/signografiabasica.pdf
    """
    single_cells = {}
    # Merge in this order so that later tables win on key collisions.
    for table in (FR_LETTERS, FR_ACCENT_MARKS, FR_SIGNS, UPPERCASE_PREFIXES):
        single_cells.update(table)
    return numbers_singleuppers_combinations_factory(
        numbers_map=LOUIS_BRAILLE_NUMBERS,
        singleuppers_map=single_cells,
        combinations_map=FR_COMBINATIONS,
        number_prefix=0x283C,
        uppercase_prefix=0x2828,
    )
def de_char_map_loader():
    """Build the German character mapper.

    - For letters, uses the same dictionary as the French implementation.

    Source: http://bskdl.org/textschrift.html
    """
    single_cells = {}
    # Merge in this order so that later tables win on key collisions;
    # FR_LETTERS supplies the plain alphabet.
    for table in (FR_LETTERS, DE_ACCENT_MARKS, DE_SIGNS, UPPERCASE_PREFIXES):
        single_cells.update(table)
    return numbers_singleuppers_combinations_factory(
        numbers_map=LOUIS_BRAILLE_NUMBERS,
        singleuppers_map=single_cells,
        combinations_map=DE_COMBINATIONS,
        number_prefix=0x283C,
        uppercase_prefix=0x2828,
    )
# END: LOCALE FUNCTIONS
# Locale code -> character mapper, or a zero-argument "loader" that builds
# the mapper on demand. Loaders are recognised by the word "loader" in their
# function name.
LOCALE_CHARMAPS = {
    # direct mapper
    "en": en_char_map,  # English
    # factory-built mappers
    "es": es_char_map_loader,  # Spanish
    "fr": fr_char_map_loader,  # French
    "de": de_char_map_loader,  # German
    "gl": es_char_map_loader,  # Galician (shares the Spanish mapper)
    "eu": eu_char_map_loader,  # Euskera
    "ca": ca_char_map_loader,  # Catalan/Valencian
}
# ---------------------------------
# EXTENSION
class BrailleL18n(inkex.TextExtension):
    """Text extension that rewrites character data as localized Braille."""

    def add_arguments(self, parser):
        """Register the ``-l``/``--locale`` option on *parser*.

        The accepted values are the keys of ``LOCALE_CHARMAPS``; the default
        is ``"en"``.
        """
        parser.add_argument(
            "-l",
            "--locale",
            dest="locale",
            type=str,
            default="en",
            choices=LOCALE_CHARMAPS.keys(),
            help="Locale to use converting to Braille.",
        )

    def process_chardata(self, text):
        """Return *text* with every character mapped for the chosen locale.

        Replaceable chardata hook for processing the text.
        """
        mapper = LOCALE_CHARMAPS[self.options.locale]
        # An entry is either a ready-to-use mapper or a loader that builds
        # one; loaders are told apart by their function name and called
        # first to obtain the real mapper.
        needs_loading = "loader" in mapper.__name__
        if needs_loading:
            mapper = mapper()
        return "".join(mapper(character) for character in text)
# Script entry point: instantiate the extension and run it only when this
# file is executed directly, not when it is imported.
if __name__ == '__main__':
    BrailleL18n().run()