This repository has been archived on 2023-03-25. You can view files and clone it, but cannot push or open issues or pull requests.
mightyscape-1.1-deprecated/extensions/fablabchemnitz/braille-l18n/braille-l18n.py
2021-10-10 22:50:09 +02:00

504 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
import inkex
# ---------------------------------
# UTILITIES
# Common standards
# Control characters that translate to the braille uppercase prefix cell.
UPPERCASE_PREFIXES = {
    # U+000F: https://codepoints.net/U+000F
    "\x0f": 0x2828,
}
# Louis Braille's numbers prefix cell.
LOUIS_BRAILLE_NUMBERS_PREFIX = 0x283C

# Louis Braille's original codification of the ten digits, keyed by the
# digit character ("0" first, then "1" to "9").
LOUIS_BRAILLE_NUMBERS = dict(
    zip(
        "0123456789",
        (
            0x281A,  # 0
            0x2801,  # 1
            0x2803,  # 2
            0x2809,  # 3
            0x2819,  # 4
            0x2811,  # 5
            0x280B,  # 6
            0x281B,  # 7
            0x2813,  # 8
            0x280A,  # 9
        ),
    )
)
# ---------------------
# English based locales
# Braille ASCII lookup string used by `en_char_map`: the position of an
# (upper-cased) character in this string is added to 0x2800 to obtain the
# code point of its braille pattern. The order of the characters is therefore
# significant and must not be changed.
EN_ASCII = " A1B'K2L@CIF/MSP\"E3H9O6R^DJG>NTQ,*5<-U8V.%[$+X!&;:4\\0Z7(_?W]#Y)="
# Spanish based locales
# Spanish alphabet (uppercase keys, including "Ñ") mapped to the code point
# of the corresponding braille cell.
ES_LETTERS = {
    "A": 0x2801, "B": 0x2803, "C": 0x2809, "D": 0x2819, "E": 0x2811,
    "F": 0x280b, "G": 0x281b, "H": 0x2813, "I": 0x280a, "J": 0x281a,
    "K": 0x2805, "L": 0x2807, "M": 0x280d, "N": 0x281d, "Ñ": 0x283b,
    "O": 0x2815, "P": 0x280f, "Q": 0x281f, "R": 0x2817, "S": 0x280e,
    "T": 0x281e, "U": 0x2825, "V": 0x2827, "W": 0x283a, "X": 0x282d,
    "Y": 0x283d, "Z": 0x2835,
}
# Spanish punctuation, ordinal and math signs that translate to a single
# braille cell.
ES_SIGNS = {
    " ": 0x2800,  # braille space
    # ordinals reuse the cells of their base letters
    "ª": 0x2801,  # feminine ordinal -> same as A
    "º": 0x2815,  # masculine ordinal -> same as O
    "&": 0x282f,
    # punctuation
    ".": 0x2804,
    ",": 0x2802,
    ":": 0x2812,
    ";": 0x2806,
    # opening and closing marks share one cell
    "¿": 0x2822,
    "?": 0x2822,
    "¡": 0x2816,
    "!": 0x2816,
    '"': 0x2826,
    "(": 0x2823,
    ")": 0x281c,
    # "[" (0x2837) and "]" (0x283e) are left out on purpose: they collide
    # with "Á" and "Ú" (Spanish and Catalan).
    "*": 0x2814,
    # math
    "-": 0x2824,
    "=": 0x2836,
    "×": 0x2826,  # multiplication
    "÷": 0x2832,  # division
    "+": 0x2816,
    "@": 0x2810,
}
# Spanish accented vowels (uppercase keys), each with its own braille cell.
ES_ACCENT_MARKS = {
    "Á": 0x2837, "É": 0x282e, "Í": 0x280c,
    "Ó": 0x282c, "Ú": 0x283e, "Ü": 0x2833,
}
# Spanish characters that translate to a sequence of braille cells. Each value
# is a tuple of code points; the character mapper converts every item with
# `chr` and joins them. Passed as `combinations_map` by the Spanish, Euskera
# and Catalan loaders.
# NOTE(review): several keys below read as the empty string "". Judging by the
# neighbouring comments (per mille, dashes, ...) the original characters seem
# to have been lost; as written, the repeated "" keys collapse into a single
# entry (the last one wins) that a one-character lookup never reaches.
# Restore the intended characters from the upstream file.
ES_COMBINATIONS = {
    # signs
    "%": (0x2838, 0x2834),
    "": (0x2838, 0x2834, 0x2834), # per mile
    "/": (0x2820, 0x2802),
    "\\": (0x2810, 0x2804),
    "<": (0x2810, 0x2805),
    ">": (0x2828, 0x2802),
    "|": (0x2838, 0x2807),
    "{": (0x2810, 0x2807),
    "}": (0x2838, 0x2802),
    "": (0x2824, 0x2824), # two different unicode dashes
    "": (0x2824, 0x2824),
    "": (0x2804, 0x2804, 0x2804),
    # legal
    "©": (0x2823, 0x2828, 0x2809, 0x281c), # copyright
    "®": (0x2823, 0x2828, 0x2817, 0x281c), # registered
    "": (0x2823, 0x2828, 0x280f, 0x281c),
    "🄯": (0x2823, 0x2828, 0x2807, 0x281c),
    # currencies
    "": (0x2838, 0x2811),
    "$": (0x2838, 0x280e),
    "¢": (0x2818, 0x2809),
    "£": (0x2810, 0x282e),
    "¥": (0x2838, 0x283d),
    "": (0x2838, 0x283d),
}
# Catalan/Valencian accented letters (uppercase keys), used by the Catalan
# loader in place of the Spanish accent table.
CA_ACCENT_MARKS = {
    # acute accents
    "É": 0x283f, "Í": 0x280c, "Ó": 0x282a, "Ú": 0x283e,
    # grave accents
    "À": 0x2837, "È": 0x282e, "Ò": 0x282c,
    # diaeresis and cedilla
    "Ï": 0x283b, "Ü": 0x2833, "Ç": 0x282f,
}
# French based locales
# Basic A-Z alphabet (uppercase keys) mapped to braille cell code points.
# Shared by the French and German loaders.
FR_LETTERS = {
    "A": 0x2801, "B": 0x2803, "C": 0x2809, "D": 0x2819, "E": 0x2811,
    "F": 0x280b, "G": 0x281b, "H": 0x2813, "I": 0x280a, "J": 0x281a,
    "K": 0x2805, "L": 0x2807, "M": 0x280d, "N": 0x281d, "O": 0x2815,
    "P": 0x280f, "Q": 0x281f, "R": 0x2817, "S": 0x280e, "T": 0x281e,
    "U": 0x2825, "V": 0x2827, "W": 0x283a, "X": 0x282d, "Y": 0x283d,
    "Z": 0x2835,
}
# French accented letters and the OE ligature (uppercase keys).
FR_ACCENT_MARKS = {
    # acute and grave accents
    "É": 0x283f, "À": 0x2837, "È": 0x282e, "Ù": 0x283e,
    # circumflex
    "Â": 0x2821, "Ê": 0x2823, "Î": 0x2829, "Ô": 0x2839, "Û": 0x2831,
    # diaeresis
    "Ë": 0x282b, "Ï": 0x283b, "Ü": 0x2833,
    # cedilla and oe ligature
    "Ç": 0x282f, "Œ": 0x282a,
}
# French punctuation and math signs that translate to a single braille cell.
# Merged into the single-character table by the French loader.
# NOTE(review): four keys around the quotation marks read as the empty string
# "" and the apostrophe key "'" appears twice with the same value; presumably
# these were distinct typographic quote/apostrophe characters that have been
# lost. Repeated keys collapse into one entry -- restore the intended
# characters from the upstream file.
FR_SIGNS = {
    " ": 0x2800, # braille space
    ",": 0x2802,
    ";": 0x2806,
    ":": 0x2812,
    ".": 0x2832,
    "?": 0x2822,
    "!": 0x2816,
    # every quotation mark variant maps to the same cell
    "«": 0x2836,
    "»": 0x2836,
    "": 0x2836,
    "": 0x2836,
    '"': 0x2836,
    "": 0x2836,
    "": 0x2836,
    "(": 0x2826,
    ")": 0x2834,
    "'": 0x2804,
    "'": 0x2804,
    "/": 0x280c,
    "@": 0x281c,
    "^": 0x2808, # elevation exponent
    "-": 0x2824,
    "+": 0x2816,
    "×": 0x2814, # multiplication
    "÷": 0x2812, # division
    "=": 0x2836,
}
# French characters that translate to a sequence of braille cells. Each value
# is a tuple of code points; the character mapper converts every item with
# `chr` and joins them. Passed as `combinations_map` by the French loader.
# NOTE(review): many keys below read as the empty string "" (arrows, ellipsis,
# dashes, per mille, trademark, a currency, ... judging by the comments). The
# original characters seem to have been lost; as written, the repeated "" keys
# collapse into a single entry (the last one wins) that a one-character lookup
# never reaches. Restore the intended characters from the upstream file.
FR_COMBINATIONS = {
    "": (0x282a, 0x2812, 0x2815), # bidirectional arrow
    "": (0x282a, 0x2812, 0x2812), # left arrow
    "": (0x2812, 0x2812, 0x2815), # right arrow
    "": (0x2832, 0x2832, 0x2832), # unicode ellipsis
    "": (0x2824, 0x2824),
    "": (0x2824, 0x2824),
    "_": (0x2810, 0x2824),
    "[": (0x2818, 0x2826),
    "]": (0x2834, 0x2803),
    "°": (0x2810, 0x2815), # degrees
    "§": (0x2810, 0x280f), # paragraph/section symbol
    "&": (0x2810, 0x283f),
    "\\": (0x2810, 0x280c),
    "#": (0x2810, 0x283c),
    "{": (0x2820, 0x2820, 0x2826),
    "}": (0x2834, 0x2804, 0x2804),
    # math
    "µ": (0x2818, 0x280d), # micron
    "π": (0x2818, 0x280f),
    "": (0x2818, 0x2823),
    "": (0x2818, 0x281c),
    "<": (0x2810, 0x2823),
    ">": (0x2810, 0x281c),
    "~": (0x2810, 0x2822),
    "*": (0x2810, 0x2814),
    "%": (0x2810, 0x282c),
    "": (0x2810, 0x282c, 0x282c), # per mile
    # legal
    "©": (0x2810, 0x2809), # copyright
    "®": (0x2810, 0x2817), # registered
    "": (0x2810, 0x281e), # trademark
    # currencies
    "¢": (0x2818, 0x2809),
    "": (0x2818, 0x2811),
    "£": (0x2818, 0x2807),
    "$": (0x2818, 0x280e),
    "¥": (0x2818, 0x283d),
    "": (0x2818, 0x283d),
}
# German based locales
# German umlauts (uppercase keys) mapped to braille cell code points.
DE_ACCENT_MARKS = {"Ä": 0x281c, "Ö": 0x282a, "Ü": 0x2833}
# German punctuation signs that translate to a single braille cell. Merged
# into the single-character table by the German loader.
# NOTE(review): four keys below read as the empty string ""; the original
# characters seem to have been lost. As written they collapse into a single
# "" entry (the last value, 0x2820, wins) that a one-character lookup never
# reaches. Restore the intended characters from the upstream file.
DE_SIGNS = {
    " ": 0x2800, # braille space
    ",": 0x2802,
    ";": 0x2806,
    ":": 0x2812,
    "?": 0x2822,
    "!": 0x2816,
    "": 0x2826,
    "": 0x2834,
    "§": 0x282c,
    ".": 0x2804,
    "": 0x2824,
    "": 0x2820,
}
# German characters that translate to a sequence of braille cells. Each value
# is a tuple of code points; the character mapper converts every item with
# `chr` and joins them. Passed as `combinations_map` by the German loader.
# NOTE(review): several keys below read as the empty string ""; the original
# characters seem to have been lost. As written, the repeated "" keys collapse
# into a single entry (the last one wins) that a one-character lookup never
# reaches. Restore the intended characters from the upstream file.
DE_COMBINATIONS = {
    # signs
    "ß": (0x282e,), # converted to 'SS' if uppercased, so defined in combinations
    "|": (0x2810, 0x2824),
    "[": (0x2818, 0x2837),
    "]": (0x2818, 0x283e),
    # NOTE(review): "/" is defined again a few lines below with a different
    # value, and that later definition is the one that takes effect; this
    # entry was presumably meant for another character -- verify.
    "/": (0x2818, 0x280c),
    "`": (0x2820, 0x2826),
    "´": (0x2820, 0x2834),
    "/": (0x2810, 0x2802),
    "&": (0x2810, 0x2825),
    "*": (0x2820, 0x2814),
    "": (0x2812, 0x2812, 0x2815),
    "": (0x282a, 0x2812, 0x2812),
    "": (0x282a, 0x2812, 0x2812, 0x2815),
    "%": (0x283c, 0x281a, 0x2834),
    "": (0x283c, 0x281a, 0x2834, 0x2834),
    "°": (0x2808, 0x2834),
    "": (0x2808, 0x2814),
    "": (0x2808, 0x2814, 0x2814),
    "@": (0x2808, 0x281c),
    "_": (0x2808, 0x2838),
    "#": (0x2808, 0x283c),
    # currencies
    "": (0x2808, 0x2811),
    "$": (0x2808, 0x280e),
    "¢": (0x2808, 0x2809),
    "£": (0x2808, 0x2807),
    # legal
    "©": (0x2836, 0x2818, 0x2809, 0x2836),
    "®": (0x2836, 0x2818, 0x2817, 0x2836),
}
# END: UTILITIES
# ---------------------------------
# LOCALE FUNCTIONS
def en_char_map(char):
    """Translate one character using the Braille ASCII table.

    The character is upper-cased and looked up in ``EN_ASCII``; its position
    in that string is the offset from U+2800 of the braille pattern that is
    returned. A character that is not in the table is returned unchanged.

    Source: https://en.wikipedia.org/wiki/Braille_ASCII#Braille_ASCII_values
    """
    offset = EN_ASCII.find(char.upper())
    if offset < 0:
        # not part of the Braille ASCII table: leave it as it is
        return char
    return chr(0x2800 + offset)
def numbers_singleuppers_combinations_factory(
    numbers_map,
    singleuppers_map,
    combinations_map,  # also individual characters that are modified if uppercased
    number_prefix,
    uppercase_prefix,
):
    """Build a single-character braille mapper from a locale's tables.

    Args:
        numbers_map: Maps digit characters to the code point of their cell.
        singleuppers_map: Maps characters, keyed by their uppercase form, to
            the code point of a single cell.
        combinations_map: Maps characters to a tuple of code points (several
            cells). Also holds single characters whose uppercase form differs
            in length, which therefore cannot be keyed in ``singleuppers_map``.
        number_prefix: Code point of the cell emitted before every digit.
        uppercase_prefix: Code point of the cell emitted before every
            uppercase letter.

    Returns:
        A function taking one character and returning its braille
        translation as a string, or the character itself when no table
        contains it.
    """
    number_prefix_cell = chr(number_prefix)
    uppercase_prefix_cell = chr(uppercase_prefix)

    def char_mapper(char):
        # Test membership instead of ``str.isnumeric()``: that predicate is
        # also true for characters such as "²" or "½", which have no entry in
        # ``numbers_map`` and used to raise ``KeyError`` here.
        if char in numbers_map:
            # numeric prefix + number
            return number_prefix_cell + chr(numbers_map[char])
        try:
            bcharint = singleuppers_map[char.upper()]
        except KeyError:
            try:
                cells = combinations_map[char]
            except KeyError:
                # unknown to every table: pass the character through
                return char
            return "".join(chr(num) for num in cells)
        # if uppercase, add uppercase prefix before letter
        if char.isupper():
            return uppercase_prefix_cell + chr(bcharint)
        return chr(bcharint)

    return char_mapper
def es_char_map_loader():
    """Build the Spanish/Galician character mapper.

    Merges the Spanish letters, accent marks and signs with the shared
    uppercase prefix table (on a key clash the later table wins) and passes
    them, together with the Spanish combinations, to
    ``numbers_singleuppers_combinations_factory``.

    Source: https://sid.usal.es/idocs/F8/FDO12069/signografiabasica.pdf

    Returns:
        The single-character mapper produced by the factory.
    """
    return numbers_singleuppers_combinations_factory(
        numbers_map=LOUIS_BRAILLE_NUMBERS,
        singleuppers_map={
            **ES_LETTERS,
            **ES_ACCENT_MARKS,
            **ES_SIGNS,
            **UPPERCASE_PREFIXES,
        },
        combinations_map=ES_COMBINATIONS,
        # shared constant rather than a repeated 0x283c literal
        number_prefix=LOUIS_BRAILLE_NUMBERS_PREFIX,
        uppercase_prefix=0x2828,
    )
def eu_char_map_loader():
    """Build the Euskera character mapper.

    Uses the same implementation as Spanish but without accent marks: only
    the Spanish letters and signs are merged with the shared uppercase prefix
    table (on a key clash the later table wins).

    Source: https://sid.usal.es/idocs/F8/FDO12069/signografiabasica.pdf

    Returns:
        The single-character mapper produced by the factory.
    """
    return numbers_singleuppers_combinations_factory(
        numbers_map=LOUIS_BRAILLE_NUMBERS,
        singleuppers_map={
            **ES_LETTERS,
            **ES_SIGNS,
            **UPPERCASE_PREFIXES,
        },
        combinations_map=ES_COMBINATIONS,
        # shared constant rather than a repeated 0x283c literal
        number_prefix=LOUIS_BRAILLE_NUMBERS_PREFIX,
        uppercase_prefix=0x2828,
    )
def ca_char_map_loader():
    """Build the Catalan/Valencian character mapper.

    Uses the same implementation as Spanish but with the Catalan accent marks
    instead of the Spanish ones. The tables are merged in the order letters,
    accent marks, signs, uppercase prefixes (on a key clash the later table
    wins).

    Source: https://sid.usal.es/idocs/F8/FDO12069/signografiabasica.pdf

    Returns:
        The single-character mapper produced by the factory.
    """
    return numbers_singleuppers_combinations_factory(
        numbers_map=LOUIS_BRAILLE_NUMBERS,
        singleuppers_map={
            **ES_LETTERS,
            **CA_ACCENT_MARKS,
            **ES_SIGNS,
            **UPPERCASE_PREFIXES,
        },
        combinations_map=ES_COMBINATIONS,
        # shared constant rather than a repeated 0x283c literal
        number_prefix=LOUIS_BRAILLE_NUMBERS_PREFIX,
        uppercase_prefix=0x2828,
    )
def fr_char_map_loader():
    """Build the French character mapper.

    Merges the French letters, accent marks and signs with the shared
    uppercase prefix table (on a key clash the later table wins) and passes
    them, together with the French combinations, to
    ``numbers_singleuppers_combinations_factory``.

    Source: https://sid.usal.es/idocs/F8/FDO12069/signografiabasica.pdf

    Returns:
        The single-character mapper produced by the factory.
    """
    return numbers_singleuppers_combinations_factory(
        numbers_map=LOUIS_BRAILLE_NUMBERS,
        singleuppers_map={
            **FR_LETTERS,
            **FR_ACCENT_MARKS,
            **FR_SIGNS,
            **UPPERCASE_PREFIXES,
        },
        combinations_map=FR_COMBINATIONS,
        # shared constant rather than a repeated 0x283c literal
        number_prefix=LOUIS_BRAILLE_NUMBERS_PREFIX,
        uppercase_prefix=0x2828,
    )
def de_char_map_loader():
    """Build the German character mapper.

    - For letters, uses the same dictionary as the French implementation.
    - Umlauts and signs come from the German tables; the tables are merged in
      the order letters, accent marks, signs, uppercase prefixes (on a key
      clash the later table wins).

    Source: http://bskdl.org/textschrift.html

    Returns:
        The single-character mapper produced by the factory.
    """
    return numbers_singleuppers_combinations_factory(
        numbers_map=LOUIS_BRAILLE_NUMBERS,
        singleuppers_map={
            **FR_LETTERS,  # Same as French implementation
            **DE_ACCENT_MARKS,
            **DE_SIGNS,
            **UPPERCASE_PREFIXES,
        },
        combinations_map=DE_COMBINATIONS,
        # shared constant rather than a repeated 0x283c literal
        number_prefix=LOUIS_BRAILLE_NUMBERS_PREFIX,
        uppercase_prefix=0x2828,
    )
# END: LOCALE FUNCTIONS
# Locale code -> character mapper, or loader function that builds one.
LOCALE_CHARMAPS = {
    # English: the mapper is used directly
    "en": en_char_map,
    # The remaining locales register a loader that creates the mapper
    # Spanish
    "es": es_char_map_loader,
    # French
    "fr": fr_char_map_loader,
    # German
    "de": de_char_map_loader,
    # Galician (shares the Spanish loader)
    "gl": es_char_map_loader,
    # Euskera
    "eu": eu_char_map_loader,
    # Catalan/Valencian
    "ca": ca_char_map_loader,
}
# ---------------------------------
# EXTENSION
class BrailleL18n(inkex.TextExtension):
    """Text extension that rewrites character data as Braille for a locale."""

    def add_arguments(self, parser):
        """Register the ``-l``/``--locale`` option, defaulting to ``en``."""
        parser.add_argument(
            "-l",
            "--locale",
            dest="locale",
            type=str,
            default="en",
            choices=LOCALE_CHARMAPS.keys(),
            help="Locale to use converting to Braille.",
        )

    def process_chardata(self, text):
        """Return *text* with each character translated by the locale mapper."""
        mapper = LOCALE_CHARMAPS[self.options.locale]
        # A registry entry is either a ready-to-use mapper or a loader that
        # builds one; loaders are recognised by "loader" in their name and
        # have to be called first.
        if "loader" in mapper.__name__:
            mapper = mapper()
        return "".join(mapper(char) for char in text)
# Run the extension when the file is executed as a script.
if __name__ == "__main__":
    BrailleL18n().run()