/usr/lib/python3/dist-packages/gleetex/document.py is in python3-gleetex 2.3.1-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
"""
This module wraps a formula in a LaTeX document. With this module, a formula can
be turned into a convertible document. It also offers functions to escape
non-ascii characters, so that formulas with unicode math or umlauts, etc. can be
converted.
"""
from . import unicode
class DocumentSerializationException(Exception):
    """Signal that a non-ascii character of a formula has no LaTeX
    replacement command.

    The exception exposes these attributes:
    formula - the formula that was being processed
    index - position of the offending character in the formula
    upoint - unicode code point of the offending character"""

    def __init__(self, formula, index, upoint):
        # hand the same three values to Exception so that `args` carries them
        super().__init__(formula, index, upoint)
        self.formula, self.index, self.upoint = formula, index, upoint

    def __str__(self):
        template = ("could not find LaTeX replacement command for unicode "
                    "character %d, index %d in formula %s")
        details = (self.upoint, self.index, self.formula)
        return template % details
def escape_unicode_in_formulas(formula, replace_alphabeticals=True):
    """Replace the non-ascii characters of a formula with LaTeX commands.

    The formula is split into alternating math-mode and text-mode chunks: the
    braced group following a command that starts with \\text or \\mbox is
    handled as text mode, everything else as math mode. Each chunk is passed to
    replace_unicode_characters() with the matching mode.

    formula - the formula source as string
    replace_alphabeticals - forwarded to replace_unicode_characters()

    Returns the formula with the replacements applied. A formula without any
    character above code point 160 is returned unchanged.
    Raises DocumentSerializationException if a character has no replacement;
    the reported index is the position within the whole formula. A ValueError
    raised by get_matching_brace() (e.g. unbalanced braces) is passed on."""
    if not any(ord(ch) > 160 for ch in formula):
        return formula  # no umlauts, no replacement

    # characters in math mode need a different replacement than in text mode.
    # Therefore, the string is split into chunks which alternate between math
    # mode (even positions) and text mode (odd positions).
    chunks = []
    start = 0
    while True:
        # pick whichever text-alike command comes first, so that an \mbox
        # preceding a \text is not mistaken for math mode
        hits = [pos for pos in (formula.find('\\text', start),
                                formula.find('\\mbox', start)) if pos >= 0]
        if not hits:
            break
        opening_brace = formula.find('{', min(hits))
        if opening_brace < 0:
            # command without a braced argument: nothing to treat as text
            break
        # add text before text-alike command and the command itself to chunks
        chunks.append(formula[start:opening_brace])
        closing_brace = get_matching_brace(formula, opening_brace)
        # add text-mode stuff
        chunks.append(formula[opening_brace:closing_brace + 1])
        start = closing_brace + 1
    # add last chunk (the whole formula if there was no text-mode command)
    chunks.append(formula[start:])

    escaped = []
    offset = 0  # position of the current chunk within the formula
    is_math = True
    for chunk in chunks:
        try:
            escaped.append(replace_unicode_characters(chunk, is_math,
                    replace_alphabeticals=replace_alphabeticals))
        except ValueError as e:  # unicode point missing
            # the reported index is relative to the chunk; translate it
            position = offset + int(e.args[0])
            raise DocumentSerializationException(formula, position,
                    ord(formula[position])) from None
        offset += len(chunk)
        is_math = not is_math
    return ''.join(escaped)
def replace_unicode_characters(characters, is_math, replace_alphabeticals=True):
    """Replace all non-ascii characters within the given string with their LaTeX
    equivalent. The boolean is_math indicates whether the math-mode commands or
    the text-mode commands (as needed within \\text{}) should be used.

    When replace_alphabeticals is False, alphabetical characters are not
    replaced by their LaTeX command, so that text within \\text{} (and similar)
    is not garbled. For instance, \\text{für} is replaced by
    \\text{f\\"{u}r} when replace_alphabeticals=True and kept as is otherwise.
    Keeping the characters is useful for the alt attribute of an image, where
    the reader might want to read the normal text as such.

    Returns the string with the replacements applied.
    Raises a ValueError if no replacement command is available for a
    character. The first argument of the ValueError is the index within the
    string where that character has been encountered."""
    result = []
    for idx, character in enumerate(characters):
        # code points below 168 (ascii, control characters, ...) are kept
        if ord(character) < 168:
            result.append(character)
            continue
        # don't replace alphabeticals if specified, see doc string
        if character.isalpha() and not replace_alphabeticals:
            result.append(character)
            continue

        mode = (unicode.LaTeXMode.mathmode if is_math else
                unicode.LaTeXMode.textmode)
        commands = unicode.unicode_table.get(ord(character))
        if not commands:  # unicode point missing in table
            # is caught one level above; provide index for more concise error
            # output
            raise ValueError(idx)
        if mode in commands:
            replacement = commands[mode]
        elif mode == unicode.LaTeXMode.mathmode and \
                unicode.LaTeXMode.textmode in commands:
            # math mode and only a text alternative exists: add \text{}
            # around it
            replacement = '\\text{%s}' % commands[unicode.LaTeXMode.textmode]
        else:
            # entry exists, but offers no command usable in this mode; report
            # it like a missing entry instead of leaking a KeyError
            raise ValueError(idx)
        result.append(replacement)
        # a command ending in a letter would merge with a following letter,
        # hence separate them with a space; look at what was actually
        # inserted (the \text{} wrapper ends with a brace)
        if (idx + 1) < len(characters) and characters[idx + 1].isalpha() \
                and replacement[-1:].isalpha():
            result.append(' ')
    return ''.join(result)
def get_matching_brace(string, pos_of_opening_brace):
    """Return the index of the closing brace which matches the opening brace
    at pos_of_opening_brace.

    Nested brace groups are taken into account. A character preceded by a
    backslash is skipped, so that escaped braces (\\{ and \\}) are not counted
    as group delimiters, while a brace following a double backslash is.

    Raises a ValueError if there is no opening brace at the given position or
    if the group is never closed."""
    if string[pos_of_opening_brace] != '{':
        raise ValueError("index %s in string %s: not a opening brace" % \
                (pos_of_opening_brace, repr(string)))
    depth = 1
    escaped = False  # True if the previous character was an escaping backslash
    for offset, ch in enumerate(string[pos_of_opening_brace + 1:]):
        if escaped:
            # this character belongs to a control symbol such as \{ or \\
            escaped = False
        elif ch == '\\':
            escaped = True
        elif ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                return pos_of_opening_brace + offset + 1
    # the loop only ends without returning if a group is still open
    raise ValueError("Unbalanced braces in formula " + repr(string))
class LaTeXDocument:
    """This class represents a LaTeX document. It is intended to contain an
    equation as main content and properties to customize it. Its main purpose is
    to provide a str method which will serialize it to a full LaTeX document."""

    def __init__(self, eqn):
        """Create a document for the formula eqn with default settings: no
        encoding, inline maths, empty preamble, no custom maths environment and
        no replacement of non-ascii characters."""
        self.__encoding = None
        self.__equation = eqn
        self.__displaymath = False
        self._preamble = ''
        self.__maths_env = None
        self.__replace_nonascii = False

    def set_replace_nonascii(self, flag):
        """If True, all non-ascii character will be replaced through a LaTeX
        command."""
        self.__replace_nonascii = flag

    def set_latex_environment(self, env):
        """Set maths environment name like `displaymath` or `flalign*`."""
        self.__maths_env = env

    def get_latex_environment(self):
        """Return the maths environment name (or None if none was set)."""
        return self.__maths_env

    def get_encoding(self):
        """Return encoding for the document (or None)."""
        return self.__encoding

    def set_preamble_string(self, p):
        """Set the string to add to the preamble of the LaTeX document."""
        self._preamble = p

    def set_encoding(self, encoding):
        """Set the encoding as used by the inputenc package.

        Names starting with `utf` and containing an 8 are stored as `utf8`;
        names starting with `iso` and containing 8859 as well as `latin1` are
        stored as `latin1`. Any other name raises a ValueError."""
        lowered = encoding.lower()
        if lowered.startswith('utf') and '8' in encoding:
            self.__encoding = 'utf8'
        elif (lowered.startswith('iso') and '8859' in encoding) or \
                lowered == 'latin1':
            self.__encoding = 'latin1'
        else:
            # if you plan to add an encoding, you have to adjust the str
            # function, which also loads the fontenc package
            raise ValueError(("Encoding %s is not supported at the moment. If "
                "you want to use LaTeX 2e, you should report a bug at the home "
                "page of GladTeX.") % encoding)

    def set_displaymath(self, flag):
        """Set whether the formula is set in displaymath. Raises a TypeError
        if flag is not a bool."""
        if not isinstance(flag, bool):
            raise TypeError("Displaymath parameter must be of type bool.")
        self.__displaymath = flag

    def is_displaymath(self):
        """Return whether the formula is set in displaymath."""
        return self.__displaymath

    def _get_encoding_preamble(self):
        """Return the preamble lines required for the configured encoding.

        The result is the empty string if no encoding was set; otherwise it
        loads fontenc with T1.
        Raises a ValueError if the formula contains non-ascii characters which
        are not going to be replaced while no encoding is set, or if the
        language of the default locale is not in the list of languages handled
        with T1."""
        # first check whether there are umlauts within the formula and if so, an
        # encoding has been set; ascii ends at code point 127
        if any(ord(ch) > 127 for ch in self.__equation) and \
                not self.__replace_nonascii:
            if not self.__encoding:
                raise ValueError(("No encoding set, but non-ascii characters "
                        "present. Please specify an encoding."))
        encoding_preamble = ''
        if self.__encoding:
            # try to guess language and hence character set (fontenc)
            import locale
            locale_name = locale.getdefaultlocale()[0]
            # extract just the language code, fall back to English if the
            # locale does not reveal one
            language = (locale_name.split('_')[0] if locale_name else '') \
                    or 'en'
            # check whether language on computer is within T1 and hence whether
            # it should be loaded; I know that this can be a misleading
            # assumption, but there's no better way that I know of
            if language in ['fr', 'es', 'it', 'de', 'nl', 'ro', 'en']:
                encoding_preamble += '\n\\usepackage[T1]{fontenc}'
            else:
                raise ValueError(("Language not supported by T1 fontenc "
                    "encoding; please report this to the GladTeX project."))
        return encoding_preamble

    def __str__(self):
        """Serialize to a complete LaTeX document. The preamble consists of the
        encoding preamble, the inputenc and amsmath/amssymb packages and the
        custom preamble string."""
        preamble = self._get_encoding_preamble() + \
                ('\n\\usepackage[utf8]{inputenc}\n\\usepackage{amsmath, amssymb}'
                '\n') + (self._preamble if self._preamble else '')
        return self._format_document(preamble)

    def _format_document(self, preamble):
        """Return a formatted LaTeX document with the specified formula
        embedded."""
        if self.__maths_env:
            opening = '\\begin{%s}' % self.__maths_env
            closing = '\\end{%s}' % self.__maths_env
        else:
            # determine characters with which to surround the formula
            opening = '\\[' if self.__displaymath else '\\('
            closing = '\\]' if self.__displaymath else '\\)'
        formula = self.__equation.strip()
        if self.__replace_nonascii:
            formula = escape_unicode_in_formulas(formula, replace_alphabeticals=True)
        return ("\\documentclass[fontsize=12pt, fleqn]{scrartcl}\n\n%s\n"
                "\\usepackage[active,textmath,displaymath,tightpage]{preview} "
                "%% must be last one, see doc\n\n\\begin{document}\n"
                "\\noindent%%\n%s%s%s\n"
                "\\end{document}\n") % (preamble, opening, formula, closing)
|