about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/docutils/utils/math/latex2mathml.py
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/docutils/utils/math/latex2mathml.py
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two version of R2R are here HEAD master
Diffstat (limited to '.venv/lib/python3.12/site-packages/docutils/utils/math/latex2mathml.py')
-rw-r--r--.venv/lib/python3.12/site-packages/docutils/utils/math/latex2mathml.py1252
1 files changed, 1252 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/docutils/utils/math/latex2mathml.py b/.venv/lib/python3.12/site-packages/docutils/utils/math/latex2mathml.py
new file mode 100644
index 00000000..b6ca3934
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docutils/utils/math/latex2mathml.py
@@ -0,0 +1,1252 @@
+# :Id: $Id: latex2mathml.py 9536 2024-02-01 13:04:22Z milde $
+# :Copyright: © 2005 Jens Jørgen Mortensen [1]_
+#             © 2010, 2021, 2024 Günter Milde.
+#
+# :License: Released under the terms of the `2-Clause BSD license`_, in short:
+#
+#    Copying and distribution of this file, with or without modification,
+#    are permitted in any medium without royalty provided the copyright
+#    notice and this notice are preserved.
+#    This file is offered as-is, without any warranty.
+#
+# .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause
+#
+# .. [1] the original `rst2mathml.py` in `sandbox/jensj/latex_math`
+
+"""Convert LaTeX maths code into presentational MathML.
+
+This module is provisional:
+the API is not settled and may change with any minor Docutils version.
+"""
+
+# Usage:
+#
+# >>> from latex2mathml import *
+
+import re
+import unicodedata
+
+from docutils.utils.math import (MathError, mathalphabet2unichar,
+                                 tex2unichar, toplevel_code)
+from docutils.utils.math.mathml_elements import (
+    math, mtable, mrow, mtr, mtd, menclose, mphantom, msqrt, mi, mn, mo,
+    mtext, msub, msup, msubsup, munder, mover, munderover, mroot, mfrac,
+    mspace, MathRow)
+
+
+# Character data
+# --------------
+
+# LaTeX math macro to Unicode mappings.
+# Character categories.
+
+# identifiers -> <mi>
+
+# Macro name (without backslash) -> character.
+# handle_cmd() wraps the character in an <mi> element.
+letters = {'hbar': 'ℏ'}  # Compatibility mapping: \hbar resembles italic ħ
+#                          "unicode-math" unifies \hbar and \hslash to ℏ.
+letters.update(tex2unichar.mathalpha)
+
+# NOTE: no copy is made, `ordinary` is the very same dict object as
+# `tex2unichar.mathord`; in-place changes would show up in both.
+ordinary = tex2unichar.mathord  # Miscellaneous symbols
+
+# special case: Capital Greek letters: (upright in TeX style)
+# handle_cmd() only tests membership (``name in greek_capitals``) to tag
+# the <mi> element with class "capital-greek"; the character that is
+# actually inserted comes from `letters`.
+greek_capitals = {
+    'Phi': '\u03a6', 'Xi': '\u039e', 'Sigma': '\u03a3',
+    'Psi': '\u03a8', 'Delta': '\u0394', 'Theta': '\u0398',
+    'Upsilon': '\u03d2', 'Pi': '\u03a0', 'Omega': '\u03a9',
+    'Gamma': '\u0393', 'Lambda': '\u039b'}
+
+# functions -> <mi>
+# Macro name -> text of the <mi> element.  handle_cmd() appends an
+# invisible "ApplyFunction" operator (U+2061) after the name unless the
+# next source character is '^' or '_'.
+functions = {
+    # functions with a space in the name
+    # (U+202F is NARROW NO-BREAK SPACE)
+    'liminf': 'lim\u202finf',
+    'limsup': 'lim\u202fsup',
+    'injlim': 'inj\u202flim',
+    'projlim': 'proj\u202flim',
+    # embellished function names (see handle_cmd() below)
+    # all show "lim"; handle_cmd() adds the line or arrow above/below
+    'varlimsup': 'lim',
+    'varliminf': 'lim',
+    'varprojlim': 'lim',
+    'varinjlim': 'lim',
+    # custom function name
+    # (placeholder value: handle_cmd() reads the name from the argument)
+    'operatorname': None,
+}
+# Plain function names are displayed as-is (key == value).
+functions.update((name, name) for name in
+                 ('arccos', 'arcsin', 'arctan', 'arg',  'cos',
+                  'cosh',   'cot',    'coth',   'csc',  'deg',
+                  'det',    'dim',    'exp',    'gcd',  'hom',
+                  'ker',    'lg',     'ln',     'log',  'Pr',
+                  'sec',    'sin',    'sinh',   'tan',  'tanh'))
+# Function with limits: 'lim', 'sup', 'inf', 'max', 'min':
+# use <mo> to allow "movablelimits" attribute (see below).
+
+# modulo operator/arithmetic
+# The tuples are unpacked in handle_cmd():
+#
+# binary:      emit a single <mo>mod</mo> with `padding` as "lspace"
+#              and "rspace" (the other fields are ignored then),
+# named:       emit the name "mod" (as <mi>) before the argument,
+# parentheses: wrap name and argument in non-stretchy parentheses,
+# padding:     width of the leading <mspace>
+#              (replaced by "1em" if ``node.in_block()`` is true).
+modulo_functions = {
+    # cmdname: (binary, named, parentheses, padding)
+    'bmod': (True,  True,  False, '0.278em'),  # a mod n
+    'pmod': (False, True,  True,  '0.444em'),  # a  (mod n)
+    'mod':  (False, True,  False, '0.667em'),  # a  mod n
+    'pod':  (False, False, True,  '0.444em'),  # a  (n)
+    }
+
+
+# "mathematical alphabets": map identifiers to the corresponding
+# characters from the "Mathematical Alphanumeric Symbols" block
+#
+# handle_cmd() passes macros listed here on to handle_math_alphabet().
+# Several macros may share one "mathvariant" value
+# (e.g. \mathcal and \mathscr both map to "script").
+math_alphabets = {
+    # 'cmdname':  'mathvariant value'        # package
+    'mathbb':     'double-struck',           # amssymb
+    'mathbf':     'bold',
+    'mathbfit':   'bold-italic',             # isomath
+    'mathcal':    'script',
+    'mathfrak':   'fraktur',                 # amssymb
+    'mathit':     'italic',
+    'mathrm':     'normal',
+    'mathscr':    'script',                  # mathrsfs et al
+    'mathsf':     'sans-serif',
+    'mathbfsfit': 'sans-serif-bold-italic',  # unicode-math
+    'mathsfbfit': 'sans-serif-bold-italic',  # isomath
+    'mathsfit':   'sans-serif-italic',       # isomath
+    'mathtt':     'monospace',
+    # unsupported: bold-fraktur
+    #              bold-script
+    #              bold-sans-serif
+}
+
+# operator, fence, or separator -> <mo>
+
+# Keys are macro names without the leading backslash: handle_cmd() strips
+# the backslash from the delimiter token before the lookup.
+stretchables = {
+    # extensible delimiters allowed in left/right cmds
+    'backslash':   '\\',
+    'uparrow':     '\u2191',  # ↑ UPWARDS ARROW
+    'downarrow':   '\u2193',  # ↓ DOWNWARDS ARROW
+    'updownarrow': '\u2195',  # ↕ UP DOWN ARROW
+    'Uparrow':     '\u21d1',  # ⇑ UPWARDS DOUBLE ARROW
+    'Downarrow':   '\u21d3',  # ⇓ DOWNWARDS DOUBLE ARROW
+    'Updownarrow': '\u21d5',  # ⇕ UP DOWN DOUBLE ARROW
+    'lmoustache':  '\u23b0',  # ⎰ … CURLY BRACKET SECTION
+    'rmoustache':  '\u23b1',  # ⎱ … LEFT CURLY BRACKET SECTION
+    'arrowvert':   '\u23d0',  # ⏐ VERTICAL LINE EXTENSION
+    'bracevert':   '\u23aa',  # ⎪ CURLY BRACKET EXTENSION
+    'lvert':      '|',        # left  |
+    'lVert':      '\u2016',   # left  ‖
+    'rvert':      '|',        # right |
+    'rVert':      '\u2016',   # right ‖
+    'Arrowvert':  '\u2016',   # ‖
+}
+# The order matters: on a key collision, a later update() overrides
+# the entries above and those of earlier update() calls.
+stretchables.update(tex2unichar.mathfence)
+stretchables.update(tex2unichar.mathopen)   # Braces
+stretchables.update(tex2unichar.mathclose)  # Braces
+
+# >>> print(' '.join(sorted(set(stretchables.values()))))
+# [ \ ] { | } ‖ ↑ ↓ ↕ ⇑ ⇓ ⇕ ⌈ ⌉ ⌊ ⌋ ⌜ ⌝ ⌞ ⌟ ⎪ ⎰ ⎱ ⏐ ⟅ ⟆ ⟦ ⟧ ⟨ ⟩ ⟮ ⟯ ⦇ ⦈
+
+# Macro name (without backslash) -> text of the <mo> element created
+# by handle_cmd().  Also used by handle_cmd() to resolve the argument
+# of "\not".
+operators = {
+    # negated symbols without pre-composed Unicode character
+    'nleqq':      '\u2266\u0338',  # ≦̸
+    'ngeqq':      '\u2267\u0338',  # ≧̸
+    'nleqslant':  '\u2a7d\u0338',  # ⩽̸
+    'ngeqslant':  '\u2a7e\u0338',  # ⩾̸
+    'ngtrless':   '\u2277\u0338',  # txfonts
+    'nlessgtr':   '\u2276\u0338',  # txfonts
+    'nsubseteqq': '\u2AC5\u0338',  # ⫅̸
+    'nsupseteqq': '\u2AC6\u0338',  # ⫆̸
+    # compatibility definitions:
+    'centerdot': '\u2B1D',  # BLACK VERY SMALL SQUARE | mathbin
+    'varnothing': '\u2300',  # ⌀ DIAMETER SIGN | empty set
+    'varpropto': '\u221d',  # ∝ PROPORTIONAL TO | sans serif
+    'triangle': '\u25B3',  # WHITE UP-POINTING TRIANGLE | mathord
+    'triangledown': '\u25BD',  # WHITE DOWN-POINTING TRIANGLE | mathord
+    # alias commands:
+    'dotsb': '\u22ef',  # ⋯ with binary operators/relations
+    'dotsc': '\u2026',  # … with commas
+    'dotsi': '\u22ef',  # ⋯ with integrals
+    'dotsm': '\u22ef',  # ⋯ multiplication dots
+    'dotso': '\u2026',  # … other dots
+    # functions with movable limits (requires <mo>)
+    'lim': 'lim',
+    'sup': 'sup',
+    'inf': 'inf',
+    'max': 'max',
+    'min': 'min',
+}
+# The order matters: on a key collision, a later update() overrides
+# the literal entries above and those of earlier update() calls.
+operators.update(tex2unichar.mathbin)    # Binary symbols
+operators.update(tex2unichar.mathrel)    # Relation symbols, arrow symbols
+operators.update(tex2unichar.mathpunct)  # Punctuation
+operators.update(tex2unichar.mathop)     # Variable-sized symbols
+operators.update(stretchables)
+
+
+# special cases
+#
+# handle_cmd() checks both tables before the generic `operators` table.
+
+# -> <mo style="font-weight: bold">
+thick_operators = {
+    # style='font-weight: bold;'
+    'thicksim':       '\u223C',  # ∼
+    'thickapprox':    '\u2248',  # ≈
+}
+
+# -> <mo mathsize="75%">
+small_operators = {
+    # mathsize='75%'
+    'shortmid':       '\u2223',  # ∣
+    'shortparallel':  '\u2225',  # ∥
+    'nshortmid':      '\u2224',  # ∤
+    'nshortparallel': '\u2226',  # ∦
+    'smallfrown':     '\u2322',  # ⌢ FROWN
+    'smallsmile':     '\u2323',  # ⌣ SMILE
+    'smallint':       '\u222b',  # ∫ INTEGRAL
+}
+
+# Operators and functions with limits above/below in display formulas
+# and in index position inline (movablelimits=True)
+#
+# handle_cmd() sets the "movablelimits" attribute for these operators
+# only if the macro is directly followed by ' ', '_', or '^'.
+movablelimits = ('bigcap', 'bigcup', 'bigodot', 'bigoplus', 'bigotimes',
+                 'bigsqcup', 'biguplus', 'bigvee', 'bigwedge',
+                 'coprod', 'intop', 'ointop', 'prod', 'sum',
+                 'lim', 'max', 'min', 'sup', 'inf')
+# Depending on settings, integrals may also be in this category.
+# (e.g. if "amsmath" is loaded with option "intlimits", see
+#  http://mirror.ctan.org/macros/latex/required/amsmath/amsldoc.pdf)
+# NOTE: `movablelimits` is a tuple and tuples have no extend() method;
+# convert it to a list before enabling the following snippet.
+# movablelimits.extend(('fint', 'iiiint', 'iiint', 'iint', 'int', 'oiint',
+#                       'oint', 'ointctrclockwise', 'sqint',
+#                       'varointclockwise',))
+
+# horizontal space -> <mspace>
+
+# Macro name -> value of the <mspace> "width" attribute.
+# The one-character keys (';', ' ', '\n', ':', ',', '!') stand for
+# macros like "\;" or "\ ": tex_cmdname() returns a single non-letter
+# character as command name.
+spaces = {'qquad':         '2em',        # two \quad
+          'quad':          '1em',        # 18 mu
+          'thickspace':    '0.2778em',   # 5mu = 5/18em
+          ';':             '0.2778em',   # 5mu thickspace
+          ' ':             '0.25em',     # inter word space
+          '\n':            '0.25em',     # inter word space
+          'medspace':      '0.2222em',   # 4mu = 2/9em
+          ':':             '0.2222em',   # 4mu medspace
+          'thinspace':     '0.1667em',   # 3mu = 1/6em
+          ',':             '0.1667em',   # 3mu thinspace
+          'negthinspace':  '-0.1667em',  # -3mu = -1/6em
+          '!':             '-0.1667em',  # negthinspace
+          'negmedspace':   '-0.2222em',  # -4mu = -2/9em
+          'negthickspace': '-0.2778em',  # -5mu = -5/18em
+          }
+
+# accents: -> <mo stretchy="false"> in <mover>
+#
+# The values are the *spacing* variants of the accent characters;
+# the trailing comments list the corresponding combining characters
+# for reference only.
+accents = {
+    # TeX:      spacing    combining
+    'acute':    '´',     # '\u0301'
+    'bar':      'ˉ',     # '\u0304'
+    'breve':    '˘',     # '\u0306'
+    'check':    'ˇ',     # '\u030C'
+    'dot':      '˙',     # '\u0307'
+    'ddot':     '¨',     # '\u0308'
+    'dddot':    '˙˙˙',   # '\u20DB'  # or … ?
+    'ddddot':   '˙˙˙˙',  # '\u20DC'  # or ¨¨ ?
+    'grave':    '`',     # '\u0300'
+    'hat':      'ˆ',     # '\u0302'
+    'mathring': '˚',     # '\u030A'
+    'tilde':    '~',     # '\u0303'  # tilde ~ or small tilde ˜?
+    'vec':      '→',     # '\u20d7'  # → too heavy, use scriptlevel="+1"
+}
+
+# limits etc. -> <mo> in <mover> or <munder>
+#
+# Both tables map a macro name to a tuple ``(character, offset)``;
+# according to the column header below, the offset is a correction
+# in "em" units.
+# NOTE(review): the code consuming these tables is not part of this
+# excerpt -- confirm how the offset is applied.
+over = {
+    # TeX:                  (char,     offset-correction/em)
+    'overbrace':            ('\u23DE', -0.2),  # DejaVu Math -0.6
+    'overleftarrow':        ('\u2190', -0.2),
+    'overleftrightarrow':   ('\u2194', -0.2),
+    'overline':             ('_',      -0.2),  # \u2012 does not stretch
+    'overrightarrow':       ('\u2192', -0.2),
+    'widehat':              ('^',      -0.5),
+    'widetilde':            ('~',      -0.3),
+}
+under = {'underbrace':          ('\u23DF',  0.1),  # DejaVu Math -0.7
+         'underleftarrow':      ('\u2190', -0.2),
+         'underleftrightarrow': ('\u2194', -0.2),
+         'underline':           ('_',      -0.8),
+         'underrightarrow':     ('\u2192', -0.2),
+         }
+
+# Character translations
+# ----------------------
+# characters with preferred alternative in mathematical use
+# cf. https://www.w3.org/TR/MathML3/chapter7.html#chars.anomalous
+#
+# parse_latex_math() replaces a literal source character found here with
+# the mapped character and wraps it in an <mo> element.
+anomalous_chars = {'-': '\u2212',  # HYPHEN-MINUS -> MINUS SIGN
+                   ':': '\u2236',  # COLON -> RATIO
+                   '~': '\u00a0',  # NO-BREAK SPACE
+                   }
+
+# blackboard bold (Greek characters are not working with "mathvariant"
+# in Firefox 78): map Greek letters to their double-struck counterparts.
+mathbb = {'Γ': '\u213E',    # ℾ
+          'Π': '\u213F',    # ℿ
+          'Σ': '\u2140',    # ⅀
+          'γ': '\u213D',    # ℽ
+          'π': '\u213C',    # ℼ
+          }
+
+# Matrix environments
+#
+# Environment name -> tuple ``(left fence, right fence)``.
+# NOTE(review): the code consuming this table is not part of this
+# excerpt; presumably an empty string means "no fence on this side"
+# (cf. 'cases' with only a left brace) -- confirm.
+matrices = {
+    # name:    fences
+    'matrix':  ('', ''),
+    'smallmatrix':  ('', ''),  # smaller, see begin_environment()!
+    'pmatrix': ('(', ')'),
+    'bmatrix': ('[', ']'),
+    'Bmatrix': ('{', '}'),
+    'vmatrix': ('|', '|'),
+    'Vmatrix': ('\u2016', '\u2016'),  # ‖
+    'aligned': ('', ''),
+    'cases':   ('{', ''),
+}
+
+# Style macro name -> attribute values.
+layout_styles = {
+    'displaystyle':      {'displaystyle': True,  'scriptlevel': 0},
+    'textstyle':         {'displaystyle': False, 'scriptlevel': 0},
+    'scriptstyle':       {'displaystyle': False, 'scriptlevel': 1},
+    'scriptscriptstyle': {'displaystyle': False, 'scriptlevel': 2},
+    }
+# See also https://www.w3.org/TR/MathML3/chapter3.html#presm.scriptlevel
+
+# Fraction macro name -> keyword arguments for the <mfrac> element
+# (handle_cmd() calls ``mfrac(**attributes)``).
+#
+# NOTE: the values of 'dfrac' and 'tfrac' are the *same* dict objects as
+# the `layout_styles` entries (no copy), so they must not be changed in
+# place.  The ``|`` operator (dict union, Python >= 3.9) creates new
+# dicts for 'dbinom' and 'tbinom'.
+fractions = {
+    # name:   attributes
+    'frac':   {},
+    'cfrac':  {'displaystyle': True,  'scriptlevel': 0,
+               'class': 'cfrac'},  # in LaTeX with padding
+    'dfrac':  layout_styles['displaystyle'],
+    'tfrac':  layout_styles['textstyle'],
+    'binom':  {'linethickness': 0},
+    'dbinom': layout_styles['displaystyle'] | {'linethickness': 0},
+    'tbinom': layout_styles['textstyle'] | {'linethickness': 0},
+}
+
+# Sizes for the "minsize" and "maxsize" attributes of delimiters.
+# The empty string at index 0 means "no fixed size": handle_cmd() sets
+# the size attributes only for non-empty values.
+delimiter_sizes = ['', '1.2em', '1.623em', '2.047em', '2.470em']
+# Macro name -> index into `delimiter_sizes`.
+# handle_cmd() opens a new <mrow> for "left" and names ending with "l"
+# and closes the current node for "right" and names ending with "r".
+bigdelimiters = {'left':  0,
+                 'right': 0,
+                 'bigl':  1,
+                 'bigr':  1,
+                 'Bigl':  2,
+                 'Bigr':  2,
+                 'biggl': 3,
+                 'biggr': 3,
+                 'Biggl': 4,
+                 'Biggr': 4,
+                 }
+
+
+# LaTeX to MathML translation
+# ---------------------------
+
+# auxiliary functions
+# ~~~~~~~~~~~~~~~~~~~
+
+def tex_cmdname(string):
+    """Split off the TeX command name at the start of `string`.
+
+    The name is the leading run of ASCII letters; spaces and newlines
+    directly after it are discarded.  If `string` does not start with
+    a letter, the name is its first character (or '' for empty input)
+    and the remainder is kept verbatim.
+
+    Return the tuple ``(name, remainder)``.
+
+    >>> tex_cmdname('mymacro2') # up to first non-letter
+    ('mymacro', '2')
+    >>> tex_cmdname('name 2') # strip trailing whitespace
+    ('name', '2')
+    >>> tex_cmdname('_2') # single non-letter character
+    ('_', '2')
+
+    """
+    # Match objects are always true, so `or` falls through to the
+    # single-character pattern only if there is no leading letter.
+    # The fallback pattern matches any string (including '').
+    match = (re.match(r'([a-zA-Z]+)[ \n]*(.*)', string, re.DOTALL)
+             or re.match(r'(.?)(.*)', string, re.DOTALL))
+    name, remainder = match.groups()
+    return name, remainder
+
+
+# Test:
+#
+# >>> tex_cmdname('name\nnext') # strip trailing whitespace, also newlines
+# ('name', 'next')
+# >>> tex_cmdname('name_2') # first non-letter terminates
+# ('name', '_2')
+# >>> tex_cmdname('name_2\nnext line') # line-break allowed
+# ('name', '_2\nnext line')
+# >>> tex_cmdname(' next') # leading whitespace is returned
+# (' ', 'next')
+# >>> tex_cmdname('1 2') # whitespace after non-letter is kept
+# ('1', ' 2')
+# >>> tex_cmdname('1\n2\t3') # whitespace after non-letter is kept
+# ('1', '\n2\t3')
+# >>> tex_cmdname('') # empty string
+# ('', '')
+
+
+def tex_number(string):
+    """Split off the number literal at the start of `string`.
+
+    A number is a run of digits, dots, and commas that ends with a digit
+    (a trailing separator is left in the remainder).
+
+    Return the tuple ``(number, remainder)``; `number` is the empty
+    string if `string` does not start with a number.
+
+    >>> tex_number('123.4')
+    ('123.4', '')
+
+    """
+    found = re.match(r'([0-9.,]*[0-9]+)(.*)', string, re.DOTALL)
+    return found.groups() if found else ('', string)
+
+
+# Test:
+#
+# >>> tex_number(' 23.4b') # leading whitespace -> no number
+# ('', ' 23.4b')
+# >>> tex_number('23,400/2') # comma separator included
+# ('23,400', '/2')
+# >>> tex_number('23. 4/2') # trailing separator not included
+# ('23', '. 4/2')
+# >>> tex_number('4, 2') # trailing separator not included
+# ('4', ', 2')
+# >>> tex_number('1 000.4')
+# ('1', ' 000.4')
+
+
+def tex_token(string):
+    """Split off the first simple TeX token of `string`.
+
+    A token is, in order of preference,
+
+    * a backslash followed by letters (whitespace after it is skipped),
+    * a backslash followed by one arbitrary character, or
+    * the first character of `string` ('' for empty input).
+
+    Return the tuple ``(token, remainder)``.
+
+    >>> tex_token('\\command{without argument}')
+    ('\\command', '{without argument}')
+    >>> tex_token('or first character')
+    ('o', 'r first character')
+
+    """
+    parsed = re.match(r"""((?P<cmd>\\[a-zA-Z]+)\s* # TeX command, skip whitespace
+                      |(?P<chcmd>\\.)          # one-character TeX command
+                      |(?P<ch>.?))            # first character (or empty)
+                     (?P<remainder>.*$)    # remaining part of string
+                 """, string, re.VERBOSE | re.DOTALL)
+    parts = parsed.groupdict()
+    # Exactly one of the alternatives took part in the match,
+    # the others are None.
+    for kind in ('cmd', 'chcmd', 'ch'):
+        if parts[kind]:
+            return parts[kind], parts['remainder']
+    # Nothing but the empty "first character" matched (empty input).
+    return parts['ch'], parts['remainder']
+
+# Test:
+#
+# >>> tex_token('{opening bracket of group}')
+# ('{', 'opening bracket of group}')
+# >>> tex_token('\\skip whitespace after macro name')
+# ('\\skip', 'whitespace after macro name')
+# >>> tex_token('. but not after single char')
+# ('.', ' but not after single char')
+# >>> tex_token('') # empty string.
+# ('', '')
+# >>> tex_token('\\{escaped bracket')
+# ('\\{', 'escaped bracket')
+
+
+def tex_group(string):
+    """Split off the first TeX group of `string`.
+
+    If `string` starts with "{", return the content of the group (without
+    the enclosing brackets) and the text after the closing bracket.
+    Nested groups are kept intact; brackets escaped with a backslash
+    are not counted.
+
+    Otherwise, return the first *character* (not token!) and the
+    remainder of `string`.
+
+    Raise `MathError` if the group is not closed.
+
+    >>> tex_group('{first group} returned without brackets')
+    ('first group', ' returned without brackets')
+
+    """
+    if not string.startswith('{'):
+        # no group at the start
+        return string[:1], string[1:]
+
+    depth = 0        # nesting level of {{nested} groups}
+    escaped = False  # True if the previous character was an unescaped "\"
+    for position, char in enumerate(string, start=1):
+        if escaped:
+            escaped = False
+        elif char == '\\':
+            escaped = True
+        elif char == '{':
+            depth += 1
+        elif char == '}':
+            depth -= 1
+        if depth == 0:
+            # `position` is the 1-based index of the closing bracket
+            return string[1:position-1], string[position:]
+    raise MathError('Group without closing bracket!')
+
+
+# >>> tex_group('{} empty group')
+# ('', ' empty group')
+# >>> tex_group('{group with {nested} group} ')
+# ('group with {nested} group', ' ')
+# >>> tex_group('{group with {nested group}} at the end')
+# ('group with {nested group}', ' at the end')
+# >>> tex_group('{{group} {with {{complex }nesting}} constructs}')
+# ('{group} {with {{complex }nesting}} constructs', '')
+# >>> tex_group('{group with \\{escaped\\} brackets}')
+# ('group with \\{escaped\\} brackets', '')
+# >>> tex_group('{group followed by closing bracket}} from outer group')
+# ('group followed by closing bracket', '} from outer group')
+# >>> tex_group('No group? Return first character.')
+# ('N', 'o group? Return first character.')
+# >>> tex_group(' {also whitespace}')
+# (' ', '{also whitespace}')
+
+
+def tex_token_or_group(string):
+    """Split off the first TeX token or group of `string`.
+
+    Like `tex_token()`, except that for a `string` starting with "{"
+    the complete group (without the enclosing brackets) is returned,
+    see `tex_group()`.
+
+    Return the tuple ``(argument, remainder)``.
+
+    >>> tex_token_or_group('\\command{without argument}')
+    ('\\command', '{without argument}')
+    >>> tex_token_or_group('first character')
+    ('f', 'irst character')
+    >>> tex_token_or_group(' also whitespace')
+    (' ', 'also whitespace')
+    >>> tex_token_or_group('{first group} keep rest')
+    ('first group', ' keep rest')
+
+    """
+    token, rest = tex_token(string)
+    if token != '{':
+        return token, rest
+    # opening bracket: fetch the complete group instead
+    return tex_group(string.lstrip())
+
+# >>> tex_token_or_group('\\{no group but left bracket')
+# ('\\{', 'no group but left bracket')
+
+
+def tex_optarg(string):
+    """Return optional argument and remainder.
+
+    An optional argument is a group in square brackets, optionally
+    preceded by whitespace.  It may contain escaped closing brackets
+    (``\\]``) but no nested bracket groups.
+
+    Return the tuple ``(optarg, remainder)``, with `optarg` stripped
+    of the enclosing brackets.  If there is no optional argument,
+    `optarg` is the empty string and `remainder` is the unchanged
+    `string`.
+
+    Raise `MathError` if `string` starts with "[" but no complete
+    optional argument can be extracted (missing closing bracket,
+    nested group).
+
+    >>> tex_optarg('[optional argument] returned without brackets')
+    ('optional argument', ' returned without brackets')
+    >>> tex_optarg('{empty string, if there is no optional arg}')
+    ('', '{empty string, if there is no optional arg}')
+
+    """
+    m = re.match(r"""\s*                            # leading whitespace
+                 \[(?P<optarg>(\\]|[^\[\]])*)\]     # [group] without nested groups
+                 (?P<remainder>.*$)
+                 """, string, re.VERBOSE | re.DOTALL)
+    if m is not None:
+        return m.group('optarg'), m.group('remainder')
+    # Test the result explicitly instead of catching the AttributeError
+    # from ``None.group()``: this keeps the irrelevant exception out of
+    # the traceback of the MathError.
+    if string.startswith('['):
+        raise MathError(f'Could not extract optional argument from "{string}"!')
+    return '', string
+
+# Test:
+# >>> tex_optarg(' [optional argument] after whitespace')
+# ('optional argument', ' after whitespace')
+# >>> tex_optarg('[missing right bracket')
+# Traceback (most recent call last):
+#     ...
+# docutils.utils.math.MathError: Could not extract optional argument from "[missing right bracket"!
+# >>> tex_optarg('[group with [nested group]]')
+# Traceback (most recent call last):
+#     ...
+# docutils.utils.math.MathError: Could not extract optional argument from "[group with [nested group]]"!
+
+
+def parse_latex_math(root, source):
+    r"""Append MathML conversion of `source` to `root` and return `root`.
+
+    `root` is the MathML element that receives the converted content,
+    `source` is a string with LaTeX math code.
+
+    Raise `MathError` for unsupported characters, for a closing brace
+    without matching opening brace that is followed by more input, and
+    if the last node misses children at the end of `source`.
+    (The handler functions called from here may raise it, too.)
+
+    >>> parse_latex_math(math(), r'\alpha')
+    math(mi('α'))
+    >>> parse_latex_math(mrow(), r'x_{n}')
+    mrow(msub(mi('x'), mi('n')))
+
+    """
+    # No preprocessing: the loop below skips spaces and newlines itself.
+    string = source  # not-yet handled part of source
+    node = root  # the current "insertion point"
+
+    # Loop over `string` while changing it.
+    while len(string) > 0:
+        # Take off first character:
+        c, string = string[0], string[1:]
+
+        if c in ' \n':
+            continue  # whitespace is ignored in LaTeX math mode
+        if c == '\\':  # start of a LaTeX macro
+            cmdname, string = tex_cmdname(string)
+            # the handler may consume macro arguments from `string`
+            node, string = handle_cmd(cmdname, node, string)
+        elif c in "_^":
+            node = handle_script_or_limit(node, c)
+        elif c == '{':
+            if isinstance(node, MathRow) and node.nchildren == 1:
+                # LaTeX takes one arg, MathML node accepts a group
+                node.nchildren = None  # allow appending until closed by '}'
+            else:  # wrap group in an <mrow>
+                new_node = mrow()
+                node.append(new_node)
+                node = new_node
+        elif c == '}':
+            # close() returns the new insertion point
+            # (None is handled at the end of the loop body)
+            node = node.close()
+        elif c == '&':
+            # column separator: close the current node and append a new
+            # table cell to the node returned by close()
+            # (presumably the table row -- confirm in `mathml_elements`)
+            new_node = mtd()
+            node.close().append(new_node)
+            node = new_node
+        elif c.isalpha():
+            # append() returns the insertion point for the next element
+            node = node.append(mi(c))
+        elif c.isdigit():
+            # `c` is the first digit, tex_number() fetches the rest
+            number, string = tex_number(string)
+            node = node.append(mn(c+number))
+        elif c in anomalous_chars:
+            # characters with a special meaning in LaTeX math mode
+            # fix spacing before "unary" minus.
+            attributes = {}
+            if c == '-' and len(node):
+                previous_node = node[-1]
+                # the minus sign is a prefix operator if it follows
+                # an opening delimiter or an equals sign
+                if (previous_node.text and previous_node.text in '([='
+                    or previous_node.get('class') == 'mathopen'):
+                    attributes['form'] = 'prefix'
+            node = node.append(mo(anomalous_chars[c], **attributes))
+        elif c in "/()[]|":
+            node = node.append(mo(c, stretchy=False))
+        elif c in "+*=<>,.!?`';@":
+            node = node.append(mo(c))
+        else:
+            raise MathError(f'Unsupported character: "{c}"!')
+            # TODO: append as <mi>?
+        if node is None:
+            # no insertion point left (more groups closed than opened)
+            if not string:
+                return root  # ignore unbalanced braces
+            # `source[:-len(string)]` is the already handled part
+            raise MathError(f'No insertion point for "{string}". '
+                            f'Unbalanced braces in "{source[:-len(string)]}"?')
+    # The current node still expects a fixed number of children
+    # (e.g. the denominator in "\frac{2}"):
+    if node.nchildren and len(node) < node.nchildren:
+        raise MathError('Last node missing children. Source incomplete?')
+    return root
+
+# Test:
+
+# >>> parse_latex_math(math(), '')
+# math()
+# >>> parse_latex_math(math(), ' \\sqrt{ \\alpha}')
+# math(msqrt(mi('α')))
+# >>> parse_latex_math(math(), '23.4x')
+# math(mn('23.4'), mi('x'))
+# >>> parse_latex_math(math(), '\\sqrt 2 \\ne 3')
+# math(msqrt(mn('2')), mo('≠'), mn('3'))
+# >>> parse_latex_math(math(), '\\sqrt{2 + 3} < 10')
+# math(msqrt(mn('2'), mo('+'), mn('3'), nchildren=3), mo('<'), mn('10'))
+# >>> parse_latex_math(math(), '\\sqrt[3]{2 + 3}')
+# math(mroot(mrow(mn('2'), mo('+'), mn('3'), nchildren=3), mn('3')))
+# >>> parse_latex_math(math(), '\\max_x') # function takes limits
+# math(munder(mo('max', movablelimits='true'), mi('x')))
+# >>> parse_latex_math(math(), 'x^j_i') # ensure correct order: base, sub, sup
+# math(msubsup(mi('x'), mi('i'), mi('j')))
+# >>> parse_latex_math(math(), '\\int^j_i') # ensure correct order
+# math(msubsup(mo('∫'), mi('i'), mi('j')))
+# >>> parse_latex_math(math(), 'x_{\\alpha}')
+# math(msub(mi('x'), mi('α')))
+# >>> parse_latex_math(math(), 'x_\\text{in}')
+# math(msub(mi('x'), mtext('in')))
+# >>> parse_latex_math(math(), '2⌘')
+# Traceback (most recent call last):
+# docutils.utils.math.MathError: Unsupported character: "⌘"!
+# >>> parse_latex_math(math(), '23}x')  # doctest: +ELLIPSIS
+# Traceback (most recent call last):
+# ...
+# docutils.utils.math.MathError: ... Unbalanced braces in "23}"?
+# >>> parse_latex_math(math(), '\\frac{2}')
+# Traceback (most recent call last):
+# ...
+# docutils.utils.math.MathError: Last node missing children. Source incomplete?
+
+
+def handle_cmd(name, node, string):  # noqa: C901 TODO make this less complex
+    """Process LaTeX command `name` followed by `string`.
+
+    Append result to `node`.
+    If needed, parse `string` for command argument.
+    Return new current node and remainder of `string`:
+
+    >>> handle_cmd('hbar', math(), r' \frac')
+    (math(mi('ℏ')), ' \\frac')
+    >>> handle_cmd('hspace', math(), r'{1ex} (x)')
+    (math(mspace(width='1ex')), ' (x)')
+
+    """
+
+    # Token elements
+    # ==============
+
+    # identifier  ->  <mi>
+
+    if name in letters:
+        new_node = mi(letters[name])
+        if name in greek_capitals:
+            # upright in "TeX style" but MathML sets them italic ("ISO style").
+            # CSS styling does not change the font style in Firefox 78.
+            # Use 'mathvariant="normal"'?
+            new_node.set('class', 'capital-greek')
+        node = node.append(new_node)
+        return node, string
+
+    if name in ordinary:
+        # <mi mathvariant="normal"> well supported by Chromium but
+        # Firefox 115.5.0 puts additional space around the symbol, e.g.
+        # <mi mathvariant="normal">∂</mi><mi>t</mi> looks like ∂ t, not ∂t
+        # return node.append(mi(ordinary[name], mathvariant='normal')), string
+        return node.append(mi(ordinary[name])), string
+
+    if name in functions:
+        # use <mi> followed by invisible function applicator character
+        # (see https://www.w3.org/TR/MathML3/chapter3.html#presm.mi)
+        if name == 'operatorname':
+            # custom function name, e.g. ``\operatorname{abs}(x)``
+            # TODO: \operatorname* -> with limits
+            arg, string = tex_token_or_group(string)
+            new_node = mi(arg, mathvariant='normal')
+        else:
+            new_node = mi(functions[name])
+        # embellished function names:
+        if name == 'varliminf':    # \underline\lim
+            new_node = munder(new_node, mo('_'))
+        elif name == 'varlimsup':  # \overline\lim
+            new_node = mover(new_node, mo('¯'), accent=False)
+        elif name == 'varprojlim':  # \underleftarrow\lim
+            new_node = munder(new_node, mo('\u2190'))
+        elif name == 'varinjlim':  # \underrightarrow\lim
+            new_node = munder(new_node, mo('\u2192'))
+
+        node = node.append(new_node)
+        # add ApplyFunction when appropriate (not \sin^2(x), say)
+        # cf. https://www.w3.org/TR/MathML3/chapter3.html#presm.mi
+        if string and string[0] not in ('^', '_'):
+            node = node.append(mo('\u2061'))  # &ApplyFunction;
+        return node, string
+
+    if name in modulo_functions:
+        (binary, named, parentheses, padding) = modulo_functions[name]
+        if binary:
+            node = node.append(mo('mod', lspace=padding, rspace=padding))
+            return node, string
+        # left padding
+        if node.in_block():
+            padding = '1em'
+        node = node.append(mspace(width=padding))
+        if parentheses:
+            node = node.append(mo('(', stretchy=False))
+        if named:
+            node = node.append(mi('mod'))
+            node = node.append(mspace(width='0.333em'))
+        arg, string = tex_token_or_group(string)
+        node = parse_latex_math(node, arg)
+        if parentheses:
+            node = node.append(mo(')', stretchy=False))
+        return node, string
+
+    # font changes or mathematical alphanumeric characters
+
+    if name in ('boldsymbol', 'pmb'):  # \pmb is "poor mans bold"
+        new_node = mrow(CLASS='boldsymbol')
+        node.append(new_node)
+        return new_node, string
+
+    if name in math_alphabets:
+        return handle_math_alphabet(name, node, string)
+
+    # operator, fence, or separator  ->  <mo>
+
+    if name == 'colon':  # trailing punctuation, not binary relation
+        node = node.append(mo(':', form='postfix', lspace='0', rspace='0.28em'))
+        return node, string
+
+    if name == 'idotsint':  # AMS shortcut for ∫︀···∫︀
+        node = parse_latex_math(node, r'\int\dotsi\int')
+        return node, string
+
+    if name in thick_operators:
+        node = node.append(mo(thick_operators[name], style='font-weight: bold'))
+        return node, string
+
+    if name in small_operators:
+        node = node.append(mo(small_operators[name], mathsize='75%'))
+        return node, string
+
+    if name in operators:
+        attributes = {}
+        if name in movablelimits and string and string[0] in ' _^':
+            attributes['movablelimits'] = True
+        elif name in ('lvert', 'lVert'):
+            attributes['class'] = 'mathopen'
+        node = node.append(mo(operators[name], **attributes))
+        return node, string
+
+    if name in bigdelimiters:
+        delimiter_attributes = {}
+        size = delimiter_sizes[bigdelimiters[name]]
+        delimiter, string = tex_token_or_group(string)
+        if delimiter not in '()[]/|.':
+            try:
+                delimiter = stretchables[delimiter.lstrip('\\')]
+            except KeyError:
+                raise MathError(f'Unsupported "\\{name}" delimiter '
+                                f'"{delimiter}"!')
+        if size:
+            delimiter_attributes['maxsize'] = size
+            delimiter_attributes['minsize'] = size
+            delimiter_attributes['symmetric'] = True
+        if name == 'left' or name.endswith('l'):
+            row = mrow()
+            node.append(row)
+            node = row
+        if delimiter != '.':  # '.' stands for "empty delimiter"
+            node.append(mo(delimiter, **delimiter_attributes))
+        if name == 'right' or name.endswith('r'):
+            node = node.close()
+        return node, string
+
+    if name == 'not':
+        # negation: LaTeX just overlays next symbol with "/".
+        arg, string = tex_token(string)
+        if arg == '{':
+            return node, '{\\not ' + string
+        if arg.startswith('\\'):  # LaTeX macro
+            try:
+                arg = operators[arg[1:]]
+            except KeyError:
+                raise MathError(rf'"\not" cannot negate: "{arg}"!')
+        arg = unicodedata.normalize('NFC', arg+'\u0338')
+        node = node.append(mo(arg))
+        return node, string
+
+    # arbitrary text (usually comments)  ->  <mtext>
+    if name in ('text', 'mbox', 'textrm'):
+        arg, string = tex_token_or_group(string)
+        parts = arg.split('$')  # extract inline math
+        for i, part in enumerate(parts):
+            if i % 2 == 0:  # i is even
+                # LaTeX keeps whitespace in, e.g., ``\text{ foo }``,
+                # <mtext> displays only internal whitespace.
+                # → replace marginal whitespace with NBSP
+                part = re.sub('(^[ \n]|[ \n]$)', '\u00a0', part)
+                node = node.append(mtext(part))
+            else:
+                parse_latex_math(node, part)
+        return node, string
+
+    # horizontal space -> <mspace>
+    if name in spaces:
+        node = node.append(mspace(width='%s'%spaces[name]))
+        return node, string
+
+    if name in ('hspace', 'mspace'):
+        arg, string = tex_group(string)
+        if arg.endswith('mu'):
+            # unit "mu" (1mu=1/18em) not supported by MathML
+            arg = '%sem' % (float(arg[:-2])/18)
+        node = node.append(mspace(width='%s'%arg))
+        return node, string
+
+    if name == 'phantom':
+        new_node = mphantom()
+        node.append(new_node)
+        return new_node, string
+
+    if name == 'boxed':
+        # CSS padding is broken in Firefox 115.6.0esr
+        # therefore we still need the deprecated <menclose> element
+        new_node = menclose(notation='box', CLASS='boxed')
+        node.append(new_node)
+        return new_node, string
+
+    # Complex elements (Layout schemata)
+    # ==================================
+
+    if name == 'sqrt':
+        radix, string = tex_optarg(string)
+        if radix:
+            indexnode = mrow()
+            new_node = mroot(indexnode, switch=True)
+            parse_latex_math(indexnode, radix)
+            indexnode.close()
+        else:
+            new_node = msqrt()
+        node.append(new_node)
+        return new_node, string
+
+    if name in fractions:
+        attributes = fractions[name]
+        if name == 'cfrac':
+            optarg, string = tex_optarg(string)
+            optargs = {'l': 'left', 'r': 'right'}
+            if optarg in optargs:
+                attributes = attributes.copy()
+                attributes['numalign'] = optargs[optarg]  # "numalign" is deprecated
+                attributes['class'] += ' numalign-' + optargs[optarg]
+        new_node = frac = mfrac(**attributes)
+        if name.endswith('binom'):
+            new_node = mrow(mo('('), new_node, mo(')'), CLASS='binom')
+            new_node.nchildren = 3
+        node.append(new_node)
+        return frac, string
+
+    if name == '\\':  # end of a row
+        entry = mtd()
+        new_node = mtr(entry)
+        node.close().close().append(new_node)
+        return entry, string
+
+    if name in accents:
+        accent_node = mo(accents[name], stretchy=False)
+        # mi() would be simpler, but semantically wrong
+        # --- https://w3c.github.io/mathml-core/#operator-fence-separator-or-accent-mo
+        if name == 'vec':
+            accent_node.set('scriptlevel', '+1')  # scale down arrow
+        new_node = mover(accent_node, accent=True, switch=True)
+        node.append(new_node)
+        return new_node, string
+
+    if name in over:
+        # set "accent" to False (otherwise dots on i and j are dropped)
+        # but to True on accent node get "textstyle" (full size) symbols on top
+        new_node = mover(mo(over[name][0], accent=True),
+                         switch=True, accent=False)
+        node.append(new_node)
+        return new_node, string
+
+    if name == 'overset':
+        new_node = mover(switch=True)
+        node.append(new_node)
+        return new_node, string
+
+    if name in under:
+        new_node = munder(mo(under[name][0]), switch=True)
+        node.append(new_node)
+        return new_node, string
+
+    if name == 'underset':
+        new_node = munder(switch=True)
+        node.append(new_node)
+        return new_node, string
+
+    if name in ('xleftarrow', 'xrightarrow'):
+        subscript, string = tex_optarg(string)
+        base = mo(operators['long'+name[1:]])
+        if subscript:
+            new_node = munderover(base)
+            sub_node = parse_latex_math(mrow(), subscript)
+            if len(sub_node) == 1:
+                sub_node = sub_node[0]
+            new_node.append(sub_node)
+        else:
+            new_node = mover(base)
+        node.append(new_node)
+        return new_node, string
+
+    if name in layout_styles:  # 'displaystyle', 'textstyle', ...
+        if len(node) > 0:
+            raise MathError(rf'Declaration "\{name}" must be first command '
+                            'in a group!')
+        for k, v in layout_styles[name].items():
+            node.set(k, v)
+        return node, string
+
+    if name.endswith('limits'):
+        arg, remainder = tex_token(string)
+        if arg in '_^':  # else ignore
+            string = remainder
+            node = handle_script_or_limit(node, arg, limits=name)
+        return node, string
+
+    # Environments
+
+    if name == 'begin':
+        return begin_environment(node, string)
+
+    if name == 'end':
+        return end_environment(node, string)
+
+    raise MathError(rf'Unknown LaTeX command "\{name}".')
+
+# >>> handle_cmd('left', math(), '[a\\right]')
+# (mrow(mo('[')), 'a\\right]')
+# >>> handle_cmd('left', math(), '. a)') # empty \left
+# (mrow(), ' a)')
+# >>> handle_cmd('left', math(), '\\uparrow a)') # cmd
+# (mrow(mo('↑')), 'a)')
+# >>> handle_cmd('not', math(), '\\equiv \\alpha)') # cmd
+# (math(mo('≢')), '\\alpha)')
+# >>> handle_cmd('text', math(), '{ for } i>0') # group
+# (math(mtext('\xa0for\xa0')), ' i>0')
+# >>> handle_cmd('text', math(), '{B}T') # group
+# (math(mtext('B')), 'T')
+# >>> handle_cmd('text', math(), '{number of apples}}') # group
+# (math(mtext('number of apples')), '}')
+# >>> handle_cmd('text', math(), 'i \\sin(x)') # single char
+# (math(mtext('i')), ' \\sin(x)')
+# >>> handle_cmd(' ', math(), '  next') # inter word space
+# (math(mspace(width='0.25em')), '  next')
+# >>> handle_cmd('\n', math(), '\nnext') # inter word space
+# (math(mspace(width='0.25em')), '\nnext')
+# >>> handle_cmd('sin', math(), '(\\alpha)')
+# (math(mi('sin'), mo('\u2061')), '(\\alpha)')
+# >>> handle_cmd('sin', math(), ' \\alpha')
+# (math(mi('sin'), mo('\u2061')), ' \\alpha')
+# >>> handle_cmd('operatorname', math(), '{abs}(x)')
+# (math(mi('abs', mathvariant='normal'), mo('\u2061')), '(x)')
+# >>> handle_cmd('overline', math(), '{981}')
+# (mover(mo('_', accent='true'), switch=True, accent='false'), '{981}')
+# >>> handle_cmd('bar', math(), '{x}')
+# (mover(mo('ˉ', stretchy='false'), switch=True, accent='true'), '{x}')
+# >>> handle_cmd('xleftarrow', math(), r'[\alpha]{10}')
+# (munderover(mo('⟵'), mi('α')), '{10}')
+# >>> handle_cmd('xleftarrow', math(), r'[\alpha=5]{10}')
+# (munderover(mo('⟵'), mrow(mi('α'), mo('='), mn('5'))), '{10}')
+# >>> handle_cmd('left', math(), '< a)')
+# Traceback (most recent call last):
+# docutils.utils.math.MathError: Unsupported "\left" delimiter "<"!
+# >>> handle_cmd('not', math(), '{< b} c') #  LaTeX ignores the braces, too.
+# (math(), '{\\not < b} c')
+
+
+def handle_math_alphabet(name, node, string):
+    """Process the argument of the math alphabet command `name`.
+
+    The next token or group is read from `string`, parsed and appended
+    to `node`; characters of <mn> and <mi> elements are then replaced
+    by the counterparts listed in the `mathalphabet2unichar` table that
+    belongs to `name` (unlisted characters are kept).
+
+    Return a tuple ``(node, string)`` with the node to continue with
+    and the unprocessed remainder of `string`.
+    """
+    arg, string = tex_token_or_group(string)
+    is_roman = (name == 'mathrm')
+    # Text argument with more than one letter, e.g. \mathrm{out}:
+    # a single <mi> suffices (<mi> defaults to "normal" font).
+    if is_roman and len(arg) > 1 and arg.isalpha():
+        return node.append(mi(arg)), string
+
+    # Everything else is parsed into an <mrow>.
+    row_atts = {'class': 'mathscr'} if name == 'mathscr' else {}
+    container = mrow(**row_atts)
+    node.append(container)
+    parse_latex_math(container, arg)
+
+    # Some alphabets share the mapping table of another one.
+    table_name = name.replace('mathscr', 'mathcal')
+    table_name = table_name.replace('mathbfsfit', 'mathsfbfit')
+    charmap = getattr(mathalphabet2unichar, table_name, {})
+
+    for element in container.iter():
+        if isinstance(element, mn):
+            # numbers are converted digit by digit
+            element.text = ''.join(charmap.get(digit, digit)
+                                   for digit in element.text)
+            continue
+        if not isinstance(element, mi):
+            continue
+        # The text is looked up as a whole, so multi-letter
+        # identifiers (functions) are not converted.
+        element.text = charmap.get(element.text, element.text)
+        if is_roman and element.text.isalpha():
+            element.set('mathvariant', 'normal')
+    return container.close(), string
+
+# >>> handle_math_alphabet('mathrm', math(), '\\alpha')
+# (math(mi('α', mathvariant='normal')), '')
+# >>> handle_math_alphabet('mathbb', math(), '{R} = 3')
+# (math(mi('ℝ')), ' = 3')
+# >>> handle_math_alphabet('mathcal', math(), '{F = 3}')
+# (math(mrow(mi('ℱ'), mo('='), mn('3'), nchildren=3)), '')
+# >>> handle_math_alphabet('mathrm', math(), '{out} = 3')  # drop <mrow>
+# (math(mi('out')), ' = 3')
+#
+# Single letters in \mathrm require "mathvariant='normal'":
+# >>> handle_math_alphabet('mathrm', math(), '{V = 3}')  # doctest: +ELLIPSIS
+# (math(mrow(mi('V', mathvariant='normal'), mo('='), mn('3'), ...)), '')
+
+
+def handle_script_or_limit(node, c, limits=''):
+    """Append script or limit element to `node`.
+
+    The element returned by ``node.pop()`` becomes the base of the new
+    element, which is appended to `node` and returned.
+
+    `c` selects the position: "_" (subscript/underscript) or
+    "^" (superscript/overscript).
+    `limits` is the name of a preceding "limits" command without the
+    backslash (e.g. "limits", "movablelimits") or the empty string.
+
+    Raise `MathError` if `c` is neither "_" nor "^".
+    """
+    # Validate before `node` is modified: with any other value, no new
+    # element would be created and the popped base would be lost.
+    if c not in ('_', '^'):
+        raise MathError(f'Expected "_" or "^" but got "{c}"!')
+
+    child = node.pop()
+    if limits == 'limits':
+        child.set('movablelimits', 'false')
+    elif (limits == 'movablelimits'
+          or getattr(child, 'text', '') in movablelimits):
+        child.set('movablelimits', 'true')
+
+    if c == '_':
+        if isinstance(child, mover):
+            # base already has an overscript: combine both
+            new_node = munderover(*child, switch=True)
+        elif isinstance(child, msup):
+            # base already has a superscript: combine both
+            new_node = msubsup(*child, switch=True)
+        elif (limits in ('limits', 'movablelimits')
+              or limits == '' and child.get('movablelimits', None)):
+            new_node = munder(child)
+        else:
+            new_node = msub(child)
+    else:  # c == '^'
+        if isinstance(child, munder):
+            # base already has an underscript: combine both
+            new_node = munderover(*child)
+        elif isinstance(child, msub):
+            # base already has a subscript: combine both
+            new_node = msubsup(*child)
+        elif (limits in ('limits', 'movablelimits')
+              or limits == '' and child.get('movablelimits', None)):
+            new_node = mover(child)
+        else:
+            new_node = msup(child)
+    node.append(new_node)
+    return new_node
+
+
+def begin_environment(node, string):
+    """Start the environment named in the leading group of `string`.
+
+    Append an <mtable> with one row and one (empty) cell to `node`,
+    wrapped in an <mrow> with the opening delimiter if the environment
+    defines one.
+
+    Return a tuple ``(entry, string)`` with the first table cell and
+    the remainder of `string`.
+    Raise `MathError` if the environment is not listed in `matrices`.
+    """
+    name, string = tex_group(string)
+    if name not in matrices:
+        raise MathError(f'Environment "{name}" not supported!')
+
+    table_atts = {}
+    opening = matrices[name][0]
+    if opening:
+        fence_atts = {}
+        if name == 'cases':
+            fence_atts['rspace'] = '0.17em'
+            table_atts['columnalign'] = 'left'
+            table_atts['class'] = 'cases'
+        fence = mrow(mo(opening, **fence_atts))
+        node.append(fence)
+        node = fence  # the table goes inside the fenced row
+    elif name == 'smallmatrix':
+        table_atts.update(rowspacing='0.02em',
+                          columnspacing='0.333em',
+                          scriptlevel='1')
+    elif name == 'aligned':
+        table_atts['class'] = 'ams-align'
+    # TODO: array, aligned & alignedat take an optional [t], [b], or [c].
+    first_cell = mtd()
+    node.append(mtable(mtr(first_cell), **table_atts))
+    return first_cell, string
+
+
+def end_environment(node, string):
+    """Finish the environment named in the leading group of `string`.
+
+    Close the current table cell, row, and table, then append the
+    closing delimiter (if the environment defines one) and close the
+    wrapping element.
+
+    Return a tuple ``(node, string)`` with the node to continue with
+    and the remainder of `string`.
+    Raise `MathError` if the environment is not listed in `matrices`.
+    """
+    name, string = tex_group(string)
+    if name not in matrices:
+        raise MathError(f'Environment "{name}" not supported!')
+
+    for _ in range(3):  # close: mtd, mtr, mtable
+        node = node.close()
+    closing = matrices[name][1]
+    if closing:
+        node = node.append(mo(closing)).close()
+    elif name == 'cases':
+        # no closing delimiter, but the wrapper still needs closing
+        node = node.close()
+    return node, string
+
+
+def tex_equation_columns(rows):
+    """Return the number of "equation columns" in `rows`.
+
+    `rows` is an iterable of LaTeX source strings (one per row).
+    The result is derived from the maximal number of non-escaped "&"
+    (next column) characters in a row: 0 if there are none (or `rows`
+    is empty), else ``tabs // 2 + 1``.
+
+    cf. "alignat" in
+    http://mirror.ctan.org/macros/latex/required/amsmath/amsldoc.pdf
+    """
+    # ``default=0``: an empty `rows` has no columns (instead of letting
+    # `max()` raise a ValueError).
+    tabs = max((row.count('&') - row.count(r'\&') for row in rows),
+               default=0)
+    if not tabs:
+        return 0
+    return tabs // 2 + 1
+
+# >>> tex_equation_columns(['a = b'])
+# 0
+# >>> tex_equation_columns(['a &= b'])
+# 1
+# >>> tex_equation_columns([r'a &= b & a \in S'])
+# 2
+# >>> tex_equation_columns(['a &= b & c &= d'])
+# 2
+
+
+# Return dictionary with attributes to style an <mtable> as align environment:
+# Not used with HTML. Replaced by CSS rule for "mtable.ams-align" in
+# "minimal.css" as "columnalign" is disregarded by Chromium and webkit.
+def align_attributes(rows):
+    """Return attributes to style an <mtable> as "align" environment.
+
+    `rows` is an iterable of LaTeX source strings (one per table row).
+
+    The returned dictionary always contains "class" and "displaystyle".
+    If any row contains a non-escaped "&", it also contains
+    "columnalign" (alternating "right"/"left", one value per column)
+    and "columnspacing" (alternating "0"/"2em", one value per gap).
+    """
+    atts = {'class': 'ams-align',
+            'displaystyle': True}
+    # get maximal number of non-escaped "next column" markup characters
+    # (``default=0``: an empty `rows` yields just the basic attributes
+    # instead of a ValueError from `max()`):
+    tabs = max((row.count('&') - row.count(r'\&') for row in rows),
+               default=0)
+    if tabs:
+        aligns = ['right', 'left'] * tabs
+        spacing = ['0', '2em'] * tabs
+        atts['columnalign'] = ' '.join(aligns[:tabs+1])
+        atts['columnspacing'] = ' '.join(spacing[:tabs])
+    return atts
+
+# >>> align_attributes(['a = b'])
+# {'class': 'ams-align', 'displaystyle': True}
+# >>> align_attributes(['a &= b'])
+# {'class': 'ams-align', 'displaystyle': True, 'columnalign': 'right left', 'columnspacing': '0'}
+# >>> align_attributes([r'a &= b & a \in S'])
+# {'class': 'ams-align', 'displaystyle': True, 'columnalign': 'right left right', 'columnspacing': '0 2em'}
+# >>> align_attributes(['a &= b & c &= d'])
+# {'class': 'ams-align', 'displaystyle': True, 'columnalign': 'right left right left', 'columnspacing': '0 2em 0'}
+# >>> align_attributes([r'a &= b & c &= d \& e'])
+# {'class': 'ams-align', 'displaystyle': True, 'columnalign': 'right left right left', 'columnspacing': '0 2em 0'}
+# >>> align_attributes([r'a &= b & c &= d & e'])
+# {'class': 'ams-align', 'displaystyle': True, 'columnalign': 'right left right left right', 'columnspacing': '0 2em 0 2em'}
+
+
+def tex2mathml(tex_math, as_block=False):
+    """Convert the LaTeX math code `tex_math` to a MathML string.
+
+    With `as_block` set to ``True``, the formula is marked up as
+    displayed (block level) formula; if its top-level code contains
+    a row separator (``\\\\``), it is laid out as a math table.
+    """
+    root = math(xmlns='http://www.w3.org/1998/Math/MathML')
+    target = root  # element that receives the parsed content
+    if as_block:
+        root.set('display', 'block')
+        if r'\\' in toplevel_code(tex_math):
+            # several rows: emulate "align*" environment with a math table
+            target = mtd()
+            table = mtable(mtr(target), CLASS='ams-align',
+                           displaystyle=True)
+            root.append(table)
+    parse_latex_math(target, tex_math)
+    root.indent_xml()
+    return root.toxml()
+
+# >>> print(tex2mathml('3'))
+# <math xmlns="http://www.w3.org/1998/Math/MathML">
+#   <mn>3</mn>
+# </math>
+# >>> print(tex2mathml('3', as_block=True))
+# <math xmlns="http://www.w3.org/1998/Math/MathML" display="block">
+#   <mn>3</mn>
+# </math>
+# >>> print(tex2mathml(r'a & b \\ c & d', as_block=True))
+# <math xmlns="http://www.w3.org/1998/Math/MathML" display="block">
+#   <mtable class="ams-align" displaystyle="true">
+#     <mtr>
+#       <mtd>
+#         <mi>a</mi>
+#       </mtd>
+#       <mtd>
+#         <mi>b</mi>
+#       </mtd>
+#     </mtr>
+#     <mtr>
+#       <mtd>
+#         <mi>c</mi>
+#       </mtd>
+#       <mtd>
+#         <mi>d</mi>
+#       </mtd>
+#     </mtr>
+#   </mtable>
+# </math>
+# >>> print(tex2mathml(r'a \\ b', as_block=True))
+# <math xmlns="http://www.w3.org/1998/Math/MathML" display="block">
+#   <mtable class="ams-align" displaystyle="true">
+#     <mtr>
+#       <mtd>
+#         <mi>a</mi>
+#       </mtd>
+#     </mtr>
+#     <mtr>
+#       <mtd>
+#         <mi>b</mi>
+#       </mtd>
+#     </mtr>
+#   </mtable>
+# </math>
+
+
+# TODO: look up more symbols from tr25, e.g.
+#
+#
+# Table 2.8 Using Vertical Line or Solidus Overlay
+#   some of the negated forms of mathematical relations that can only be
+#   encoded by using either U+0338 COMBINING LONG SOLIDUS OVERLAY or U+20D2
+#   COMBINING LONG VERTICAL LINE OVERLAY. (For issues with using 0338 in
+#   MathML, see Section 3.2.7, Combining Marks.)
+#
+# Table 2.9 Variants of Mathematical Symbols using VS1?
+#
+# Sequence      Description
+# 0030 + VS1    DIGIT ZERO - short diagonal stroke form
+# 2205 + VS1    EMPTY SET - zero with long diagonal stroke overlay form
+# 2229 + VS1    INTERSECTION - with serifs
+# 222A + VS1    UNION - with serifs
+# 2268 + VS1    LESS-THAN BUT NOT EQUAL TO - with vertical stroke
+# 2269 + VS1    GREATER-THAN BUT NOT EQUAL TO - with vertical stroke
+# 2272 + VS1    LESS-THAN OR EQUIVALENT TO - following the slant of the lower leg
+# 2273 + VS1    GREATER-THAN OR EQUIVALENT TO - following the slant of the lower leg
+# 228A + VS1    SUBSET OF WITH NOT EQUAL TO - variant with stroke through bottom members
+# 228B + VS1    SUPERSET OF WITH NOT EQUAL TO - variant with stroke through bottom members
+# 2293 + VS1    SQUARE CAP - with serifs
+# 2294 + VS1    SQUARE CUP - with serifs
+# 2295 + VS1    CIRCLED PLUS - with white rim
+# 2297 + VS1    CIRCLED TIMES - with white rim
+# 229C + VS1    CIRCLED EQUALS - equal sign inside and touching the circle
+# 22DA + VS1    LESS-THAN slanted EQUAL TO OR GREATER-THAN
+# 22DB + VS1    GREATER-THAN slanted EQUAL TO OR LESS-THAN
+# 2A3C + VS1    INTERIOR PRODUCT - tall variant with narrow foot
+# 2A3D + VS1    RIGHTHAND INTERIOR PRODUCT - tall variant with narrow foot
+# 2A9D + VS1    SIMILAR OR LESS-THAN - following the slant of the upper leg
+# 2A9E + VS1    SIMILAR OR GREATER-THAN - following the slant of the upper leg
+# 2AAC + VS1    SMALLER THAN OR slanted EQUAL
+# 2AAD + VS1    LARGER THAN OR slanted EQUAL
+# 2ACB + VS1    SUBSET OF ABOVE NOT EQUAL TO - variant with stroke through bottom members
+# 2ACC + VS1    SUPERSET OF ABOVE NOT EQUAL TO - variant with stroke through bottom members