From 4a52a71956a8d46fcb7294ac71734504bb09bcc2 Mon Sep 17 00:00:00 2001 From: S. Solomon Darnell Date: Fri, 28 Mar 2025 21:52:21 -0500 Subject: two version of R2R are here --- .../azure/ai/inference/prompts/_mustache.py | 671 +++++++++++++++++++++ 1 file changed, 671 insertions(+) create mode 100644 .venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_mustache.py (limited to '.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_mustache.py') diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_mustache.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_mustache.py new file mode 100644 index 00000000..f7a0c21d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_mustache.py @@ -0,0 +1,671 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +# pylint: disable=line-too-long,R,consider-using-dict-items,docstring-missing-return,docstring-missing-rtype,docstring-missing-param,global-statement,unused-argument,global-variable-not-assigned,protected-access,logging-fstring-interpolation,deprecated-method +from __future__ import annotations +import logging +from collections.abc import Iterator, Sequence +from types import MappingProxyType +from typing import ( + Any, + Dict, + List, + Literal, + Mapping, + Optional, + Union, + cast, +) +from typing_extensions import TypeAlias + +logger = logging.getLogger(__name__) + + +Scopes: TypeAlias = List[Union[Literal[False, 0], Mapping[str, Any]]] + + +# Globals +_CURRENT_LINE = 1 +_LAST_TAG_LINE = None + + +class ChevronError(SyntaxError): + """Custom exception for Chevron errors.""" + + +# +# Helper functions +# + + +def grab_literal(template: str, l_del: str) -> tuple[str, str]: + """Parse a literal from the template. + + Args: + template: The template to parse. + l_del: The left delimiter. + + Returns: + Tuple[str, str]: The literal and the template. + """ + + global _CURRENT_LINE + + try: + # Look for the next tag and move the template to it + literal, template = template.split(l_del, 1) + _CURRENT_LINE += literal.count("\n") + return (literal, template) + + # There are no more tags in the template? + except ValueError: + # Then the rest of the template is a literal + return (template, "") + + +def l_sa_check(template: str, literal: str, is_standalone: bool) -> bool: + """Do a preliminary check to see if a tag could be a standalone. + + Args: + template: The template. (Not used.) + literal: The literal. + is_standalone: Whether the tag is standalone. + + Returns: + bool: Whether the tag could be a standalone. + """ + + # If there is a newline, or the previous tag was a standalone + if literal.find("\n") != -1 or is_standalone: + padding = literal.split("\n")[-1] + + # If all the characters since the last newline are spaces + # Then the next tag could be a standalone + # Otherwise it can't be + return padding.isspace() or padding == "" + else: + return False + + +def r_sa_check(template: str, tag_type: str, is_standalone: bool) -> bool: + """Do a final check to see if a tag could be a standalone. + + Args: + template: The template. + tag_type: The type of the tag. + is_standalone: Whether the tag is standalone. + + Returns: + bool: Whether the tag could be a standalone. + """ + + # Check right side if we might be a standalone + if is_standalone and tag_type not in ["variable", "no escape"]: + on_newline = template.split("\n", 1) + + # If the stuff to the right of us are spaces we're a standalone + return on_newline[0].isspace() or not on_newline[0] + + # If we're a tag can't be a standalone + else: + return False + + +def parse_tag(template: str, l_del: str, r_del: str) -> tuple[tuple[str, str], str]: + """Parse a tag from a template. + + Args: + template: The template. + l_del: The left delimiter. + r_del: The right delimiter. + + Returns: + Tuple[Tuple[str, str], str]: The tag and the template. + + Raises: + ChevronError: If the tag is unclosed. + ChevronError: If the set delimiter tag is unclosed. + """ + global _CURRENT_LINE + global _LAST_TAG_LINE + + tag_types = { + "!": "comment", + "#": "section", + "^": "inverted section", + "/": "end", + ">": "partial", + "=": "set delimiter?", + "{": "no escape?", + "&": "no escape", + } + + # Get the tag + try: + tag, template = template.split(r_del, 1) + except ValueError as e: + msg = "unclosed tag " f"at line {_CURRENT_LINE}" + raise ChevronError(msg) from e + + # Find the type meaning of the first character + tag_type = tag_types.get(tag[0], "variable") + + # If the type is not a variable + if tag_type != "variable": + # Then that first character is not needed + tag = tag[1:] + + # If we might be a set delimiter tag + if tag_type == "set delimiter?": + # Double check to make sure we are + if tag.endswith("="): + tag_type = "set delimiter" + # Remove the equal sign + tag = tag[:-1] + + # Otherwise we should complain + else: + msg = "unclosed set delimiter tag\n" f"at line {_CURRENT_LINE}" + raise ChevronError(msg) + + elif ( + # If we might be a no html escape tag + tag_type == "no escape?" + # And we have a third curly brace + # (And are using curly braces as delimiters) + and l_del == "{{" + and r_del == "}}" + and template.startswith("}") + ): + # Then we are a no html escape tag + template = template[1:] + tag_type = "no escape" + + # Strip the whitespace off the key and return + return ((tag_type, tag.strip()), template) + + +# +# The main tokenizing function +# + + +def tokenize(template: str, def_ldel: str = "{{", def_rdel: str = "}}") -> Iterator[tuple[str, str]]: + """Tokenize a mustache template. + + Tokenizes a mustache template in a generator fashion, + using file-like objects. It also accepts a string containing + the template. + + + Arguments: + + template -- a file-like object, or a string of a mustache template + + def_ldel -- The default left delimiter + ("{{" by default, as in spec compliant mustache) + + def_rdel -- The default right delimiter + ("}}" by default, as in spec compliant mustache) + + + Returns: + + A generator of mustache tags in the form of a tuple + + -- (tag_type, tag_key) + + Where tag_type is one of: + * literal + * section + * inverted section + * end + * partial + * no escape + + And tag_key is either the key or in the case of a literal tag, + the literal itself. + """ + + global _CURRENT_LINE, _LAST_TAG_LINE + _CURRENT_LINE = 1 + _LAST_TAG_LINE = None + + is_standalone = True + open_sections = [] + l_del = def_ldel + r_del = def_rdel + + while template: + literal, template = grab_literal(template, l_del) + + # If the template is completed + if not template: + # Then yield the literal and leave + yield ("literal", literal) + break + + # Do the first check to see if we could be a standalone + is_standalone = l_sa_check(template, literal, is_standalone) + + # Parse the tag + tag, template = parse_tag(template, l_del, r_del) + tag_type, tag_key = tag + + # Special tag logic + + # If we are a set delimiter tag + if tag_type == "set delimiter": + # Then get and set the delimiters + dels = tag_key.strip().split(" ") + l_del, r_del = dels[0], dels[-1] + + # If we are a section tag + elif tag_type in ["section", "inverted section"]: + # Then open a new section + open_sections.append(tag_key) + _LAST_TAG_LINE = _CURRENT_LINE + + # If we are an end tag + elif tag_type == "end": + # Then check to see if the last opened section + # is the same as us + try: + last_section = open_sections.pop() + except IndexError as e: + msg = f'Trying to close tag "{tag_key}"\n' "Looks like it was not opened.\n" f"line {_CURRENT_LINE + 1}" + raise ChevronError(msg) from e + if tag_key != last_section: + # Otherwise we need to complain + msg = ( + f'Trying to close tag "{tag_key}"\n' + f'last open tag is "{last_section}"\n' + f"line {_CURRENT_LINE + 1}" + ) + raise ChevronError(msg) + + # Do the second check to see if we're a standalone + is_standalone = r_sa_check(template, tag_type, is_standalone) + + # Which if we are + if is_standalone: + # Remove the stuff before the newline + template = template.split("\n", 1)[-1] + + # Partials need to keep the spaces on their left + if tag_type != "partial": + # But other tags don't + literal = literal.rstrip(" ") + + # Start yielding + # Ignore literals that are empty + if literal != "": + yield ("literal", literal) + + # Ignore comments and set delimiters + if tag_type not in ["comment", "set delimiter?"]: + yield (tag_type, tag_key) + + # If there are any open sections when we're done + if open_sections: + # Then we need to complain + msg = ( + "Unexpected EOF\n" + f'the tag "{open_sections[-1]}" was never closed\n' + f"was opened at line {_LAST_TAG_LINE}" + ) + raise ChevronError(msg) + + +# +# Helper functions +# + + +def _html_escape(string: str) -> str: + """HTML escape all of these " & < >""" + + html_codes = { + '"': """, + "<": "<", + ">": ">", + } + + # & must be handled first + string = string.replace("&", "&") + for char in html_codes: + string = string.replace(char, html_codes[char]) + return string + + +def _get_key( + key: str, + scopes: Scopes, + warn: bool, + keep: bool, + def_ldel: str, + def_rdel: str, +) -> Any: + """Get a key from the current scope""" + + # If the key is a dot + if key == ".": + # Then just return the current scope + return scopes[0] + + # Loop through the scopes + for scope in scopes: + try: + # Return an empty string if falsy, with two exceptions + # 0 should return 0, and False should return False + if scope in (0, False): + return scope + + # For every dot separated key + for child in key.split("."): + # Return an empty string if falsy, with two exceptions + # 0 should return 0, and False should return False + if scope in (0, False): + return scope + # Move into the scope + try: + # Try subscripting (Normal dictionaries) + scope = cast(Dict[str, Any], scope)[child] + except (TypeError, AttributeError): + try: + scope = getattr(scope, child) + except (TypeError, AttributeError): + # Try as a list + scope = scope[int(child)] # type: ignore + + try: + # This allows for custom falsy data types + # https://github.com/noahmorrison/chevron/issues/35 + if scope._CHEVRON_return_scope_when_falsy: # type: ignore + return scope + except AttributeError: + if scope in (0, False): + return scope + return scope or "" + except (AttributeError, KeyError, IndexError, ValueError): + # We couldn't find the key in the current scope + # We'll try again on the next pass + pass + + # We couldn't find the key in any of the scopes + + if warn: + logger.warn(f"Could not find key '{key}'") + + if keep: + return f"{def_ldel} {key} {def_rdel}" + + return "" + + +def _get_partial(name: str, partials_dict: Mapping[str, str]) -> str: + """Load a partial""" + try: + # Maybe the partial is in the dictionary + return partials_dict[name] + except KeyError: + return "" + + +# +# The main rendering function +# +g_token_cache: Dict[str, List[tuple[str, str]]] = {} + +EMPTY_DICT: MappingProxyType[str, str] = MappingProxyType({}) + + +def render( + template: Union[str, List[tuple[str, str]]] = "", + data: Mapping[str, Any] = EMPTY_DICT, + partials_dict: Mapping[str, str] = EMPTY_DICT, + padding: str = "", + def_ldel: str = "{{", + def_rdel: str = "}}", + scopes: Optional[Scopes] = None, + warn: bool = False, + keep: bool = False, +) -> str: + """Render a mustache template. + + Renders a mustache template with a data scope and inline partial capability. + + Arguments: + + template -- A file-like object or a string containing the template. + + data -- A python dictionary with your data scope. + + partials_path -- The path to where your partials are stored. + If set to None, then partials won't be loaded from the file system + (defaults to '.'). + + partials_ext -- The extension that you want the parser to look for + (defaults to 'mustache'). + + partials_dict -- A python dictionary which will be search for partials + before the filesystem is. {'include': 'foo'} is the same + as a file called include.mustache + (defaults to {}). + + padding -- This is for padding partials, and shouldn't be used + (but can be if you really want to). + + def_ldel -- The default left delimiter + ("{{" by default, as in spec compliant mustache). + + def_rdel -- The default right delimiter + ("}}" by default, as in spec compliant mustache). + + scopes -- The list of scopes that get_key will look through. + + warn -- Log a warning when a template substitution isn't found in the data + + keep -- Keep unreplaced tags when a substitution isn't found in the data. + + + Returns: + + A string containing the rendered template. + """ + + # If the template is a sequence but not derived from a string + if isinstance(template, Sequence) and not isinstance(template, str): + # Then we don't need to tokenize it + # But it does need to be a generator + tokens: Iterator[tuple[str, str]] = (token for token in template) + else: + if template in g_token_cache: + tokens = (token for token in g_token_cache[template]) + else: + # Otherwise make a generator + tokens = tokenize(template, def_ldel, def_rdel) + + output = "" + + if scopes is None: + scopes = [data] + + # Run through the tokens + for tag, key in tokens: + # Set the current scope + current_scope = scopes[0] + + # If we're an end tag + if tag == "end": + # Pop out of the latest scope + del scopes[0] + + # If the current scope is falsy and not the only scope + elif not current_scope and len(scopes) != 1: + if tag in ["section", "inverted section"]: + # Set the most recent scope to a falsy value + scopes.insert(0, False) + + # If we're a literal tag + elif tag == "literal": + # Add padding to the key and add it to the output + output += key.replace("\n", "\n" + padding) + + # If we're a variable tag + elif tag == "variable": + # Add the html escaped key to the output + thing = _get_key(key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel) + if thing is True and key == ".": + # if we've coerced into a boolean by accident + # (inverted tags do this) + # then get the un-coerced object (next in the stack) + thing = scopes[1] + if not isinstance(thing, str): + thing = str(thing) + output += _html_escape(thing) + + # If we're a no html escape tag + elif tag == "no escape": + # Just lookup the key and add it + thing = _get_key(key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel) + if not isinstance(thing, str): + thing = str(thing) + output += thing + + # If we're a section tag + elif tag == "section": + # Get the sections scope + scope = _get_key(key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel) + + # If the scope is a callable (as described in + # https://mustache.github.io/mustache.5.html) + if callable(scope): + # Generate template text from tags + text = "" + tags: List[tuple[str, str]] = [] + for token in tokens: + if token == ("end", key): + break + + tags.append(token) + tag_type, tag_key = token + if tag_type == "literal": + text += tag_key + elif tag_type == "no escape": + text += f"{def_ldel}& {tag_key} {def_rdel}" + else: + text += "{}{} {}{}".format( + def_ldel, + { + "comment": "!", + "section": "#", + "inverted section": "^", + "end": "/", + "partial": ">", + "set delimiter": "=", + "no escape": "&", + "variable": "", + }[tag_type], + tag_key, + def_rdel, + ) + + g_token_cache[text] = tags + + rend = scope( + text, + lambda template, data=None: render( + template, + data={}, + partials_dict=partials_dict, + padding=padding, + def_ldel=def_ldel, + def_rdel=def_rdel, + scopes=data and [data] + scopes or scopes, + warn=warn, + keep=keep, + ), + ) + + output += rend # type: ignore[reportOperatorIssue] + + # If the scope is a sequence, an iterator or generator but not + # derived from a string + elif isinstance(scope, (Sequence, Iterator)) and not isinstance(scope, str): + # Then we need to do some looping + + # Gather up all the tags inside the section + # (And don't be tricked by nested end tags with the same key) + # TODO: This feels like it still has edge cases, no? + tags = [] + tags_with_same_key = 0 + for token in tokens: + if token == ("section", key): + tags_with_same_key += 1 + if token == ("end", key): + tags_with_same_key -= 1 + if tags_with_same_key < 0: + break + tags.append(token) + + # For every item in the scope + for thing in scope: + # Append it as the most recent scope and render + new_scope = [thing] + scopes + rend = render( + template=tags, + scopes=new_scope, + padding=padding, + partials_dict=partials_dict, + def_ldel=def_ldel, + def_rdel=def_rdel, + warn=warn, + keep=keep, + ) + + output += rend + + else: + # Otherwise we're just a scope section + scopes.insert(0, scope) # type: ignore[reportArgumentType] + + # If we're an inverted section + elif tag == "inverted section": + # Add the flipped scope to the scopes + scope = _get_key(key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel) + scopes.insert(0, cast(Literal[False], not scope)) + + # If we're a partial + elif tag == "partial": + # Load the partial + partial = _get_partial(key, partials_dict) + + # Find what to pad the partial with + left = output.rpartition("\n")[2] + part_padding = padding + if left.isspace(): + part_padding += left + + # Render the partial + part_out = render( + template=partial, + partials_dict=partials_dict, + def_ldel=def_ldel, + def_rdel=def_rdel, + padding=part_padding, + scopes=scopes, + warn=warn, + keep=keep, + ) + + # If the partial was indented + if left.isspace(): + # then remove the spaces from the end + part_out = part_out.rstrip(" \t") + + # Add the partials output to the output + output += part_out + + return output -- cgit v1.2.3