about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/charset_normalizer/cli
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/charset_normalizer/cli')
-rw-r--r--.venv/lib/python3.12/site-packages/charset_normalizer/cli/__init__.py8
-rw-r--r--.venv/lib/python3.12/site-packages/charset_normalizer/cli/__main__.py321
2 files changed, 329 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/charset_normalizer/cli/__init__.py b/.venv/lib/python3.12/site-packages/charset_normalizer/cli/__init__.py
new file mode 100644
index 00000000..543a5a4d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/charset_normalizer/cli/__init__.py
@@ -0,0 +1,8 @@
+from __future__ import annotations
+
+from .__main__ import cli_detect, query_yes_no
+
+__all__ = (
+    "cli_detect",
+    "query_yes_no",
+)
diff --git a/.venv/lib/python3.12/site-packages/charset_normalizer/cli/__main__.py b/.venv/lib/python3.12/site-packages/charset_normalizer/cli/__main__.py
new file mode 100644
index 00000000..64a290f2
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/charset_normalizer/cli/__main__.py
@@ -0,0 +1,321 @@
+from __future__ import annotations
+
+import argparse
+import sys
+from json import dumps
+from os.path import abspath, basename, dirname, join, realpath
+from platform import python_version
+from unicodedata import unidata_version
+
+import charset_normalizer.md as md_module
+from charset_normalizer import from_fp
+from charset_normalizer.models import CliDetectionResult
+from charset_normalizer.version import __version__
+
+
+def query_yes_no(question: str, default: str = "yes") -> bool:
+    """Ask a yes/no question via input() and return their answer.
+
+    "question" is a string that is presented to the user.
+    "default" is the presumed answer if the user just hits <Enter>.
+        It must be "yes" (the default), "no" or None (meaning
+        an answer is required of the user).
+
+    The "answer" return value is True for "yes" or False for "no".
+
+    Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input
+    """
+    valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False}
+    if default is None:
+        prompt = " [y/n] "
+    elif default == "yes":
+        prompt = " [Y/n] "
+    elif default == "no":
+        prompt = " [y/N] "
+    else:
+        raise ValueError("invalid default answer: '%s'" % default)
+
+    while True:
+        sys.stdout.write(question + prompt)
+        choice = input().lower()
+        if default is not None and choice == "":
+            return valid[default]
+        elif choice in valid:
+            return valid[choice]
+        else:
+            sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n")
+
+
+def cli_detect(argv: list[str] | None = None) -> int:
+    """
+    CLI assistant using ARGV and ArgumentParser
+    :param argv:
+    :return: 0 if everything is fine, anything else equal trouble
+    """
+    parser = argparse.ArgumentParser(
+        description="The Real First Universal Charset Detector. "
+        "Discover originating encoding used on text file. "
+        "Normalize text to unicode."
+    )
+
+    parser.add_argument(
+        "files", type=argparse.FileType("rb"), nargs="+", help="File(s) to be analysed"
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        default=False,
+        dest="verbose",
+        help="Display complementary information about file if any. "
+        "Stdout will contain logs about the detection process.",
+    )
+    parser.add_argument(
+        "-a",
+        "--with-alternative",
+        action="store_true",
+        default=False,
+        dest="alternatives",
+        help="Output complementary possibilities if any. Top-level JSON WILL be a list.",
+    )
+    parser.add_argument(
+        "-n",
+        "--normalize",
+        action="store_true",
+        default=False,
+        dest="normalize",
+        help="Permit to normalize input file. If not set, program does not write anything.",
+    )
+    parser.add_argument(
+        "-m",
+        "--minimal",
+        action="store_true",
+        default=False,
+        dest="minimal",
+        help="Only output the charset detected to STDOUT. Disabling JSON output.",
+    )
+    parser.add_argument(
+        "-r",
+        "--replace",
+        action="store_true",
+        default=False,
+        dest="replace",
+        help="Replace file when trying to normalize it instead of creating a new one.",
+    )
+    parser.add_argument(
+        "-f",
+        "--force",
+        action="store_true",
+        default=False,
+        dest="force",
+        help="Replace file without asking if you are sure, use this flag with caution.",
+    )
+    parser.add_argument(
+        "-i",
+        "--no-preemptive",
+        action="store_true",
+        default=False,
+        dest="no_preemptive",
+        help="Disable looking at a charset declaration to hint the detector.",
+    )
+    parser.add_argument(
+        "-t",
+        "--threshold",
+        action="store",
+        default=0.2,
+        type=float,
+        dest="threshold",
+        help="Define a custom maximum amount of noise allowed in decoded content. 0. <= noise <= 1.",
+    )
+    parser.add_argument(
+        "--version",
+        action="version",
+        version="Charset-Normalizer {} - Python {} - Unicode {} - SpeedUp {}".format(
+            __version__,
+            python_version(),
+            unidata_version,
+            "OFF" if md_module.__file__.lower().endswith(".py") else "ON",
+        ),
+        help="Show version information and exit.",
+    )
+
+    args = parser.parse_args(argv)
+
+    if args.replace is True and args.normalize is False:
+        if args.files:
+            for my_file in args.files:
+                my_file.close()
+        print("Use --replace in addition of --normalize only.", file=sys.stderr)
+        return 1
+
+    if args.force is True and args.replace is False:
+        if args.files:
+            for my_file in args.files:
+                my_file.close()
+        print("Use --force in addition of --replace only.", file=sys.stderr)
+        return 1
+
+    if args.threshold < 0.0 or args.threshold > 1.0:
+        if args.files:
+            for my_file in args.files:
+                my_file.close()
+        print("--threshold VALUE should be between 0. AND 1.", file=sys.stderr)
+        return 1
+
+    x_ = []
+
+    for my_file in args.files:
+        matches = from_fp(
+            my_file,
+            threshold=args.threshold,
+            explain=args.verbose,
+            preemptive_behaviour=args.no_preemptive is False,
+        )
+
+        best_guess = matches.best()
+
+        if best_guess is None:
+            print(
+                'Unable to identify originating encoding for "{}". {}'.format(
+                    my_file.name,
+                    (
+                        "Maybe try increasing maximum amount of chaos."
+                        if args.threshold < 1.0
+                        else ""
+                    ),
+                ),
+                file=sys.stderr,
+            )
+            x_.append(
+                CliDetectionResult(
+                    abspath(my_file.name),
+                    None,
+                    [],
+                    [],
+                    "Unknown",
+                    [],
+                    False,
+                    1.0,
+                    0.0,
+                    None,
+                    True,
+                )
+            )
+        else:
+            x_.append(
+                CliDetectionResult(
+                    abspath(my_file.name),
+                    best_guess.encoding,
+                    best_guess.encoding_aliases,
+                    [
+                        cp
+                        for cp in best_guess.could_be_from_charset
+                        if cp != best_guess.encoding
+                    ],
+                    best_guess.language,
+                    best_guess.alphabets,
+                    best_guess.bom,
+                    best_guess.percent_chaos,
+                    best_guess.percent_coherence,
+                    None,
+                    True,
+                )
+            )
+
+            if len(matches) > 1 and args.alternatives:
+                for el in matches:
+                    if el != best_guess:
+                        x_.append(
+                            CliDetectionResult(
+                                abspath(my_file.name),
+                                el.encoding,
+                                el.encoding_aliases,
+                                [
+                                    cp
+                                    for cp in el.could_be_from_charset
+                                    if cp != el.encoding
+                                ],
+                                el.language,
+                                el.alphabets,
+                                el.bom,
+                                el.percent_chaos,
+                                el.percent_coherence,
+                                None,
+                                False,
+                            )
+                        )
+
+            if args.normalize is True:
+                if best_guess.encoding.startswith("utf") is True:
+                    print(
+                        '"{}" file does not need to be normalized, as it already came from unicode.'.format(
+                            my_file.name
+                        ),
+                        file=sys.stderr,
+                    )
+                    if my_file.closed is False:
+                        my_file.close()
+                    continue
+
+                dir_path = dirname(realpath(my_file.name))
+                file_name = basename(realpath(my_file.name))
+
+                o_: list[str] = file_name.split(".")
+
+                if args.replace is False:
+                    o_.insert(-1, best_guess.encoding)
+                    if my_file.closed is False:
+                        my_file.close()
+                elif (
+                    args.force is False
+                    and query_yes_no(
+                        'Are you sure to normalize "{}" by replacing it ?'.format(
+                            my_file.name
+                        ),
+                        "no",
+                    )
+                    is False
+                ):
+                    if my_file.closed is False:
+                        my_file.close()
+                    continue
+
+                try:
+                    x_[0].unicode_path = join(dir_path, ".".join(o_))
+
+                    with open(x_[0].unicode_path, "wb") as fp:
+                        fp.write(best_guess.output())
+                except OSError as e:
+                    print(str(e), file=sys.stderr)
+                    if my_file.closed is False:
+                        my_file.close()
+                    return 2
+
+        if my_file.closed is False:
+            my_file.close()
+
+    if args.minimal is False:
+        print(
+            dumps(
+                [el.__dict__ for el in x_] if len(x_) > 1 else x_[0].__dict__,
+                ensure_ascii=True,
+                indent=4,
+            )
+        )
+    else:
+        for my_file in args.files:
+            print(
+                ", ".join(
+                    [
+                        el.encoding or "undefined"
+                        for el in x_
+                        if el.path == abspath(my_file.name)
+                    ]
+                )
+            )
+
+    return 0
+
+
+if __name__ == "__main__":
+    cli_detect()