diff options
| author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
|---|---|---|
| committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
| commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
| tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/bin/runxlrd.py | |
| parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
| download | gn-ai-master.tar.gz | |
Diffstat (limited to '.venv/bin/runxlrd.py')
| -rwxr-xr-x | .venv/bin/runxlrd.py | 410 |
1 files changed, 410 insertions, 0 deletions
diff --git a/.venv/bin/runxlrd.py b/.venv/bin/runxlrd.py new file mode 100755 index 00000000..746e04fa --- /dev/null +++ b/.venv/bin/runxlrd.py @@ -0,0 +1,410 @@ +#!/home/shebes/Research/code/gn/gn-ai/.venv/bin/python +# Copyright (c) 2005-2012 Stephen John Machin, Lingfo Pty Ltd +# This script is part of the xlrd package, which is released under a +# BSD-style licence. + +from __future__ import print_function + +cmd_doc = """ +Commands: + +2rows Print the contents of first and last row in each sheet +3rows Print the contents of first, second and last row in each sheet +bench Same as "show", but doesn't print -- for profiling +biff_count[1] Print a count of each type of BIFF record in the file +biff_dump[1] Print a dump (char and hex) of the BIFF records in the file +fonts hdr + print a dump of all font objects +hdr Mini-overview of file (no per-sheet information) +hotshot Do a hotshot profile run e.g. ... -f1 hotshot bench bigfile*.xls +labels Dump of sheet.col_label_ranges and ...row... for each sheet +name_dump Dump of each object in book.name_obj_list +names Print brief information for each NAME record +ov Overview of file +profile Like "hotshot", but uses cProfile +show Print the contents of all rows in each sheet +version[0] Print versions of xlrd and Python and exit +xfc Print "XF counts" and cell-type counts -- see code for details + +[0] means no file arg +[1] means only one file arg i.e. no glob.glob pattern +""" + +options = None +if __name__ == "__main__": + import xlrd + import sys + import time + import glob + import traceback + import gc + + from xlrd.timemachine import xrange, REPR + + + class LogHandler(object): + + def __init__(self, logfileobj): + self.logfileobj = logfileobj + self.fileheading = None + self.shown = 0 + + def setfileheading(self, fileheading): + self.fileheading = fileheading + self.shown = 0 + + def write(self, text): + if self.fileheading and not self.shown: + self.logfileobj.write(self.fileheading) + self.shown = 1 + self.logfileobj.write(text) + + null_cell = xlrd.empty_cell + + def show_row(bk, sh, rowx, colrange, printit): + if bk.ragged_rows: + colrange = range(sh.row_len(rowx)) + if not colrange: return + if printit: print() + if bk.formatting_info: + for colx, ty, val, cxfx in get_row_data(bk, sh, rowx, colrange): + if printit: + print("cell %s%d: type=%d, data: %r, xfx: %s" + % (xlrd.colname(colx), rowx+1, ty, val, cxfx)) + else: + for colx, ty, val, _unused in get_row_data(bk, sh, rowx, colrange): + if printit: + print("cell %s%d: type=%d, data: %r" % (xlrd.colname(colx), rowx+1, ty, val)) + + def get_row_data(bk, sh, rowx, colrange): + result = [] + dmode = bk.datemode + ctys = sh.row_types(rowx) + cvals = sh.row_values(rowx) + for colx in colrange: + cty = ctys[colx] + cval = cvals[colx] + if bk.formatting_info: + cxfx = str(sh.cell_xf_index(rowx, colx)) + else: + cxfx = '' + if cty == xlrd.XL_CELL_DATE: + try: + showval = xlrd.xldate_as_tuple(cval, dmode) + except xlrd.XLDateError as e: + showval = "%s:%s" % (type(e).__name__, e) + cty = xlrd.XL_CELL_ERROR + elif cty == xlrd.XL_CELL_ERROR: + showval = xlrd.error_text_from_code.get(cval, '<Unknown error code 0x%02x>' % cval) + else: + showval = cval + result.append((colx, cty, showval, cxfx)) + return result + + def bk_header(bk): + print() + print("BIFF version: %s; datemode: %s" + % (xlrd.biff_text_from_num[bk.biff_version], bk.datemode)) + print("codepage: %r (encoding: %s); countries: %r" + % (bk.codepage, bk.encoding, bk.countries)) + print("Last saved by: %r" % bk.user_name) + print("Number of data sheets: %d" % bk.nsheets) + print("Use mmap: %d; Formatting: %d; On demand: %d" + % (bk.use_mmap, bk.formatting_info, bk.on_demand)) + print("Ragged rows: %d" % bk.ragged_rows) + if bk.formatting_info: + print("FORMATs: %d, FONTs: %d, XFs: %d" + % (len(bk.format_list), len(bk.font_list), len(bk.xf_list))) + if not options.suppress_timing: + print("Load time: %.2f seconds (stage 1) %.2f seconds (stage 2)" + % (bk.load_time_stage_1, bk.load_time_stage_2)) + print() + + def show_fonts(bk): + print("Fonts:") + for x in xrange(len(bk.font_list)): + font = bk.font_list[x] + font.dump(header='== Index %d ==' % x, indent=4) + + def show_names(bk, dump=0): + bk_header(bk) + if bk.biff_version < 50: + print("Names not extracted in this BIFF version") + return + nlist = bk.name_obj_list + print("Name list: %d entries" % len(nlist)) + for nobj in nlist: + if dump: + nobj.dump(sys.stdout, + header="\n=== Dump of name_obj_list[%d] ===" % nobj.name_index) + else: + print("[%d]\tName:%r macro:%r scope:%d\n\tresult:%r\n" + % (nobj.name_index, nobj.name, nobj.macro, nobj.scope, nobj.result)) + + def print_labels(sh, labs, title): + if not labs:return + for rlo, rhi, clo, chi in labs: + print("%s label range %s:%s contains:" + % (title, xlrd.cellname(rlo, clo), xlrd.cellname(rhi-1, chi-1))) + for rx in xrange(rlo, rhi): + for cx in xrange(clo, chi): + print(" %s: %r" % (xlrd.cellname(rx, cx), sh.cell_value(rx, cx))) + + def show_labels(bk): + # bk_header(bk) + hdr = 0 + for shx in range(bk.nsheets): + sh = bk.sheet_by_index(shx) + clabs = sh.col_label_ranges + rlabs = sh.row_label_ranges + if clabs or rlabs: + if not hdr: + bk_header(bk) + hdr = 1 + print("sheet %d: name = %r; nrows = %d; ncols = %d" % + (shx, sh.name, sh.nrows, sh.ncols)) + print_labels(sh, clabs, 'Col') + print_labels(sh, rlabs, 'Row') + if bk.on_demand: bk.unload_sheet(shx) + + def show(bk, nshow=65535, printit=1): + bk_header(bk) + if 0: + rclist = xlrd.sheet.rc_stats.items() + rclist = sorted(rclist) + print("rc stats") + for k, v in rclist: + print("0x%04x %7d" % (k, v)) + if options.onesheet: + try: + shx = int(options.onesheet) + except ValueError: + shx = bk.sheet_by_name(options.onesheet).number + shxrange = [shx] + else: + shxrange = range(bk.nsheets) + # print("shxrange", list(shxrange)) + for shx in shxrange: + sh = bk.sheet_by_index(shx) + nrows, ncols = sh.nrows, sh.ncols + colrange = range(ncols) + anshow = min(nshow, nrows) + print("sheet %d: name = %s; nrows = %d; ncols = %d" % + (shx, REPR(sh.name), sh.nrows, sh.ncols)) + if nrows and ncols: + # Beat the bounds + for rowx in xrange(nrows): + nc = sh.row_len(rowx) + if nc: + sh.row_types(rowx)[nc-1] + sh.row_values(rowx)[nc-1] + sh.cell(rowx, nc-1) + for rowx in xrange(anshow-1): + if not printit and rowx % 10000 == 1 and rowx > 1: + print("done %d rows" % (rowx-1,)) + show_row(bk, sh, rowx, colrange, printit) + if anshow and nrows: + show_row(bk, sh, nrows-1, colrange, printit) + print() + if bk.on_demand: bk.unload_sheet(shx) + + def count_xfs(bk): + bk_header(bk) + for shx in range(bk.nsheets): + sh = bk.sheet_by_index(shx) + nrows = sh.nrows + print("sheet %d: name = %r; nrows = %d; ncols = %d" % + (shx, sh.name, sh.nrows, sh.ncols)) + # Access all xfindexes to force gathering stats + type_stats = [0, 0, 0, 0, 0, 0, 0] + for rowx in xrange(nrows): + for colx in xrange(sh.row_len(rowx)): + xfx = sh.cell_xf_index(rowx, colx) + assert xfx >= 0 + cty = sh.cell_type(rowx, colx) + type_stats[cty] += 1 + print("XF stats", sh._xf_index_stats) + print("type stats", type_stats) + print() + if bk.on_demand: bk.unload_sheet(shx) + + def main(cmd_args): + import optparse + global options + usage = "\n%prog [options] command [input-file-patterns]\n" + cmd_doc + oparser = optparse.OptionParser(usage) + oparser.add_option( + "-l", "--logfilename", + default="", + help="contains error messages") + oparser.add_option( + "-v", "--verbosity", + type="int", default=0, + help="level of information and diagnostics provided") + oparser.add_option( + "-m", "--mmap", + type="int", default=-1, + help="1: use mmap; 0: don't use mmap; -1: accept heuristic") + oparser.add_option( + "-e", "--encoding", + default="", + help="encoding override") + oparser.add_option( + "-f", "--formatting", + type="int", default=0, + help="0 (default): no fmt info\n" + "1: fmt info (all cells)\n", + ) + oparser.add_option( + "-g", "--gc", + type="int", default=0, + help="0: auto gc enabled; 1: auto gc disabled, manual collect after each file; 2: no gc") + oparser.add_option( + "-s", "--onesheet", + default="", + help="restrict output to this sheet (name or index)") + oparser.add_option( + "-u", "--unnumbered", + action="store_true", default=0, + help="omit line numbers or offsets in biff_dump") + oparser.add_option( + "-d", "--on-demand", + action="store_true", default=0, + help="load sheets on demand instead of all at once") + oparser.add_option( + "-t", "--suppress-timing", + action="store_true", default=0, + help="don't print timings (diffs are less messy)") + oparser.add_option( + "-r", "--ragged-rows", + action="store_true", default=0, + help="open_workbook(..., ragged_rows=True)") + options, args = oparser.parse_args(cmd_args) + if len(args) == 1 and args[0] in ("version", ): + pass + elif len(args) < 2: + oparser.error("Expected at least 2 args, found %d" % len(args)) + cmd = args[0] + xlrd_version = getattr(xlrd, "__VERSION__", "unknown; before 0.5") + if cmd == 'biff_dump': + xlrd.dump(args[1], unnumbered=options.unnumbered) + sys.exit(0) + if cmd == 'biff_count': + xlrd.count_records(args[1]) + sys.exit(0) + if cmd == 'version': + print("xlrd: %s, from %s" % (xlrd_version, xlrd.__file__)) + print("Python:", sys.version) + sys.exit(0) + if options.logfilename: + logfile = LogHandler(open(options.logfilename, 'w')) + else: + logfile = sys.stdout + mmap_opt = options.mmap + mmap_arg = xlrd.USE_MMAP + if mmap_opt in (1, 0): + mmap_arg = mmap_opt + elif mmap_opt != -1: + print('Unexpected value (%r) for mmap option -- assuming default' % mmap_opt) + fmt_opt = options.formatting | (cmd in ('xfc', )) + gc_mode = options.gc + if gc_mode: + gc.disable() + for pattern in args[1:]: + for fname in glob.glob(pattern): + print("\n=== File: %s ===" % fname) + if logfile != sys.stdout: + logfile.setfileheading("\n=== File: %s ===\n" % fname) + if gc_mode == 1: + n_unreachable = gc.collect() + if n_unreachable: + print("GC before open:", n_unreachable, "unreachable objects") + try: + t0 = time.time() + bk = xlrd.open_workbook( + fname, + verbosity=options.verbosity, logfile=logfile, + use_mmap=mmap_arg, + encoding_override=options.encoding, + formatting_info=fmt_opt, + on_demand=options.on_demand, + ragged_rows=options.ragged_rows, + ) + t1 = time.time() + if not options.suppress_timing: + print("Open took %.2f seconds" % (t1-t0,)) + except xlrd.XLRDError as e: + print("*** Open failed: %s: %s" % (type(e).__name__, e)) + continue + except KeyboardInterrupt: + print("*** KeyboardInterrupt ***") + traceback.print_exc(file=sys.stdout) + sys.exit(1) + except BaseException as e: + print("*** Open failed: %s: %s" % (type(e).__name__, e)) + traceback.print_exc(file=sys.stdout) + continue + t0 = time.time() + if cmd == 'hdr': + bk_header(bk) + elif cmd == 'ov': # OverView + show(bk, 0) + elif cmd == 'show': # all rows + show(bk) + elif cmd == '2rows': # first row and last row + show(bk, 2) + elif cmd == '3rows': # first row, 2nd row and last row + show(bk, 3) + elif cmd == 'bench': + show(bk, printit=0) + elif cmd == 'fonts': + bk_header(bk) + show_fonts(bk) + elif cmd == 'names': # named reference list + show_names(bk) + elif cmd == 'name_dump': # named reference list + show_names(bk, dump=1) + elif cmd == 'labels': + show_labels(bk) + elif cmd == 'xfc': + count_xfs(bk) + else: + print("*** Unknown command <%s>" % cmd) + sys.exit(1) + del bk + if gc_mode == 1: + n_unreachable = gc.collect() + if n_unreachable: + print("GC post cmd:", fname, "->", n_unreachable, "unreachable objects") + if not options.suppress_timing: + t1 = time.time() + print("\ncommand took %.2f seconds\n" % (t1-t0,)) + + return None + + av = sys.argv[1:] + if not av: + main(av) + firstarg = av[0].lower() + if firstarg == "hotshot": + import hotshot + import hotshot.stats + av = av[1:] + prof_log_name = "XXXX.prof" + prof = hotshot.Profile(prof_log_name) + # benchtime, result = prof.runcall(main, *av) + result = prof.runcall(main, *(av, )) + print("result", repr(result)) + prof.close() + stats = hotshot.stats.load(prof_log_name) + stats.strip_dirs() + stats.sort_stats('time', 'calls') + stats.print_stats(20) + elif firstarg == "profile": + import cProfile + av = av[1:] + cProfile.run('main(av)', 'YYYY.prof') + import pstats + p = pstats.Stats('YYYY.prof') + p.strip_dirs().sort_stats('cumulative').print_stats(30) + else: + main(av) |
