From 4a52a71956a8d46fcb7294ac71734504bb09bcc2 Mon Sep 17 00:00:00 2001 From: S. Solomon Darnell Date: Fri, 28 Mar 2025 21:52:21 -0500 Subject: two version of R2R are here --- .../site-packages/lxml/includes/htmlparser.pxd | 56 ++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 .venv/lib/python3.12/site-packages/lxml/includes/htmlparser.pxd (limited to '.venv/lib/python3.12/site-packages/lxml/includes/htmlparser.pxd') diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/htmlparser.pxd b/.venv/lib/python3.12/site-packages/lxml/includes/htmlparser.pxd new file mode 100644 index 00000000..31dcc406 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/htmlparser.pxd @@ -0,0 +1,56 @@ +from libc.string cimport const_char + +from lxml.includes.tree cimport xmlDoc +from lxml.includes.tree cimport xmlInputReadCallback, xmlInputCloseCallback +from lxml.includes.xmlparser cimport xmlParserCtxt, xmlSAXHandler, xmlSAXHandlerV1 + +cdef extern from "libxml/HTMLparser.h" nogil: + ctypedef enum htmlParserOption: + HTML_PARSE_NOERROR # suppress error reports + HTML_PARSE_NOWARNING # suppress warning reports + HTML_PARSE_PEDANTIC # pedantic error reporting + HTML_PARSE_NOBLANKS # remove blank nodes + HTML_PARSE_NONET # Forbid network access + # libxml2 2.6.21+ only: + HTML_PARSE_RECOVER # Relaxed parsing + HTML_PARSE_COMPACT # compact small text nodes + # libxml2 2.7.7+ only: + HTML_PARSE_NOIMPLIED # Do not add implied html/body... elements + # libxml2 2.7.8+ only: + HTML_PARSE_NODEFDTD # do not default a doctype if not found + # libxml2 2.8.0+ only: + XML_PARSE_IGNORE_ENC # ignore internal document encoding hint + + xmlSAXHandlerV1 htmlDefaultSAXHandler + + cdef xmlParserCtxt* htmlCreateMemoryParserCtxt( + char* buffer, int size) + cdef xmlParserCtxt* htmlCreateFileParserCtxt( + char* filename, char* encoding) + cdef xmlParserCtxt* htmlCreatePushParserCtxt(xmlSAXHandler* sax, + void* user_data, + char* chunk, int size, + char* filename, int enc) + cdef void htmlFreeParserCtxt(xmlParserCtxt* ctxt) + cdef void htmlCtxtReset(xmlParserCtxt* ctxt) + cdef int htmlCtxtUseOptions(xmlParserCtxt* ctxt, int options) + cdef int htmlParseDocument(xmlParserCtxt* ctxt) + cdef int htmlParseChunk(xmlParserCtxt* ctxt, + char* chunk, int size, int terminate) + + cdef xmlDoc* htmlCtxtReadFile(xmlParserCtxt* ctxt, + char* filename, const_char* encoding, + int options) + cdef xmlDoc* htmlCtxtReadDoc(xmlParserCtxt* ctxt, + char* buffer, char* URL, const_char* encoding, + int options) + cdef xmlDoc* htmlCtxtReadIO(xmlParserCtxt* ctxt, + xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, + void* ioctx, + char* URL, const_char* encoding, + int options) + cdef xmlDoc* htmlCtxtReadMemory(xmlParserCtxt* ctxt, + char* buffer, int size, + char* filename, const_char* encoding, + int options) -- cgit v1.2.3