1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
|
# support for extension functions in XPath and XSLT
cdef class XPathError(LxmlError):
    """Base class of all XPath errors.

    Raised directly by ``_BaseContext.context_node`` when the XPath context
    cannot provide a usable context node.  ``XPathEvalError`` derives from
    this class.
    """
cdef class XPathEvalError(XPathError):
    """Error during XPath evaluation.

    Parent class of ``XPathFunctionError`` and ``XPathResultError``.
    """
cdef class XPathFunctionError(XPathEvalError):
    """Internal error looking up an XPath extension function.

    Stored as the pending exception by ``_xpath_function_call()`` when the
    function requested by libxml2 is not found in the context's function
    cache.
    """
cdef class XPathResultError(XPathEvalError):
    """Error handling an XPath result.

    Raised by ``_wrapXPathObject()`` for Python values that cannot be turned
    into an XPath object and by ``_unwrapXPathObject()`` for undefined or
    unknown XPath result types.
    """
# forward declarations

# Callback signature taken by the (un)register*Functions() methods of
# _BaseContext; it is invoked there as ``reg_func(ctxt, name_utf, ns_utf)``.
ctypedef int (*_register_function)(void* ctxt, name_utf, ns_uri_utf)

# Declared ahead of its definition because _BaseContext.__init__() already
# instantiates it.
cdef class _ExsltRegExp
################################################################################
# Base class for XSLT and XPath evaluation contexts: functions, namespaces, ...
@cython.internal
cdef class _BaseContext:
    """Shared state of XPath/XSLT evaluation contexts.

    Keeps the namespaces and extension functions to register with libxml2,
    the UTF-8 encoded strings that were handed out for them, and temporary
    references to Python objects that must stay alive during an evaluation.
    """
    cdef xpath.xmlXPathContext* _xpathCtxt
    cdef _Document _doc
    cdef dict _extensions           # {(ns_utf, name_utf): function} or None
    cdef list _namespaces           # [(prefix_utf, ns_uri_utf)] or None
    cdef list _global_namespaces    # prefixes registered by register[Global]Namespace(s)
    cdef dict _utf_refs             # {original string: UTF-8 encoded bytes}
    cdef dict _function_cache       # {ns_utf: {name_utf: function}}
    cdef dict _eval_context_dict    # lazily created by the 'eval_context' property
    cdef bint _build_smart_strings
    # for exception handling and temporary reference keeping:
    cdef _TempStore _temp_refs
    cdef set _temp_documents
    cdef _ExceptionContext _exc
    cdef _ErrorLog _error_log

    def __cinit__(self):
        self._xpathCtxt = NULL

    def __init__(self, namespaces, extensions, error_log, enable_regexp,
                 build_smart_strings):
        """Set up the context.

        ``namespaces`` is None, a dict or an iterable of (prefix, URI) pairs.
        ``extensions`` is None, a dict or an iterable of dicts that map
        (namespace URI, name) to a function.  All names are converted to
        UTF-8 via ``_to_utf()``.

        Raises ValueError for an extension name of None and TypeError for an
        empty namespace prefix or an empty namespace URI.
        """
        cdef _ExsltRegExp _regexp
        cdef dict new_extensions
        cdef list ns
        self._utf_refs = {}
        self._global_namespaces = []
        self._function_cache = {}
        self._eval_context_dict = None
        self._error_log = error_log

        if extensions is not None:
            # convert extensions to UTF-8
            if isinstance(extensions, dict):
                extensions = (extensions,)
            # format: [ {(ns, name):function} ] -> {(ns_utf, name_utf):function}
            new_extensions = {}
            for extension in extensions:
                for (ns_uri, name), function in extension.items():
                    if name is None:
                        raise ValueError, "extensions must have non empty names"
                    ns_utf = self._to_utf(ns_uri)
                    name_utf = self._to_utf(name)
                    new_extensions[(ns_utf, name_utf)] = function
            extensions = new_extensions or None

        if namespaces is not None:
            if isinstance(namespaces, dict):
                namespaces = namespaces.items()
            if namespaces:
                ns = []
                for prefix, ns_uri in namespaces:
                    if prefix is None or not prefix:
                        raise TypeError, \
                            "empty namespace prefix is not supported in XPath"
                    if ns_uri is None or not ns_uri:
                        raise TypeError, \
                            "setting default namespace is not supported in XPath"
                    prefix_utf = self._to_utf(prefix)
                    ns_uri_utf = self._to_utf(ns_uri)
                    ns.append( (prefix_utf, ns_uri_utf) )
                namespaces = ns
            else:
                namespaces = None

        self._doc = None
        self._exc = _ExceptionContext()
        self._extensions = extensions
        self._namespaces = namespaces
        self._temp_refs = _TempStore()
        self._temp_documents = set()
        self._build_smart_strings = build_smart_strings

        if enable_regexp:
            _regexp = _ExsltRegExp()
            _regexp._register_in_context(self)

    cdef _BaseContext _copy(self):
        """Create a new context of the same class that has copies of this
        context's namespaces and extension functions.
        """
        cdef _BaseContext context
        if self._namespaces is not None:
            namespaces = self._namespaces[:]
        else:
            namespaces = None
        # Extensions are passed as None and copied below because they are
        # already stored in their UTF-8 encoded form.
        context = self.__class__(namespaces, None, self._error_log, False,
                                 self._build_smart_strings)
        if self._extensions is not None:
            context._extensions = self._extensions.copy()
        return context

    cdef bytes _to_utf(self, s):
        "Convert to UTF-8 and keep a reference to the encoded string"
        cdef python.PyObject* dict_result
        if s is None:
            return None
        dict_result = python.PyDict_GetItem(self._utf_refs, s)
        if dict_result is not NULL:
            return <bytes>dict_result
        utf = _utf8(s)
        self._utf_refs[s] = utf
        if python.IS_PYPY:
            # use C level refs, PyPy refs are not enough!
            # (released again in _cleanup_context())
            python.Py_INCREF(utf)
        return utf

    cdef void _set_xpath_context(self, xpath.xmlXPathContext* xpathCtxt) noexcept:
        """Attach this object to the libxml2 XPath context as its user data
        and install the XPath error callback.
        """
        self._xpathCtxt = xpathCtxt
        xpathCtxt.userData = <void*>self
        # Need a cast here because older libxml2 releases do not use 'const' in the functype.
        xpathCtxt.error = <xmlerror.xmlStructuredErrorFunc> _receiveXPathError

    @cython.final
    cdef _register_context(self, _Document doc):
        "Remember the context document and reset the stored exception state."
        self._doc = doc
        self._exc.clear()

    @cython.final
    cdef _cleanup_context(self):
        "Drop the cached UTF-8 strings, the eval context dict and the document."
        #xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt)
        #self.unregisterGlobalNamespaces()
        if python.IS_PYPY:
            # clean up double refs in PyPy (see "_to_utf()" method)
            for ref in self._utf_refs.itervalues():
                python.Py_DECREF(ref)
        self._utf_refs.clear()
        self._eval_context_dict = None
        self._doc = None

    @cython.final
    cdef _release_context(self):
        "Detach this object from the libxml2 XPath context, if one is set."
        if self._xpathCtxt is not NULL:
            self._xpathCtxt.userData = NULL
        self._xpathCtxt = NULL

    # namespaces (internal UTF-8 methods with leading '_')

    cdef addNamespace(self, prefix, ns_uri):
        """Add or replace a local prefix-to-namespace mapping.

        The mapping is also registered with libxml2 right away if an XPath
        context is currently set.  Raises TypeError if ``prefix`` is None.
        """
        cdef list namespaces
        cdef bint replaced = False
        if prefix is None:
            raise TypeError, "empty prefix is not supported in XPath"
        prefix_utf = self._to_utf(prefix)
        ns_uri_utf = self._to_utf(ns_uri)
        new_item = (prefix_utf, ns_uri_utf)
        if self._namespaces is None:
            self._namespaces = [new_item]
        else:
            namespaces = []
            for item in self._namespaces:
                if item[0] == prefix_utf:
                    if replaced:
                        # Stale duplicate of the same prefix (possible when
                        # __init__() received a pair list): drop it instead
                        # of storing a None entry that would later break
                        # registerLocalNamespaces().
                        continue
                    item = new_item
                    replaced = True
                namespaces.append(item)
            if not replaced:
                namespaces.append(new_item)
            self._namespaces = namespaces
        if self._xpathCtxt is not NULL:
            xpath.xmlXPathRegisterNs(
                self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf))

    cdef registerNamespace(self, prefix, ns_uri):
        """Register a prefix with libxml2 and record it as a global prefix
        so that unregisterGlobalNamespaces() removes it again.

        Raises TypeError if ``prefix`` is None.
        """
        if prefix is None:
            raise TypeError, "empty prefix is not supported in XPath"
        prefix_utf = self._to_utf(prefix)
        ns_uri_utf = self._to_utf(ns_uri)
        self._global_namespaces.append(prefix_utf)
        xpath.xmlXPathRegisterNs(self._xpathCtxt,
                                 _xcstr(prefix_utf), _xcstr(ns_uri_utf))

    cdef registerLocalNamespaces(self):
        "Register all local prefix mappings with the libxml2 XPath context."
        if self._namespaces is None:
            return
        for prefix_utf, ns_uri_utf in self._namespaces:
            xpath.xmlXPathRegisterNs(
                self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf))

    cdef registerGlobalNamespaces(self):
        """Register the prefixes returned by _find_all_extension_prefixes()
        and record them as global prefixes.
        """
        cdef list ns_prefixes = _find_all_extension_prefixes()
        if python.PyList_GET_SIZE(ns_prefixes) > 0:
            for prefix_utf, ns_uri_utf in ns_prefixes:
                self._global_namespaces.append(prefix_utf)
                xpath.xmlXPathRegisterNs(
                    self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf))

    cdef unregisterGlobalNamespaces(self):
        "Unregister all recorded global prefixes and forget them."
        if python.PyList_GET_SIZE(self._global_namespaces) > 0:
            for prefix_utf in self._global_namespaces:
                # registering a NULL URI removes the prefix mapping
                xpath.xmlXPathRegisterNs(self._xpathCtxt,
                                         _xcstr(prefix_utf), NULL)
            del self._global_namespaces[:]

    cdef void _unregisterNamespace(self, prefix_utf) noexcept:
        "Unregister a single, already UTF-8 encoded prefix."
        xpath.xmlXPathRegisterNs(self._xpathCtxt,
                                 _xcstr(prefix_utf), NULL)

    # extension functions

    cdef int _addLocalExtensionFunction(self, ns_utf, name_utf, function) except -1:
        "Store a local extension function under its UTF-8 (namespace, name) key."
        if self._extensions is None:
            self._extensions = {}
        self._extensions[(ns_utf, name_utf)] = function
        return 0

    cdef registerGlobalFunctions(self, void* ctxt,
                                 _register_function reg_func):
        """Copy all functions of the global namespace registries into the
        function cache and pass each one to ``reg_func``.
        """
        cdef python.PyObject* dict_result
        cdef dict d
        for ns_utf, ns_functions in __FUNCTION_NAMESPACE_REGISTRIES.iteritems():
            dict_result = python.PyDict_GetItem(
                self._function_cache, ns_utf)
            if dict_result is not NULL:
                d = <dict>dict_result
            else:
                d = {}
                self._function_cache[ns_utf] = d
            for name_utf, function in ns_functions.iteritems():
                d[name_utf] = function
                reg_func(ctxt, name_utf, ns_utf)

    cdef registerLocalFunctions(self, void* ctxt,
                                _register_function reg_func):
        """Copy the local extension functions into the function cache and
        pass each one to ``reg_func``.
        """
        cdef python.PyObject* dict_result
        cdef dict d
        if self._extensions is None:
            return # done
        last_ns = None
        d = None
        for (ns_utf, name_utf), function in self._extensions.iteritems():
            # only look up the per-namespace dict again when the namespace
            # object differs from the one used in the previous iteration
            if ns_utf is not last_ns or d is None:
                last_ns = ns_utf
                dict_result = python.PyDict_GetItem(
                    self._function_cache, ns_utf)
                if dict_result is not NULL:
                    d = <dict>dict_result
                else:
                    d = {}
                    self._function_cache[ns_utf] = d
            d[name_utf] = function
            reg_func(ctxt, name_utf, ns_utf)

    cdef unregisterAllFunctions(self, void* ctxt,
                                _register_function unreg_func):
        "Pass every cached function name to ``unreg_func``."
        for ns_utf, functions in self._function_cache.iteritems():
            for name_utf in functions:
                unreg_func(ctxt, name_utf, ns_utf)

    cdef unregisterGlobalFunctions(self, void* ctxt,
                                   _register_function unreg_func):
        """Pass every cached function name to ``unreg_func`` that is not a
        local extension function.
        """
        for ns_utf, functions in self._function_cache.items():
            for name_utf in functions:
                if self._extensions is None or \
                       (ns_utf, name_utf) not in self._extensions:
                    unreg_func(ctxt, name_utf, ns_utf)

    @cython.final
    cdef _find_cached_function(self, const_xmlChar* c_ns_uri, const_xmlChar* c_name):
        """Lookup an extension function in the cache and return it.

        Parameters: c_ns_uri may be NULL, c_name must not be NULL

        Returns None if the function is not in the cache.
        """
        cdef python.PyObject* c_dict
        cdef python.PyObject* dict_result
        c_dict = python.PyDict_GetItem(
            self._function_cache, None if c_ns_uri is NULL else c_ns_uri)
        if c_dict is not NULL:
            dict_result = python.PyDict_GetItem(
                <object>c_dict, <unsigned char*>c_name)
            if dict_result is not NULL:
                return <object>dict_result
        return None

    # Python access to the XPath context for extension functions

    @property
    def context_node(self):
        """The current XPath context node as an element of the context document.

        Raises XPathError if no XPath context or context node is set, if the
        node belongs to a different document than the XPath context, or if
        no context document is known.
        """
        cdef xmlNode* c_node
        if self._xpathCtxt is NULL:
            raise XPathError, \
                "XPath context is only usable during the evaluation"
        c_node = self._xpathCtxt.node
        if c_node is NULL:
            raise XPathError, "no context node"
        if c_node.doc != self._xpathCtxt.doc:
            raise XPathError, \
                "document-external context nodes are not supported"
        if self._doc is None:
            raise XPathError, "document context is missing"
        return _elementFactory(self._doc, c_node)

    @property
    def eval_context(self):
        """A dict that is created on first access and discarded again by
        _cleanup_context().
        """
        if self._eval_context_dict is None:
            self._eval_context_dict = {}
        return self._eval_context_dict

    # Python reference keeping during XPath function evaluation

    @cython.final
    cdef _release_temp_refs(self):
        "Free temporarily referenced objects from this context."
        self._temp_refs.clear()
        self._temp_documents.clear()

    @cython.final
    cdef _hold(self, obj):
        """A way to temporarily hold references to nodes in the evaluator.

        This is needed because otherwise nodes created in XPath extension
        functions would be reference counted too soon, during the XPath
        evaluation.  This is most important in the case of exceptions.

        ``obj`` may be a single element or a sequence; only the elements
        found in it (and their documents) are kept.  Strings and
        non-sequences are ignored.
        """
        if isinstance(obj, _Element):
            self._temp_refs.add(obj)
            self._temp_documents.add((<_Element>obj)._doc)
            return
        elif _isString(obj) or not python.PySequence_Check(obj):
            return
        for o in obj:
            if isinstance(o, _Element):
                self._temp_refs.add(o)
                self._temp_documents.add((<_Element>o)._doc)

    @cython.final
    cdef _Document _findDocumentForNode(self, xmlNode* c_node):
        """If an XPath expression returns an element from a different
        document than the current context document, we call this to
        see if it was possibly created by an extension and is a known
        document instance.

        Returns the matching document or None.
        """
        cdef _Document doc
        for doc in self._temp_documents:
            if doc is not None and doc._c_doc is c_node.doc:
                return doc
        return None
# libxml2 keeps these error messages in a static array in its code
# and doesn't give us access to them ...
#
# _forwardXPathError() indexes this tuple with
# (error code - xmlerror.XML_XPATH_EXPRESSION_OK), so the order of the
# entries matters.  The values are byte strings because they are handed
# to libxml2 error structs as C strings.
cdef tuple LIBXML2_XPATH_ERROR_MESSAGES = (
    b"Ok",
    b"Number encoding",
    b"Unfinished literal",
    b"Start of literal",
    b"Expected $ for variable reference",
    b"Undefined variable",
    b"Invalid predicate",
    b"Invalid expression",
    b"Missing closing curly brace",
    b"Unregistered function",
    b"Invalid operand",
    b"Invalid type",
    b"Invalid number of arguments",
    b"Invalid context size",
    b"Invalid context position",
    b"Memory allocation error",
    b"Syntax error",
    b"Resource error",
    b"Sub resource error",
    b"Undefined namespace prefix",
    b"Encoding error",
    b"Char out of XML range",
    b"Invalid or incomplete context",
    b"Stack usage error",
    b"Forbidden variable\n",
    b"?? Unknown error ??\n",
)
cdef void _forwardXPathError(void* c_ctxt, const xmlerror.xmlError* c_error) noexcept with gil:
    """Copy a libxml2 XPath error into a local error struct and pass it to
    the error log of the _BaseContext behind ``c_ctxt``.

    When libxml2 did not provide a message, one is looked up in
    LIBXML2_XPATH_ERROR_MESSAGES based on the error code.
    """
    cdef xmlerror.xmlError error
    cdef int message_index

    # plain field copies first
    error.domain = c_error.domain
    error.code = c_error.code
    error.level = c_error.level
    error.line = c_error.line
    error.int2 = c_error.int1  # column
    error.file = c_error.file
    error.node = NULL

    if c_error.message is NULL:
        # map the error code to its position in the message table
        message_index = c_error.code - xmlerror.XML_XPATH_EXPRESSION_OK
        if message_index < 0 or message_index >= len(LIBXML2_XPATH_ERROR_MESSAGES):
            error.message = b"unknown error"
        else:
            error.message = _cstr(LIBXML2_XPATH_ERROR_MESSAGES[message_index])
    else:
        error.message = c_error.message

    (<_BaseContext>c_ctxt)._error_log._receive(&error)
cdef void _receiveXPathError(void* c_context, const xmlerror.xmlError* error) noexcept nogil:
    """Error callback installed on XPath contexts by
    _BaseContext._set_xpath_context().

    Does nothing unless __DEBUG is set.  Errors without a context are passed
    to _forwardError(), all others to _forwardXPathError().
    """
    if __DEBUG:
        if c_context is not NULL:
            _forwardXPathError(c_context, error)
        else:
            _forwardError(NULL, error)
def Extension(module, function_mapping=None, *, ns=None):
    """Extension(module, function_mapping=None, *, ns=None)

    Collect functions of a module (or methods/attributes of any object)
    into a dictionary of XPath extension functions.

    ``function_mapping`` selects what is taken from ``module``:

    * a dict maps attribute names to the XPath function names to use,
    * any other iterable lists attribute names that are used unchanged,
    * None takes every attribute whose name does not start with '_'.

    The keyword-only ``ns`` argument is the namespace URI under which the
    functions are registered.  The result maps ``(ns, xpath_name)`` to the
    attribute looked up on ``module``.
    """
    if isinstance(function_mapping, dict):
        name_pairs = function_mapping.items()
    else:
        if function_mapping is None:
            function_mapping = [
                name for name in dir(module) if not name.startswith('_')]
        # attribute name and XPath name are the same here
        name_pairs = ((name, name) for name in function_mapping)
    return {
        (ns, xpath_name): getattr(module, attr_name)
        for attr_name, xpath_name in name_pairs
    }
################################################################################
# EXSLT regexp implementation
@cython.final
@cython.internal
cdef class _ExsltRegExp:
    """Implementation of the EXSLT regular expression functions ``test``,
    ``match`` and ``replace`` on top of Python's ``re`` module.

    Compiled patterns are cached per instance.
    """
    cdef dict _compile_map  # {(pattern, ignore_case): compiled pattern}

    def __cinit__(self):
        self._compile_map = {}

    cdef _make_string(self, value):
        """Turn an XPath argument into a string.

        Strings are returned unchanged.  Of a list (node set), only the
        first item counts: its string value, the text content of an
        element, or ``unicode()`` of anything else; an empty list gives ''.
        All other values are passed through ``unicode()``.
        """
        if _isString(value):
            return value
        if not isinstance(value, list):
            return unicode(value)

        # node set: take recursive text concatenation of first element
        if python.PyList_GET_SIZE(value) == 0:
            return ''
        head = value[0]
        if _isString(head):
            return head
        if not isinstance(head, _Element):
            return unicode(head)

        c_text = tree.xmlNodeGetContent((<_Element>head)._c_node)
        if c_text is NULL:
            raise MemoryError()
        try:
            return funicode(c_text)
        finally:
            tree.xmlFree(c_text)

    cdef _compile(self, rexp, ignore_case):
        "Return the compiled pattern for ``rexp``, using the instance cache."
        cdef python.PyObject* c_cached
        pattern = self._make_string(rexp)
        cache_key = (pattern, ignore_case)
        c_cached = python.PyDict_GetItem(self._compile_map, cache_key)
        if c_cached is not NULL:
            return <object>c_cached
        py_flags = (re.UNICODE | re.IGNORECASE) if ignore_case else re.UNICODE
        compiled = re.compile(pattern, py_flags)
        self._compile_map[cache_key] = compiled
        return compiled

    def test(self, ctxt, s, rexp, flags=''):
        """Return True if ``rexp`` matches anywhere in ``s``.

        An 'i' in ``flags`` makes the match case insensitive.
        """
        flags = self._make_string(flags)
        s = self._make_string(s)
        rexpc = self._compile(rexp, 'i' in flags)
        return rexpc.search(s) is not None

    def match(self, ctxt, s, rexp, flags=''):
        """Return the matches of ``rexp`` in ``s`` as a list of <match>
        elements, or an empty tuple if nothing matched.

        With 'g' in ``flags`` the results of ``findall()`` are returned,
        otherwise the first match followed by its groups.  An 'i' in
        ``flags`` makes the match case insensitive.
        """
        cdef list match_elements
        flags = self._make_string(flags)
        s = self._make_string(s)
        rexpc = self._compile(rexp, 'i' in flags)

        if 'g' not in flags:
            first = rexpc.search(s)
            if first is None:
                return ()
            # whole match first, then all groups (unmatched ones as '')
            results = [ first.group() ] + list(first.groups(''))
        else:
            results = rexpc.findall(s)
            if not results:
                return ()

        root = Element('matches')
        match_elements = []
        for s_match in results:
            if python.PyTuple_CheckExact(s_match):
                # findall() returns tuples for patterns with several groups
                s_match = ''.join(s_match)
            elem = SubElement(root, 'match')
            elem.text = s_match
            match_elements.append(elem)
        return match_elements

    def replace(self, ctxt, s, rexp, flags, replacement):
        """Replace matches of ``rexp`` in ``s`` by ``replacement``.

        Only the first match is replaced unless ``flags`` contains 'g'.
        An 'i' in ``flags`` makes the match case insensitive.
        """
        replacement = self._make_string(replacement)
        flags = self._make_string(flags)
        s = self._make_string(s)
        rexpc = self._compile(rexp, 'i' in flags)
        count: object = 1
        if 'g' in flags:
            # a count of 0 makes sub() replace all occurrences
            count = 0
        return rexpc.sub(replacement, s, count)

    cdef _register_in_context(self, _BaseContext context):
        "Add the three functions to ``context`` as local extension functions."
        ns = b"http://exslt.org/regular-expressions"
        for name_utf, function in ((b"test", self.test),
                                   (b"match", self.match),
                                   (b"replace", self.replace)):
            context._addLocalExtensionFunction(ns, name_utf, function)
################################################################################
# helper functions
cdef xpath.xmlXPathObject* _wrapXPathObject(object obj, _Document doc,
                                            _BaseContext context) except NULL:
    # Convert a Python value into a newly created libxml2 XPath object.
    #
    # Supported values: unicode/bytes strings, bools, numbers, None (empty
    # node set), a single _Element, and sequences of _Elements and strings.
    # Strings inside a sequence become text nodes below a temporary
    # "text-root" element, which requires both 'context' and 'doc'.
    #
    # Raises XPathResultError for unsupported values and MemoryError if a
    # text or comment node cannot be created.
    cdef xpath.xmlNodeSet* resultSet
    cdef _Element fake_node = None
    cdef xmlNode* c_node

    if isinstance(obj, unicode):
        obj = _utf8(obj)
    if isinstance(obj, bytes):
        # libxml2 copies the string value
        return xpath.xmlXPathNewCString(_cstr(obj))
    # bool is tested before the generic number check below, so that True and
    # False become XPath booleans
    if isinstance(obj, bool):
        return xpath.xmlXPathNewBoolean(obj)
    if python.PyNumber_Check(obj):
        return xpath.xmlXPathNewFloat(obj)
    if obj is None:
        resultSet = xpath.xmlXPathNodeSetCreate(NULL)
    elif isinstance(obj, _Element):
        resultSet = xpath.xmlXPathNodeSetCreate((<_Element>obj)._c_node)
    elif python.PySequence_Check(obj):
        resultSet = xpath.xmlXPathNodeSetCreate(NULL)
        try:
            for value in obj:
                if isinstance(value, _Element):
                    if context is not None:
                        context._hold(value)
                    xpath.xmlXPathNodeSetAdd(resultSet, (<_Element>value)._c_node)
                else:
                    if context is None or doc is None:
                        raise XPathResultError, \
                              f"Non-Element values not supported at this point - got {value!r}"
                    # support strings by appending text nodes to an Element
                    if isinstance(value, unicode):
                        value = _utf8(value)
                    if isinstance(value, bytes):
                        if fake_node is None:
                            # created once, for the first string in the sequence
                            fake_node = _makeElement("text-root", NULL, doc, None,
                                                     None, None, None, None, None)
                            context._hold(fake_node)
                        else:
                            # append a comment node to keep the text nodes separate
                            c_node = tree.xmlNewDocComment(doc._c_doc, <unsigned char*>"")
                            if c_node is NULL:
                                raise MemoryError()
                            tree.xmlAddChild(fake_node._c_node, c_node)
                        context._hold(value)
                        c_node = tree.xmlNewDocText(doc._c_doc, _xcstr(value))
                        if c_node is NULL:
                            raise MemoryError()
                        tree.xmlAddChild(fake_node._c_node, c_node)
                        xpath.xmlXPathNodeSetAdd(resultSet, c_node)
                    else:
                        raise XPathResultError, \
                              f"This is not a supported node-set result: {value!r}"
        except:
            # the node set is not wrapped yet, so it must be freed here
            xpath.xmlXPathFreeNodeSet(resultSet)
            raise
    else:
        raise XPathResultError, f"Unknown return type: {python._fqtypename(obj).decode('utf8')}"
    return xpath.xmlXPathWrapNodeSet(resultSet)
cdef object _unwrapXPathObject(xpath.xmlXPathObject* xpathObj,
                               _Document doc, _BaseContext context):
    """Convert a libxml2 XPath object into a Python value.

    Node sets and XSLT result tree fragments become lists, booleans and
    numbers are returned as stored, and strings are decoded (and wrapped as
    a smart string if the context asks for that).

    Raises XPathResultError for undefined or unknown result types and
    NotImplementedError for point, range, location set and user types.
    """
    if xpathObj.type == xpath.XPATH_NODESET or \
            xpathObj.type == xpath.XPATH_XSLT_TREE:
        return _createNodeSetResult(xpathObj, doc, context)
    if xpathObj.type == xpath.XPATH_BOOLEAN:
        return xpathObj.boolval
    if xpathObj.type == xpath.XPATH_NUMBER:
        return xpathObj.floatval
    if xpathObj.type == xpath.XPATH_STRING:
        stringval = funicode(xpathObj.stringval)
        if not context._build_smart_strings:
            return stringval
        # plain string results have no parent element
        return _elementStringResultFactory(stringval, None, None, False)

    # everything below is an error
    if xpathObj.type == xpath.XPATH_UNDEFINED:
        raise XPathResultError("Undefined xpath result")
    if xpathObj.type == xpath.XPATH_POINT:
        raise NotImplementedError("XPATH_POINT")
    if xpathObj.type == xpath.XPATH_RANGE:
        raise NotImplementedError("XPATH_RANGE")
    if xpathObj.type == xpath.XPATH_LOCATIONSET:
        raise NotImplementedError("XPATH_LOCATIONSET")
    if xpathObj.type == xpath.XPATH_USERS:
        raise NotImplementedError("XPATH_USERS")
    raise XPathResultError(f"Unknown xpath result {xpathObj.type}")
cdef object _createNodeSetResult(xpath.xmlXPathObject* xpathObj, _Document doc,
                                 _BaseContext context):
    """Build a Python list from the node set of an XPath object.

    Returns an empty list if the object has no node set.  Entries are
    unpacked as result tree fragment content if the object is of type
    XPATH_XSLT_TREE.
    """
    cdef xpath.xmlNodeSet* c_node_set = xpathObj.nodesetval
    cdef bint is_fragment = xpathObj.type == xpath.XPATH_XSLT_TREE
    cdef list nodes = []
    cdef int idx
    if c_node_set is not NULL:
        for idx in range(c_node_set.nodeNr):
            _unpackNodeSetEntry(nodes, c_node_set.nodeTab[idx], doc, context,
                                is_fragment)
    return nodes
cdef _unpackNodeSetEntry(list results, xmlNode* c_node, _Document doc,
                         _BaseContext context, bint is_fragment):
    """Append the Python representation of one node set entry to ``results``.

    * elements become element proxies (copied into ``doc`` first if they
      come from a foreign document without a ``_private`` pointer),
    * text, CDATA and attribute nodes become string results,
    * namespace declarations become (prefix, href) tuples,
    * document nodes are ignored unless ``is_fragment`` is set, in which
      case their children are unpacked instead,
    * XInclude start/end markers are skipped.

    Raises NotImplementedError for any other node type and MemoryError if
    copying a foreign element fails.
    """
    cdef xmlNode* c_child
    if _isElement(c_node):
        if c_node.doc != doc._c_doc and c_node.doc._private is NULL:
            # XXX: works, but maybe not always the right thing to do?
            # XPath: only runs when extensions create or copy trees
            #        -> we store Python refs to these, so that is OK
            # XSLT: can it leak when merging trees from multiple sources?
            c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1)
            if c_node is NULL:
                # the copy failed: do not hand a NULL node to the factory
                raise MemoryError()
            # FIXME: call _instantiateElementFromXPath() instead?
        results.append(
            _fakeDocElementFactory(doc, c_node))
    elif c_node.type == tree.XML_TEXT_NODE or \
             c_node.type == tree.XML_CDATA_SECTION_NODE or \
             c_node.type == tree.XML_ATTRIBUTE_NODE:
        results.append(
            _buildElementStringResult(doc, c_node, context))
    elif c_node.type == tree.XML_NAMESPACE_DECL:
        results.append( (funicodeOrNone((<xmlNs*>c_node).prefix),
                         funicodeOrNone((<xmlNs*>c_node).href)) )
    elif c_node.type == tree.XML_DOCUMENT_NODE or \
            c_node.type == tree.XML_HTML_DOCUMENT_NODE:
        # ignored for everything but result tree fragments
        if is_fragment:
            c_child = c_node.children
            while c_child is not NULL:
                _unpackNodeSetEntry(results, c_child, doc, context, 0)
                c_child = c_child.next
    elif c_node.type == tree.XML_XINCLUDE_START or \
            c_node.type == tree.XML_XINCLUDE_END:
        pass
    else:
        raise NotImplementedError, \
            f"Not yet implemented result node type: {c_node.type}"
cdef void _freeXPathObject(xpath.xmlXPathObject* xpathObj) noexcept:
    """Free the XPath object, but *never* free the *content* of node sets.
    Python dealloc will do that for us.

    The node set is detached from the object and freed separately before
    the object itself is freed.
    """
    cdef xpath.xmlNodeSet* c_node_set = xpathObj.nodesetval
    if c_node_set is not NULL:
        xpathObj.nodesetval = NULL
        xpath.xmlXPathFreeNodeSet(c_node_set)
    xpath.xmlXPathFreeObject(xpathObj)
cdef _Element _instantiateElementFromXPath(xmlNode* c_node, _Document doc,
                                           _BaseContext context):
    """Return an element proxy for a node found by an XPath evaluation.

    Nodes of the context document (or of a document with a ``_private``
    pointer) are wrapped directly.  Nodes of another document that the
    context knows about are wrapped with that document.  Any other node is
    copied into ``doc`` first.

    Raises MemoryError if that copy fails.
    """
    # NOTE: this may copy the element - only call this when it can't leak
    if c_node.doc != doc._c_doc and c_node.doc._private is NULL:
        # not from the context document and not from a fake document
        # either => may still be from a known document, e.g. one
        # created by an extension function
        node_doc = context._findDocumentForNode(c_node)
        if node_doc is None:
            # not from a known document at all! => can only make a
            # safety copy here
            c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1)
            if c_node is NULL:
                # the copy failed: do not hand a NULL node to the factory
                raise MemoryError()
        else:
            doc = node_doc
    return _fakeDocElementFactory(doc, c_node)
################################################################################
# special str/unicode subclasses
@cython.final
cdef class _ElementUnicodeResult(unicode):
    """String result of an XPath evaluation that remembers the element it
    was found on.

    ``attrname`` is the attribute name for attribute values (None
    otherwise) and ``is_tail`` tells whether the text is the tail text of
    the parent element.
    """
    cdef _Element _parent
    cdef readonly object attrname
    cdef readonly bint is_tail

    def getparent(self):
        "Return the element this string belongs to (may be None)."
        return self._parent

    @property
    def is_text(self):
        "True for the text content of a parent element (not tail, not attribute)."
        if self._parent is None:
            return False
        return not self.is_tail and self.attrname is None

    @property
    def is_attribute(self):
        "True if the string is an attribute value."
        return self.attrname is not None
cdef object _elementStringResultFactory(string_value, _Element parent,
                                        attrname, bint is_tail):
    """Create an _ElementUnicodeResult for ``string_value`` and fill in its
    parent element, attribute name and tail flag.
    """
    cdef _ElementUnicodeResult smart_string = _ElementUnicodeResult(string_value)
    smart_string.attrname = attrname
    smart_string.is_tail = is_tail
    smart_string._parent = parent
    return smart_string
cdef object _buildElementStringResult(_Document doc, xmlNode* c_node,
                                      _BaseContext context):
    """Build the string result for an attribute, text or CDATA node.

    Returns the plain string value unless the context builds smart
    strings.  In that case the result also carries the parent element (if
    one is found), the attribute name and the tail flag.

    Raises MemoryError if the attribute content cannot be retrieved.
    """
    cdef _Element parent = None
    cdef object attrname = None
    cdef xmlNode* c_element
    cdef bint is_tail

    if c_node.type == tree.XML_ATTRIBUTE_NODE:
        attrname = _namespacedName(c_node)
        is_tail = 0
        s = tree.xmlNodeGetContent(c_node)
        if s is NULL:
            # same handling as in _ExsltRegExp._make_string(): never pass a
            # NULL pointer on to funicode()
            raise MemoryError()
        try:
            value = funicode(s)
        finally:
            tree.xmlFree(s)
        c_element = NULL
    else:
        #assert c_node.type == tree.XML_TEXT_NODE or c_node.type == tree.XML_CDATA_SECTION_NODE, "invalid node type"
        # may be tail text or normal text
        value = funicode(c_node.content)
        c_element = _previousElement(c_node)
        is_tail = c_element is not NULL

    if not context._build_smart_strings:
        return value

    if c_element is NULL:
        # non-tail text or attribute text
        # => walk up to the closest element ancestor
        c_element = c_node.parent
        while c_element is not NULL and not _isElement(c_element):
            c_element = c_element.parent

    if c_element is not NULL:
        parent = _instantiateElementFromXPath(c_element, doc, context)

    return _elementStringResultFactory(
        value, parent, attrname, is_tail)
################################################################################
# callbacks for XPath/XSLT extension functions
cdef void _extension_function_call(_BaseContext context, function,
                                   xpath.xmlXPathParserContext* ctxt, int nargs) noexcept:
    """Call a Python extension function from an XPath evaluation.

    Pops ``nargs`` arguments from the XPath value stack, converts them to
    Python values, calls ``function(context, *args)`` and pushes the
    converted result back onto the stack.

    Exceptions never propagate: an XPath error is signalled to libxml2 and
    the exception is stored in the context's exception store instead.
    """
    cdef _Document doc
    cdef xpath.xmlXPathObject* obj
    cdef list args
    cdef int i
    doc = context._doc
    try:
        args = []
        for i in range(nargs):
            obj = xpath.valuePop(ctxt)
            try:
                o = _unwrapXPathObject(obj, doc, context)
            finally:
                # The object was popped off the stack, so nobody else frees
                # it - also release it when the conversion fails.
                _freeXPathObject(obj)
            args.append(o)
        # the stack yields the arguments last-to-first
        args.reverse()

        res = function(context, *args)
        # wrap result for XPath consumption
        obj = _wrapXPathObject(res, doc, context)
        # prevent Python from deallocating elements handed to libxml2
        context._hold(res)
        xpath.valuePush(ctxt, obj)
    except:
        xpath.xmlXPathErr(ctxt, xpath.XPATH_EXPR_ERROR)
        context._exc._store_raised()
    finally:
        return  # swallow any further exceptions
# lookup the function by name and call it
cdef void _xpath_function_call(xpath.xmlXPathParserContext* ctxt,
                               int nargs) noexcept with gil:
    """XPath function callback: find the Python function for the function
    that libxml2 is currently evaluating and call it.

    The _BaseContext is taken from the user data of the XPath context.  If
    the function is not in its cache, an "unknown function" error is
    signalled to libxml2 and an XPathFunctionError is stored in the
    context.  Exceptions never propagate out of this callback.
    """
    cdef xpath.xmlXPathContext* rctxt = ctxt.context
    cdef _BaseContext context = <_BaseContext> rctxt.userData
    try:
        function = context._find_cached_function(rctxt.functionURI, rctxt.function)
        if function is None:
            xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR)
            context._exc._store_exception(XPathFunctionError(
                f"XPath function '{_namespacedNameFromNsName(rctxt.functionURI, rctxt.function)}' not found"))
        else:
            _extension_function_call(context, function, ctxt, nargs)
    except:
        # may not be the right error, but we need to tell libxml2 *something*
        xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR)
        context._exc._store_raised()
    finally:
        return  # swallow any further exceptions
|