You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
333 lines
11 KiB
333 lines
11 KiB
5 months ago
|
################################################################################
|
||
|
# ObjectPath
|
||
|
|
||
|
# One compiled path segment.  An array of these is built from the parsed
# (ns, name, index) tuples by _build_object_path_segments().
ctypedef struct _ObjectPath:
    # namespace URI of the segment, or NULL if the path did not specify one
    const_xmlChar* href
    # tag name of the segment, or NULL for the relative ('.') root segment
    const_xmlChar* name
    # sibling index taken from '[N]' in the path; 0 if none was given
    Py_ssize_t index
|
||
|
|
||
|
|
||
|
# Unique sentinel meaning "the caller passed no default value", so that any
# Python object (including None) can be used as an explicit default.
cdef object _NO_DEFAULT = object()
|
||
|
|
||
|
|
||
|
cdef class ObjectPath:
    """ObjectPath(path)
    Immutable object that represents a compiled object path.

    The path is either a string or a sequence of path segment strings.

    Example for a path: 'root.child[1].{other}child[25]'
    """
    # bound alias of __call__, exposed read-only to Python code
    cdef readonly object find
    # parsed (ns, name, index) tuples; kept as an attribute next to _c_path,
    # which is built from these tuples
    cdef list _path
    # string form of the path as returned by str()
    cdef object _path_str
    # C array of compiled segments, owned by this object (freed in __dealloc__)
    cdef _ObjectPath* _c_path
    # number of segments in _c_path
    cdef Py_ssize_t _path_len

    def __init__(self, path):
        cdef list segments
        cdef _ObjectPath* c_path
        if python._isString(path):
            segments = _parse_object_path_string(path)
            path_str = path
        else:
            # Materialise the sequence first: a one-shot iterable would be
            # exhausted by the parser and leave nothing for the join below.
            path = list(path)
            segments = _parse_object_path_list(path)
            path_str = '.'.join(path)
        # Build the new array before touching the instance state, so that a
        # failure leaves the object unchanged.
        c_path = _build_object_path_segments(segments)
        if self._c_path is not NULL:
            # __init__() was called again: release the previous array
            # instead of leaking it.
            python.lxml_free(self._c_path)
        self._path = segments
        self._path_str = path_str
        self._path_len = len(segments)
        self._c_path = c_path
        self.find = self.__call__

    def __dealloc__(self):
        if self._c_path is not NULL:
            python.lxml_free(self._c_path)

    def __str__(self):
        return self._path_str

    def __call__(self, _Element root not None, *_default):
        """Follow the attribute path in the object structure and return the
        target attribute value.

        If it is not found, either returns a default value (if one was passed
        as second argument) or raises AttributeError.  If the root element
        does not match the first path segment and no default value was
        passed, ValueError is raised instead.

        Passing more than one default value raises TypeError.
        """
        if not _default:
            default = _NO_DEFAULT
        elif len(_default) > 1:
            raise TypeError("invalid number of arguments: needs one or two")
        else:
            default = _default[0]
        return _find_object_path(root, self._c_path, self._path_len, default)

    def hasattr(self, _Element root not None):
        """hasattr(self, root)

        Return True if the path can be resolved starting at 'root', False if
        a child on the path is missing.  Only AttributeError is caught here,
        so the ValueError raised for a non-matching root element propagates
        to the caller.
        """
        try:
            _find_object_path(root, self._c_path, self._path_len, _NO_DEFAULT)
        except AttributeError:
            return False
        return True

    def setattr(self, _Element root not None, value):
        """setattr(self, root, value)

        Set the value of the target element in a subtree.

        If any of the children on the path does not exist, it is created.
        """
        # replace=1: an existing target element is replaced
        _create_object_path(root, self._c_path, self._path_len, 1, value)

    def addattr(self, _Element root not None, value):
        """addattr(self, root, value)

        Append a value to the target element in a subtree.

        If any of the children on the path does not exist, it is created.
        """
        # replace=0: the value is added next to an existing target element
        _create_object_path(root, self._c_path, self._path_len, 0, value)
|
||
|
|
||
|
|
||
|
# Matcher for a single path segment, applied repeatedly at increasing
# offsets by _parse_object_path_string().  Capture groups:
#   1: optional leading '.' separator
#   2: optional namespace given as '{...}' (content without the braces)
#   3: tag name (no '.', braces, brackets or whitespace)
#   4: optional index given as '[...]' (digits and '-' only)
cdef object __MATCH_PATH_SEGMENT = re.compile(
    r"(\.?)\s*(?:\{([^}]*)\})?\s*([^.{}\[\]\s]+)\s*(?:\[\s*([-0-9]+)\s*\])?",
    re.U).match
|
||
|
|
||
|
# (ns, name, index) placeholder for the root segment of a relative path,
# i.e. a path that is '.' or starts with a '.'.
cdef tuple _RELATIVE_PATH_SEGMENT = (None, None, 0)
|
||
|
|
||
|
|
||
|
cdef list _parse_object_path_string(_path):
    """Split an object path string into its (ns, name, index) segments.

    Byte strings are decoded as ASCII, other non-unicode objects are
    converted with unicode().  A path that is just '.' yields the single
    relative root segment; a path starting with '.' gets the relative root
    segment prepended.

    Raises ValueError for an empty or malformed path and for an index on
    the root segment.
    """
    cdef bint leading_dot
    cdef unicode path
    if isinstance(_path, bytes):
        path = (<bytes>_path).decode('ascii')
    elif type(_path) is unicode:
        path = _path
    else:
        path = unicode(_path)
    path = path.strip()
    if path == '.':
        return [_RELATIVE_PATH_SEGMENT]

    segments = []
    pos = 0
    while path:
        match = __MATCH_PATH_SEGMENT(path, pos)
        if match is None:
            break

        dot, ns, name, index = match.groups()
        index = int(index) if index else 0
        leading_dot = dot == '.'
        is_first = not segments
        if is_first and leading_dot:
            # path '.child' => ignore root
            segments.append(_RELATIVE_PATH_SEGMENT)
        elif is_first and index:
            raise ValueError("index not allowed on root node")
        elif not is_first and not leading_dot:
            # every segment after the first must be separated by a dot
            raise ValueError("invalid path")

        if ns is not None:
            ns = python.PyUnicode_AsUTF8String(ns)
        name = python.PyUnicode_AsUTF8String(name)
        segments.append( (ns, name, index) )

        pos = match.end()

    # valid only if something was parsed and the whole string was consumed
    if segments and pos >= len(path):
        return segments
    raise ValueError("invalid path")
|
||
|
|
||
|
|
||
|
cdef list _parse_object_path_list(path):
    """Convert a sequence of path segment strings into (ns, name, index)
    tuples.

    An empty first item marks a relative path and becomes (None, None, 0).
    Every other item is split into namespace and name, and an optional
    '[N]' suffix of the name is parsed as the index.

    Raises ValueError for an empty sequence, an unterminated '[' and a
    non-zero index on the first segment.
    """
    segments = []
    for item in path:
        item = item.strip()
        if not segments and item == '':
            # path '.child' => ignore root
            segments.append( (None, None, 0) )
            continue

        ns, name = cetree.getNsTag(item)
        c_name = _xcstr(name)
        c_bracket = tree.xmlStrchr(c_name, c'[')
        if c_bracket is NULL:
            index = 0
        else:
            c_bracket_end = tree.xmlStrchr(c_bracket + 1, c']')
            if c_bracket_end is NULL:
                raise ValueError("index must be enclosed in []")
            # text between the brackets
            index = int(c_bracket[1:c_bracket_end - c_bracket])
            if not segments and index != 0:
                raise ValueError("index not allowed on root node")
            # strip the '[N]' suffix from the name
            name = <bytes>c_name[:c_bracket - c_name]
        segments.append( (ns, name, index) )

    if not segments:
        raise ValueError("invalid path")
    return segments
|
||
|
|
||
|
|
||
|
cdef _ObjectPath* _build_object_path_segments(list path_list) except NULL:
    """Allocate and fill a C array with one _ObjectPath entry per
    (href, name, index) tuple in 'path_list'.

    The returned array must be released with python.lxml_free().  The
    href/name pointers are obtained via _xcstr() from the objects in
    'path_list' (presumably borrowed from their byte buffers), so the
    caller should keep 'path_list' alive as long as the array is in use.

    Raises MemoryError if the allocation fails.  Any exception raised while
    filling the array (e.g. an index that does not fit into Py_ssize_t) is
    propagated after the array has been freed.
    """
    cdef _ObjectPath* c_path
    cdef _ObjectPath* c_path_segments
    c_path_segments = <_ObjectPath*>python.lxml_malloc(len(path_list), sizeof(_ObjectPath))
    if c_path_segments is NULL:
        raise MemoryError()
    try:
        c_path = c_path_segments
        for href, name, index in path_list:
            c_path[0].href = _xcstr(href) if href is not None else NULL
            c_path[0].name = _xcstr(name) if name is not None else NULL
            c_path[0].index = index
            c_path += 1
    except:
        # Do not leak the half-filled array when tuple unpacking or the
        # index conversion fails.
        python.lxml_free(c_path_segments)
        raise
    return c_path_segments
|
||
|
|
||
|
|
||
|
cdef _find_object_path(_Element root, _ObjectPath* c_path, Py_ssize_t c_path_len, default_value):
    """Resolve the compiled path against 'root' and return the element it
    points to.

    The first segment is checked against 'root' itself; a segment without
    namespace inherits the namespace of its parent segment.  If the root
    does not match, 'default_value' is returned, or ValueError is raised
    when it is _NO_DEFAULT.  If a later segment cannot be found,
    'default_value' is returned, or AttributeError is raised when it is
    _NO_DEFAULT.
    """
    cdef tree.xmlNode* c_node
    cdef tree.xmlNode* c_start
    cdef Py_ssize_t c_index
    c_node = root._c_node
    c_href = c_path[0].href
    c_name = c_path[0].name
    if c_href is NULL or c_href[0] == c'\0':
        # no namespace requested for the root segment: use the root's own
        c_href = tree._getNs(c_node)
    if not cetree.tagMatches(c_node, c_href, c_name):
        if default_value is _NO_DEFAULT:
            raise ValueError(
                f"root element does not match: need {cetree.namespacedNameFromNsName(c_href, c_name)}, got {root.tag}")
        return default_value

    # the root segment is done, walk the remaining ones
    c_path_len -= 1
    while c_path_len > 0 and c_node is not NULL:
        c_path += 1
        if c_path[0].href is not NULL:
            c_href = c_path[0].href # otherwise: keep parent namespace
        c_name = tree.xmlDictExists(c_node.doc.dict, c_path[0].name, -1)
        if c_name is NULL:
            # name is not in the document's dict => treated as "not found";
            # keep the requested name for the error message below
            c_name = c_path[0].name
            c_node = NULL
            break
        c_index = c_path[0].index
        # negative indexes start the search at the last child
        c_start = c_node.last if c_index < 0 else c_node.children
        c_node = _findFollowingSibling(c_start, c_href, c_name, c_index)
        c_path_len -= 1

    if c_node is not NULL:
        return cetree.elementFactory(root._doc, c_node)
    if default_value is not _NO_DEFAULT:
        return default_value
    tag = cetree.namespacedNameFromNsName(c_href, c_name)
    raise AttributeError(f"no such child: {tag}")
|
||
|
|
||
|
|
||
|
cdef _create_object_path(_Element root, _ObjectPath* c_path,
                         Py_ssize_t c_path_len, int replace, value):
    """Walk the compiled path below 'root', create missing children on the
    way and store 'value' at the target.

    If the target element already exists, it is replaced when 'replace' is
    true; otherwise 'value' is added to its parent under the same tag.

    Raises TypeError for a path that consists of the root only and for a
    missing child that was addressed with a non-zero index.  Raises
    ValueError if 'root' does not match the first path segment.
    """
    cdef _Element child
    cdef _Element parent
    cdef tree.xmlNode* c_node
    cdef tree.xmlNode* c_child
    cdef Py_ssize_t c_index
    if c_path_len == 1:
        raise TypeError("cannot update root node")

    c_node = root._c_node
    c_href = c_path[0].href
    c_name = c_path[0].name
    if c_href is NULL or c_href[0] == c'\0':
        # no namespace requested for the root segment: use the root's own
        c_href = tree._getNs(c_node)
    if not cetree.tagMatches(c_node, c_href, c_name):
        raise ValueError(
            f"root element does not match: need {cetree.namespacedNameFromNsName(c_href, c_name)}, got {root.tag}")

    while c_path_len > 1:
        c_path_len -= 1
        c_path += 1
        if c_path[0].href is not NULL:
            c_href = c_path[0].href # otherwise: keep parent namespace
        c_index = c_path[0].index
        c_name = tree.xmlDictExists(c_node.doc.dict, c_path[0].name, -1)
        if c_name is NULL:
            # name is not in the document's dict => treated as "no such child"
            c_name = c_path[0].name
            c_child = NULL
        else:
            # negative indexes start the search at the last child
            c_child = c_node.last if c_index < 0 else c_node.children
            c_child = _findFollowingSibling(c_child, c_href, c_name, c_index)

        if c_child is not NULL:
            # segment exists: descend
            c_node = c_child
            continue
        if c_index != 0:
            raise TypeError("creating indexed path attributes is not supported")

        parent = cetree.elementFactory(root._doc, c_node)
        tag = cetree.namespacedNameFromNsName(c_href, c_name)
        if c_path_len == 1:
            # last segment is missing: add the value right here
            _appendValue(parent, tag, value)
            return
        # intermediate segment is missing: create it and descend
        child = cetree.makeSubElement(parent, tag, None, None, None, None)
        c_node = child._c_node

    # if we get here, the entire path was already there
    if replace:
        element = cetree.elementFactory(root._doc, c_node)
        _replaceElement(element, value)
    else:
        _appendValue(cetree.elementFactory(root._doc, c_node.parent),
                     cetree.namespacedName(c_node), value)
|
||
|
|
||
|
|
||
|
cdef list _build_descendant_paths(tree.xmlNode* c_node, prefix_string):
    """Return the path strings of 'c_node' and all of its descendant
    elements.

    The first path component is the namespaced tag of 'c_node', preceded by
    'prefix_string' (if non-empty), with a '.' in between unless the prefix
    already ends with one.
    """
    cdef list path, path_list
    root_tag = cetree.namespacedName(c_node)
    if not prefix_string:
        first_segment = root_tag
    elif prefix_string[-1] == '.':
        first_segment = prefix_string + root_tag
    else:
        first_segment = prefix_string + '.' + root_tag
    path_list = []
    path = [first_segment]
    _recursive_build_descendant_paths(c_node, path, path_list)
    return path_list
|
||
|
|
||
|
|
||
|
cdef int _recursive_build_descendant_paths(tree.xmlNode* c_node,
                                           list path, list path_list) except -1:
    """Append the path of 'c_node' (the segments currently in 'path') and
    the paths of all its descendant elements to 'path_list'.

    'path' is used as a stack: each child's segment is pushed before the
    recursive call and removed afterwards.  Repeated tags among siblings
    get an '[N]' suffix, starting with '[1]' for the second occurrence.
    """
    cdef tree.xmlNode* c_child
    cdef const_xmlChar* c_child_href
    seen = {}
    path_list.append('.'.join(path))
    c_href = tree._getNs(c_node)
    c_child = c_node.children
    while c_child is not NULL:
        if c_child.type != tree.XML_ELEMENT_NODE:
            # skip text, comments etc.
            c_child = c_child.next
            continue

        c_child_href = tree._getNs(c_child)
        if c_href is c_child_href:
            # same namespace pointer as the parent: plain tag name suffices
            tag = pyunicode(c_child.name)
        elif c_href is not NULL and c_child_href is NULL:
            # special case: parent has namespace, child does not
            tag = '{}' + pyunicode(c_child.name)
        else:
            tag = cetree.namespacedName(c_child)

        count = seen.get(tag, 0)
        seen[tag] = count + 1
        if count:
            tag += f'[{count}]'

        path.append(tag)
        _recursive_build_descendant_paths(c_child, path, path_list)
        path.pop()
        c_child = c_child.next
    return 0
|