• Main Page
  • Related Pages
  • Modules
  • Namespaces
  • Data Structures
  • Files
  • File List
  • Globals

contrib/opal/ZSI/build/lib/ZSI/wstools/c14n.py

00001 #! /usr/bin/env python
00002 '''XML Canonicalization
00003 
00004 Patches Applied to xml.dom.ext.c14n:
00005     http://sourceforge.net/projects/pyxml/
00006 
00007     [ 1444526 ] c14n.py: http://www.w3.org/TR/xml-exc-c14n/ fix
00008         -- includes [ 829905 ] c14n.py fix for bug #825115, 
00009            Date Submitted: 2003-10-24 23:43
00010         -- include dependent namespace declarations declared in ancestor nodes 
00011            (checking attributes and tags), 
00012         -- handle InclusiveNamespaces PrefixList parameter
00013 
00014 This module generates canonical XML of a document or element.
00015     http://www.w3.org/TR/2001/REC-xml-c14n-20010315
00016 and includes a prototype of exclusive canonicalization
00017     http://www.w3.org/Signature/Drafts/xml-exc-c14n
00018 
00019 Requires PyXML 0.7.0 or later.
00020 
00021 Known issues if using Ft.Lib.pDomlette:
00022     1. Unicode
00023     2. does not white space normalize attributes of type NMTOKEN and ID?
00024     3. seems to include "\n" after importing external entities?
00025 
00026 Note, this version processes a DOM tree, and consequently it processes
00027 namespace nodes as attributes, not from a node's namespace axis. This
00028 permits simple document and element canonicalization without
00029 XPath. When XPath is used, the XPath result node list is passed and used to
00030 determine if the node is in the XPath result list, but little else.
00031 
00032 Authors:
00033     "Joseph M. Reagle Jr." <reagle@w3.org>
00034     "Rich Salz" <rsalz@zolera.com>
00035 
00036 $Date: 2006-03-30 23:47:16 +0000 (Thu, 30 Mar 2006) $ by $Author: boverhof $
00037 '''
00038 
00039 _copyright = '''Copyright 2001, Zolera Systems Inc.  All Rights Reserved.
00040 Copyright 2001, MIT. All Rights Reserved.
00041 
00042 Distributed under the terms of:
00043   Python 2.0 License or later.
00044   http://www.python.org/2.0.1/license.html
00045 or
00046   W3C Software License
00047   http://www.w3.org/Consortium/Legal/copyright-software-19980720
00048 '''
00049 
00050 import string
00051 from xml.dom import Node
00052 try:
00053     from xml.ns import XMLNS
00054 except:
00055     class XMLNS:
00056         BASE = "http://www.w3.org/2000/xmlns/"
00057         XML = "http://www.w3.org/XML/1998/namespace"
00058 try:
00059     import cStringIO
00060     StringIO = cStringIO
00061 except ImportError:
00062     import StringIO
00063 
def _attrs(E):
    '''_attrs(E) -> list
    Return the attribute nodes of E as a list.  The result is an empty
    list when E.attributes is None or holds no attributes.'''
    attributes = E.attributes
    if not attributes:
        return []
    # Callers concatenate this with other lists and append to it, so make
    # sure a real list is returned even if values() yields another kind of
    # collection.
    return list(attributes.values())


def _children(E):
    '''_children(E) -> sequence
    Return E.childNodes, or an empty list when that is None or empty.'''
    return E.childNodes or []


def _IN_XML_NS(n):
    '''_IN_XML_NS(n) -> boolean
    True when the name of attribute n starts with "xmlns" (for example
    "xmlns" or "xmlns:p").  Note that, despite the identifier, this does
    not look at the attribute's namespace URI.'''
    return n.name.startswith("xmlns")


def _inclusive(n):
    '''_inclusive(n) -> boolean
    True when n.unsuppressedPrefixes is None, i.e. no exclusive
    canonicalization prefix list was supplied.'''
    return n.unsuppressedPrefixes is None


# Position of a top-level comment/PI relative to the document element:
# before it, at (inside) it, or after it.  This decides whether a newline
# is written after or before the comment/PI.
_LesserElement, _Element, _GreaterElement = range(3)
00073 
def _three_way(a, b):
    '''_three_way(a, b) -> int
    Return -1, 0 or 1 as a is less than, equal to or greater than b.
    None orders before every other value, which is what the cmp() builtin
    did on Python 2; the function does not rely on cmp(), so it also
    works on interpreters that no longer provide it.'''
    if a == b:
        return 0
    if a is None:
        return -1
    if b is None:
        return 1
    return (a > b) - (a < b)


def _sorter(n1,n2):
    '''_sorter(n1,n2) -> int
    Sorting predicate for non-NS attributes: order by namespaceURI
    first and by localName second.  Attributes without a namespace
    (namespaceURI is None) come first.'''

    i = _three_way(n1.namespaceURI, n2.namespaceURI)
    if i: return i
    return _three_way(n1.localName, n2.localName)
00081 
00082 
def _sorter_ns(n1,n2):
    '''_sorter_ns((n,v),(n,v)) -> int
    Sorting predicate for namespace declarations given as (name, value)
    pairs.  The default declaration "xmlns" sorts before everything else
    ("an empty namespace URI is lexicographically least"); the remaining
    declarations are ordered by name.  Only the names are compared.'''

    if n1[0] == 'xmlns': return -1
    if n2[0] == 'xmlns': return 1
    # Explicit three-way comparison; avoids the cmp() builtin, which is
    # not available on Python 3.
    return (n1[0] > n2[0]) - (n1[0] < n2[0])
00090 
def _utilized(n, node, other_attrs, unsuppressedPrefixes):
    '''_utilized(n, node, other_attrs, unsuppressedPrefixes) -> boolean
    Return true if the namespace declaration named n is utilized by node.

    n -- declaration name ("xmlns" or "xmlns:prefix"); a bare prefix is
         accepted as well.
    node -- the element being rendered.
    other_attrs -- the element's attributes that will be rendered.
    unsuppressedPrefixes -- prefixes that are always treated as utilized,
         or None when no such list applies.

    The declaration counts as utilized when its prefix is the element's
    own prefix, is listed in unsuppressedPrefixes, or is the prefix of
    one of other_attrs.  When unsuppressedPrefixes is not None, all
    attributes of node are checked too.'''
    if n.startswith('xmlns:'):
        n = n[6:]
    elif n.startswith('xmlns'):
        n = n[5:]

    # Default namespace declaration on an unprefixed element.
    if n == "" and node.prefix in ["#default", None]:
        return True
    if n == node.prefix:
        return True

    # Guard the membership test: with None there is no prefix list to
    # search, and "n in None" would raise TypeError.
    exclusive = unsuppressedPrefixes is not None
    if exclusive and n in unsuppressedPrefixes:
        return True

    for attr in other_attrs:
        if n == attr.prefix: return True

    # For exclusive need to look at attributes
    if exclusive:
        for attr in _attrs(node):
            if n == attr.prefix: return True

    return False
00109 
00110 
def _inclusiveNamespacePrefixes(node, context, unsuppressedPrefixes):
    '''_inclusiveNamespacePrefixes(node, context, unsuppressedPrefixes)
            -> (list, dict)
    Split the inherited declarations in context for exclusive c14n
    (http://www.w3.org/TR/xml-exc-c14n/, InclusiveNamespaces PrefixList).

    Returns a pair:
      - the attribute nodes of context that are kept: those named in
        unsuppressedPrefixes (by full name or by the part after "xmlns:"
        / "xmlns") and those declaring the prefix of node or of one of
        its prefixed, non-xmlns attributes;
      - a dictionary mapping the name of every other "xmlns:..."
        declaration in context to its value.'''
    # Declaration names the element needs for itself and its attributes.
    if node.prefix:
        needed = ['xmlns:%s' %node.prefix]
    else:
        needed = ['xmlns']
    needed.extend(['xmlns:%s' %a.prefix for a in _attrs(node)
                   if not a.nodeName.startswith('xmlns') and a.prefix])

    kept, leftovers = [], {}
    for decl in context:
        name = decl.nodeName
        wanted = (name in unsuppressedPrefixes
                  or (name.startswith('xmlns:')
                      and name[6:] in unsuppressedPrefixes)
                  or (name.startswith('xmlns')
                      and name[5:] in unsuppressedPrefixes)
                  or name in needed)
        if wanted:
            kept.append(decl)
        elif name.startswith('xmlns:'):
            # Prefixed declaration nobody asked for (yet); remember it.
            leftovers[name] = decl.value

    return kept, leftovers
00140 
def _in_subset(subset, node):
    '''_in_subset(subset, node) -> boolean
    True when no subset was given (subset is None) or node is a member
    of subset.'''
    # Only None means "everything"; an empty subset selects nothing, so a
    # plain truth test on subset would be wrong here.
    if subset is None:
        return True
    return node in subset
00143 
00144 
class _implementation:
    '''Implementation class for C14N. This accompanies a node during its
    processing and includes the parameters and processing state.

    Creating an instance runs the canonicalization: the constructor
    stores the options and immediately walks the node, handing every
    piece of output to the write callable.

    self.state is a 4-tuple that is replaced while descending into an
    element and restored afterwards:
        0 -- namespace declarations in scope (name -> value)
        1 -- namespace declarations already rendered by ancestors
        2 -- xml:* attributes collected from ancestors
        3 -- declarations seen but not rendered (exclusive mode)'''

    # Handler for each node type; filled in below as the class body runs.
    handlers = {}

    def __init__(self, node, write, **kw):
        '''Create and run the implementation.

        node -- a document, element or document type node; a document
                type node produces no output.
        write -- callable receiving each output fragment.
        Keywords: subset, comments, unsuppressedPrefixes, nsdict
        (default: the xml and xmlns prefixes).

        Raises TypeError for any other node type.'''
        self.write = write
        self.subset = kw.get('subset')
        self.comments = kw.get('comments', 0)
        self.unsuppressedPrefixes = kw.get('unsuppressedPrefixes')
        nsdict = kw.get('nsdict', { 'xml': XMLNS.XML, 'xmlns': XMLNS.BASE })

        # Processing state.
        self.state = (nsdict, {'xml':''}, {}, {})

        if node.nodeType == Node.DOCUMENT_NODE:
            self._do_document(node)
        elif node.nodeType == Node.ELEMENT_NODE:
            self.documentOrder = _Element        # At document element
            if not _inclusive(self):
                # Exclusive: keep only the inherited declarations that
                # are needed, remember the rest as "unused".
                inherited, unused = _inclusiveNamespacePrefixes(
                    node, self._inherit_context(node),
                    self.unsuppressedPrefixes)
                self._do_element(node, inherited, unused=unused)
            else:
                inherited = self._inherit_context(node)
                self._do_element(node, inherited)
        elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
            pass
        else:
            raise TypeError(str(node))


    @staticmethod
    def _none_first(value):
        '''Sort key for a value that may be None; None sorts first.'''
        return (value is not None, value or '')

    @staticmethod
    def _ns_sort_key(item):
        '''Sort key for (name, value) namespace declarations: the
        default declaration "xmlns" first, the others by name.'''
        return (item[0] != 'xmlns', item[0])

    @staticmethod
    def _attr_sort_key(attr):
        '''Sort key for ordinary attributes: namespace URI first, local
        name second; a missing (None) value sorts before any string.'''
        none_first = _implementation._none_first
        return (none_first(attr.namespaceURI), none_first(attr.localName))


    def _inherit_context(self, node):
        '''_inherit_context(self, node) -> list
        Scan the ancestors of node for attributes selected by
        _IN_XML_NS and return those that are inherited, nearest
        ancestor first.  Used only for single element node
        canonicalization, not for subset canonicalization.'''

        # Start with the matching attributes of the node itself.
        # NOTE(review): this list holds attribute nodes, while the loop
        # below tests and appends localName strings, so only names seen
        # on a nearer ancestor mask those of a farther one.  Kept as is.
        xmlattrs = [a for a in _attrs(node) if _IN_XML_NS(a)]

        # Walk up and collect what we inherit.
        inherited, parent = [], node.parentNode
        while parent and parent.nodeType == Node.ELEMENT_NODE:
            for a in _attrs(parent):
                if not _IN_XML_NS(a):
                    continue
                n = a.localName
                if n not in xmlattrs:
                    xmlattrs.append(n)
                    inherited.append(a)
            parent = parent.parentNode
        return inherited


    def _do_document(self, node):
        '''_do_document(self, node) -> None
        Process a document node. documentOrder holds whether the document
        element has been encountered such that PIs/comments can be written
        as specified.  Document type children are skipped; any other
        kind of child raises TypeError.'''

        self.documentOrder = _LesserElement
        for child in node.childNodes:
            if child.nodeType == Node.ELEMENT_NODE:
                self.documentOrder = _Element        # At document element
                self._do_element(child)
                self.documentOrder = _GreaterElement # After document element
            elif child.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
                self._do_pi(child)
            elif child.nodeType == Node.COMMENT_NODE:
                self._do_comment(child)
            elif child.nodeType == Node.DOCUMENT_TYPE_NODE:
                pass
            else:
                raise TypeError(str(child))
    handlers[Node.DOCUMENT_NODE] = _do_document


    def _do_text(self, node):
        '''_do_text(self, node) -> None
        Process a text or CDATA node.  Render various special characters
        as their C14N entity representations.  Nothing is written for a
        node outside the subset or with empty data.'''
        if not _in_subset(self.subset, node): return
        # "&" must be replaced first so the references added afterwards
        # are not escaped again.
        s = node.data.replace("&", "&amp;")
        s = s.replace("<", "&lt;")
        s = s.replace(">", "&gt;")
        s = s.replace("\015", "&#xD;")
        if s: self.write(s)
    handlers[Node.TEXT_NODE] = _do_text
    handlers[Node.CDATA_SECTION_NODE] = _do_text


    def _do_pi(self, node):
        '''_do_pi(self, node) -> None
        Process a PI node. Render a leading or trailing #xA if the
        document order of the PI is greater or lesser (respectively)
        than the document element.
        '''
        if not _in_subset(self.subset, node): return
        W = self.write
        if self.documentOrder == _GreaterElement: W('\n')
        W('<?')
        W(node.nodeName)
        s = node.data
        if s:
            W(' ')
            W(s)
        W('?>')
        if self.documentOrder == _LesserElement: W('\n')
    handlers[Node.PROCESSING_INSTRUCTION_NODE] = _do_pi


    def _do_comment(self, node):
        '''_do_comment(self, node) -> None
        Process a comment node. Render a leading or trailing #xA if the
        document order of the comment is greater or lesser (respectively)
        than the document element.  Comments are written only when the
        comments option is set.
        '''
        if not _in_subset(self.subset, node): return
        if self.comments:
            W = self.write
            if self.documentOrder == _GreaterElement: W('\n')
            W('<!--')
            W(node.data)
            W('-->')
            if self.documentOrder == _LesserElement: W('\n')
    handlers[Node.COMMENT_NODE] = _do_comment


    def _do_attr(self, n, value):
        '''_do_attr(self, n, value) -> None
        Write the attribute named n with the given value, preceded by a
        space.  "&", "<" and the double quote are replaced by entity
        references; tab, line feed and carriage return are replaced by
        the character references &#x9; &#xA; and &#xD;.'''

        W = self.write
        W(' ')
        W(n)
        W('="')
        s = value.replace("&", "&amp;")
        s = s.replace("<", "&lt;")
        s = s.replace('"', '&quot;')
        # Character references must end with ";" to be well formed.
        s = s.replace('\011', '&#x9;')
        s = s.replace('\012', '&#xA;')
        s = s.replace('\015', '&#xD;')
        W(s)
        W('"')


    def _do_element(self, node, initial_other_attrs = None, unused = None):
        '''_do_element(self, node, initial_other_attrs = None, unused = None) -> None
        Process an element (and its children).

        initial_other_attrs -- attribute nodes handled as if they were
            declared on node, ahead of its own attributes (the inherited
            context of the top element); None means none.
        unused -- dictionary of declarations that are known but not yet
            rendered (exclusive mode); when None a copy of the one held
            in the current state is used.  A dictionary passed in is
            modified in place.

        Raises RuntimeError in exclusive mode when the element's prefix
        cannot be mapped to any known declaration.'''

        # Get state (from the stack) make local copies.
        #   ns_parent -- NS declarations in parent
        #   ns_rendered -- NS nodes rendered by ancestors
        #        ns_local -- NS declarations relevant to this element
        #   xml_attrs -- Attributes in XML namespace from parent
        #       xml_attrs_local -- Local attributes in XML namespace.
        #   ns_unused_inherited -- not rendered namespaces, used for exclusive
        ns_parent = self.state[0]
        ns_rendered = self.state[1].copy()
        xml_attrs = self.state[2].copy()

        ns_unused_inherited = unused
        if unused is None:
            ns_unused_inherited = self.state[3].copy()

        ns_local = ns_parent.copy()
        inclusive = _inclusive(self)
        xml_attrs_local = {}

        # Divide attributes into NS, XML, and others.
        other_attrs = []
        in_subset = _in_subset(self.subset, node)
        for a in list(initial_other_attrs or []) + list(_attrs(node)):
            if a.namespaceURI == XMLNS.BASE:
                n = a.nodeName
                if n == "xmlns:": n = "xmlns"        # DOM bug workaround
                ns_local[n] = a.nodeValue
            elif a.namespaceURI == XMLNS.XML:
                # Test to see if attribute node in subset
                if inclusive or (in_subset and _in_subset(self.subset, a)):
                    xml_attrs_local[a.nodeName] = a
            else:
                # Test to see if attribute node in subset
                if _in_subset(self.subset, a):
                    other_attrs.append(a)

        # Add local xml:foo attributes to ancestor's xml:foo attributes.
        xml_attrs.update(xml_attrs_local)

        # Render the node
        W, name = self.write, None
        if in_subset:
            name = node.nodeName
            if not inclusive:
                if node.prefix is not None:
                    prefix = 'xmlns:%s' %node.prefix
                else:
                    prefix = 'xmlns'

                # The element's own declaration is neither rendered nor
                # in scope: take it from the unused ones, or give up.
                if prefix not in ns_rendered and prefix not in ns_local:
                    if prefix not in ns_unused_inherited:
                        raise RuntimeError(
                            'For exclusive c14n, unable to map prefix "%s" in %s' %(
                            prefix, node))

                    ns_local[prefix] = ns_unused_inherited[prefix]
                    del ns_unused_inherited[prefix]

            W('<')
            W(name)

            # Create list of NS attributes to render.
            ns_to_render = []
            for n,v in ns_local.items():

                # If default namespace is XMLNS.BASE or empty,
                # and if an ancestor was the same
                if n == "xmlns" and v in [ XMLNS.BASE, '' ] \
                and ns_rendered.get('xmlns') in [ XMLNS.BASE, '', None ]:
                    continue

                # "omit namespace node with local name xml, which defines
                # the xml prefix, if its string value is
                # http://www.w3.org/XML/1998/namespace."
                if n in ["xmlns:xml", "xml"] \
                and v in [ 'http://www.w3.org/XML/1998/namespace' ]:
                    continue

                # If not previously rendered
                # and it's inclusive  or utilized
                if (n,v) not in ns_rendered.items():
                    if inclusive or _utilized(n, node, other_attrs, self.unsuppressedPrefixes):
                        ns_to_render.append((n, v))
                    else:
                        # Exclusive and not utilized here: a descendant
                        # may still need it.
                        ns_unused_inherited[n] = v

            # Sort and render the ns, marking what was rendered.
            ns_to_render.sort(key=self._ns_sort_key)
            for n,v in ns_to_render:
                self._do_attr(n, v)
                ns_rendered[n]=v

            # If exclusive or the parent is in the subset, add the local xml attributes
            # Else, add all local and ancestor xml attributes
            # Sort and render the attributes.
            if not inclusive or _in_subset(self.subset,node.parentNode):
                other_attrs.extend(xml_attrs_local.values())
            else:
                other_attrs.extend(xml_attrs.values())
            other_attrs.sort(key=self._attr_sort_key)
            for a in other_attrs:
                self._do_attr(a.nodeName, a.value)
            W('>')

        # Push state, recurse, pop state.
        state, self.state = self.state, (ns_local, ns_rendered, xml_attrs, ns_unused_inherited)
        for c in _children(node):
            _implementation.handlers[c.nodeType](self, c)
        self.state = state

        if name: W('</%s>' % name)
    handlers[Node.ELEMENT_NODE] = _do_element
00411 
00412 
def Canonicalize(node, output=None, **kw):
    '''Canonicalize(node, output=None, **kw) -> string or None

    Canonicalize a DOM document/element node and all descendents.
    Return the text; if output is specified then output.write will
    be called to output the text and None will be returned
    Keyword parameters:
        nsdict: a dictionary of prefix:uri namespace entries
                assumed to exist in the surrounding context
        comments: keep comments if non-zero (default is 0)
        subset: Canonical XML subsetting resulting from XPath
                (default is None, meaning every node is included;
                an empty list selects no node at all)
        unsuppressedPrefixes: do exclusive C14N, and this specifies the
                prefixes that should be inherited.
    '''
    # Test against None rather than truth: a writer object that happens
    # to evaluate as false (e.g. an empty container with a write method)
    # is still an output the caller asked for.
    if output is not None:
        _implementation(node, output.write, **kw)
        return None

    s = StringIO.StringIO()
    _implementation(node, s.write, **kw)
    return s.getvalue()

Generated on Wed Oct 20 2010 11:12:16 for APBS by  doxygen 1.7.2