• Main Page
  • Related Pages
  • Modules
  • Namespaces
  • Data Structures
  • Files
  • File List
  • Globals

contrib/opal/ZSI/build/lib/ZSI/parse.py

00001 #! /usr/bin/env python
00002 # $Header$
00003 '''SOAP messaging parsing.
00004 '''
00005 
00006 from xml.dom import expatbuilder
00007 from ZSI import _copyright, _children, _attrs, _child_elements, _stringtypes, \
00008         _backtrace, EvaluateException, ParseException, _valid_encoding, \
00009         _Node, _find_attr, _resolve_prefix
00010 from ZSI.TC import AnyElement
00011 import types
00012 
00013 from ZSI.wstools.Namespaces import SOAP, XMLNS
00014 from ZSI.wstools.Utility import SplitQName
00015 
def _find_actor(E):
    '''Return E's "actor" attribute in the SOAP.ENV namespace, or None
    when that attribute value is empty.
    '''
    return E.getAttributeNS(SOAP.ENV, "actor") or None


def _find_mu(E):
    '''Return E's "mustUnderstand" attribute in the SOAP.ENV namespace.
    '''
    return E.getAttributeNS(SOAP.ENV, "mustUnderstand")


def _find_root(E):
    '''Return E's "root" attribute in the SOAP.ENC namespace.
    '''
    return E.getAttributeNS(SOAP.ENC, "root")


def _find_id(E):
    '''Return E's 'id' attribute as located by _find_attr.
    '''
    return _find_attr(E, 'id')
00020 
class DefaultReader:
    """Default DOM reader; both entry points delegate to
    xml.dom.expatbuilder.
        fromString -- forwards to expatbuilder.parseString
        fromStream -- forwards to expatbuilder.parse
    """

    @staticmethod
    def fromString(*args, **kwargs):
        return expatbuilder.parseString(*args, **kwargs)

    @staticmethod
    def fromStream(*args, **kwargs):
        return expatbuilder.parse(*args, **kwargs)
00025 
00026 class ParsedSoap:
00027     '''A Parsed SOAP object.
00028         Convert the text to a DOM tree and parse SOAP elements.
00029         Instance data:
00030             reader -- the DOM reader
00031             dom -- the DOM object
00032             ns_cache -- dictionary (by id(node)) of namespace dictionaries
00033             id_cache -- dictionary (by XML ID attr) of elements
00034             envelope -- the node holding the SOAP Envelope
00035             header -- the node holding the SOAP Header (or None)
00036             body -- the node holding the SOAP Body
00037             body_root -- the serialization root in the SOAP Body
00038             data_elements -- list of non-root elements in the SOAP Body
00039             trailer_elements -- list of elements following the SOAP body
00040     '''
00041     defaultReaderClass = DefaultReader
00042 
00043     def __init__(self, input, readerclass=None, keepdom=False,
00044     trailers=False, resolver=None,  envelope=True, **kw):
00045         '''Initialize.
00046         Keyword arguments:
00047             trailers -- allow trailer elments (default is zero)
00048             resolver -- function (bound method) to resolve URI's
00049             readerclass -- factory class to create a reader
00050             keepdom -- do not release the DOM
00051             envelope -- look for a SOAP envelope.
00052         '''
00053 
00054         self.readerclass = readerclass
00055         self.keepdom = keepdom
00056         if not self.readerclass:
00057             self.readerclass = self.defaultReaderClass
00058 
00059         try:
00060             self.reader = self.readerclass()
00061             if type(input) in _stringtypes:
00062                 self.dom = self.reader.fromString(input)
00063             else:
00064                 self.dom = self.reader.fromStream(input)
00065         except Exception, e:
00066             # Is this in the header?  Your guess is as good as mine.
00067             #raise ParseException("Can't parse document (" + \
00068             #    str(e.__class__) + "): " + str(e), 0)
00069             raise
00070 
00071         self.ns_cache = {
00072             id(self.dom): {
00073                 'xml': XMLNS.XML,
00074                 'xmlns': XMLNS.BASE,
00075                 '': ''
00076             }
00077         }
00078         self.trailers, self.resolver, self.id_cache = trailers, resolver, {}
00079 
00080         # Exactly one child element
00081         c = [ E for E in _children(self.dom)
00082                 if E.nodeType == _Node.ELEMENT_NODE]
00083         if len(c) == 0:
00084             raise ParseException("Document has no Envelope", 0)
00085         if len(c) != 1:
00086             raise ParseException("Document has extra child elements", 0)
00087 
00088         if envelope is False:
00089             self.body_root = c[0]
00090             return
00091 
00092         # And that one child must be the Envelope
00093         elt = c[0]
00094         if elt.localName != "Envelope" \
00095         or elt.namespaceURI != SOAP.ENV:
00096             raise ParseException('Document has "' + elt.localName + \
00097                 '" element, not Envelope', 0)
00098         self._check_for_legal_children("Envelope", elt)
00099         for a in _attrs(elt):
00100             name = a.nodeName
00101             if name.find(":") == -1 and name not in [ "xmlns", "id" ]:
00102                 raise ParseException('Unqualified attribute "' + \
00103                         name + '" in Envelope', 0)
00104         self.envelope = elt
00105         if not _valid_encoding(self.envelope):
00106             raise ParseException("Envelope has invalid encoding", 0)
00107 
00108         # Get Envelope's child elements.
00109         c = [ E for E in _children(self.envelope)
00110                 if E.nodeType == _Node.ELEMENT_NODE ]
00111         if len(c) == 0:
00112             raise ParseException("Envelope is empty (no Body)", 0)
00113 
00114         # Envelope's first child might be the header; if so, nip it off.
00115         elt = c[0]
00116         if elt.localName == "Header" \
00117         and elt.namespaceURI == SOAP.ENV:
00118             self._check_for_legal_children("Header", elt)
00119             self._check_for_pi_nodes(_children(elt), 1)
00120             self.header = c.pop(0)
00121             self.header_elements = _child_elements(self.header)
00122         else:
00123             self.header, self.header_elements = None, []
00124 
00125         # Now the first child must be the body
00126         if len(c) == 0:
00127             raise ParseException("Envelope has header but no Body", 0)
00128         elt = c.pop(0)
00129         if elt.localName != "Body" \
00130         or elt.namespaceURI != SOAP.ENV:
00131             if self.header:
00132                 raise ParseException('Header followed by "' + \
00133                         elt.localName + \
00134                         '" element, not Body', 0, elt, self.dom)
00135             else:
00136                 raise ParseException('Document has "' + \
00137                         elt.localName + \
00138                         '" element, not Body', 0, elt, self.dom)
00139         self._check_for_legal_children("Body", elt, 0)
00140         self._check_for_pi_nodes(_children(elt), 0)
00141         self.body = elt
00142         if not _valid_encoding(self.body):
00143             raise ParseException("Body has invalid encoding", 0)
00144 
00145         # Trailer elements.
00146         if not self.trailers:
00147             if len(c):
00148                 raise ParseException("Element found after Body",
00149                         0, elt, self.dom)
00150             # Don't set self.trailer_elements = []; if user didn't ask
00151             # for trailers we *want* to throw an exception.
00152         else:
00153             self.trailer_elements = c
00154             for elt in self.trailer_elements:
00155                 if not elt.namespaceURI:
00156                     raise ParseException('Unqualified trailer element',
00157                             0, elt, self.dom)
00158 
00159         # Find the serialization root.  Divide the Body children into
00160         # root (root=1), no (root=0), maybe (no root attribute).
00161         self.body_root, no, maybe = None, [], []
00162         for elt in _child_elements(self.body):
00163             root = _find_root(elt)
00164             if root == "1":
00165                 if self.body_root:
00166                     raise ParseException("Multiple seralization roots found",
00167                             0, elt, self.dom)
00168                 self.body_root = elt
00169             elif root == "0":
00170                 no.append(elt)
00171             elif not root:
00172                 maybe.append(elt)
00173             else:
00174                 raise ParseException('Illegal value for root attribute',
00175                         0, elt, self.dom)
00176 
00177         # If we didn't find a root, get the first one that didn't
00178         # say "not me", unless they all said "not me."
00179         if self.body_root is None:
00180             if len(maybe):
00181                 self.body_root = maybe[0]
00182             else:
00183                 raise ParseException('No serialization root found',
00184                         0, self.body, self.dom)
00185         if not _valid_encoding(self.body_root):
00186             raise ParseException("Invalid encoding", 0,
00187                     elt, self.dom)
00188 
00189         # Now get all the non-roots (in order!).
00190         rootid = id(self.body_root)
00191         self.data_elements = [ E for E in _child_elements(self.body)
00192                                 if id(E) != rootid ]
00193         self._check_for_pi_nodes(self.data_elements, 0)
00194 
00195     def __del__(self):
00196         try:
00197             if not self.keepdom:
00198                 self.reader.releaseNode(self.dom)
00199         except:
00200             pass
00201 
00202     def _check_for_legal_children(self, name, elt, mustqualify=1):
00203         '''Check if all children of this node are elements or whitespace-only
00204         text nodes.
00205         '''
00206         inheader = name == "Header"
00207         for n in _children(elt):
00208             t = n.nodeType
00209             if t == _Node.COMMENT_NODE: continue
00210             if t != _Node.ELEMENT_NODE:
00211                 if t == _Node.TEXT_NODE and n.nodeValue.strip() == "":
00212                     continue
00213                 raise ParseException("Non-element child in " + name, 
00214                         inheader, elt, self.dom)
00215             if mustqualify and not n.namespaceURI:
00216                 raise ParseException('Unqualified element "' + \
00217                         n.nodeName + '" in ' + name, inheader, elt, self.dom)
00218 
00219     def _check_for_pi_nodes(self, list, inheader):
00220         '''Raise an exception if any of the list descendants are PI nodes.
00221         '''
00222         list = list[:]
00223         while list:
00224             elt = list.pop()
00225             t = elt.nodeType
00226             if t == _Node.PROCESSING_INSTRUCTION_NODE:
00227                 raise ParseException('Found processing instruction "<?' + \
00228                         elt.nodeName + '...>"',
00229                         inheader, elt.parentNode, self.dom)
00230             elif t == _Node.DOCUMENT_TYPE_NODE:
00231                 raise ParseException('Found DTD', inheader,
00232                         elt.parentNode, self.dom)
00233             list += _children(elt)
00234 
00235     def Backtrace(self, elt):
00236         '''Return a human-readable "backtrace" from the document root to
00237         the specified element.
00238         '''
00239         return _backtrace(elt, self.dom)
00240 
00241     def FindLocalHREF(self, href, elt, headers=1):
00242         '''Find a local HREF in the data elements.
00243         '''
00244         if href[0] != '#':
00245             raise EvaluateException(
00246                 'Absolute HREF ("%s") not implemented' % href,
00247                 self.Backtrace(elt))
00248         frag = href[1:]
00249         # Already found?
00250         e = self.id_cache.get(frag)
00251         if e: return e
00252         # Do a breadth-first search, in the data first.  Most likely
00253         # to find multi-ref targets shallow in the data area.
00254         list = self.data_elements[:] + [self.body_root]
00255         if headers: list.extend(self.header_elements)
00256         while list:
00257             e = list.pop()
00258             if e.nodeType == _Node.ELEMENT_NODE:
00259                 nodeid = _find_id(e)
00260                 if nodeid:
00261                     self.id_cache[nodeid] = e
00262                     if nodeid == frag: return e
00263             list += _children(e)
00264         raise EvaluateException('''Can't find node for HREF "%s"''' % href,
00265                 self.Backtrace(elt))
00266 
00267     def ResolveHREF(self, uri, tc, **keywords):
00268         r = getattr(tc, 'resolver', self.resolver)
00269         if not r:
00270             raise EvaluateException('No resolver for "' + uri + '"')
00271         try:
00272             if type(uri) == types.UnicodeType: uri = str(uri)
00273             retval = r(uri, tc, self, **keywords)
00274         except Exception, e:
00275             raise EvaluateException('''Can't resolve "''' + uri + '" (' + \
00276                 str(e.__class__) + "): " + str(e))
00277         return retval
00278 
00279     def GetMyHeaderElements(self, actorlist=None):
00280         '''Return a list of all elements intended for these actor(s).
00281         '''
00282         if actorlist is None:
00283             actorlist = [None, SOAP.ACTOR_NEXT]
00284         else:
00285             actorlist = list(actorlist) + [None, SOAP.ACTOR_NEXT]
00286         return [ E for E in self.header_elements
00287                 if _find_actor(E) in actorlist ]
00288 
00289     def GetElementNSdict(self, elt):
00290         '''Get a dictionary of all the namespace attributes for the indicated
00291         element.  The dictionaries are cached, and we recurse up the tree
00292         as necessary.
00293         '''
00294         d = self.ns_cache.get(id(elt))
00295         if not d:
00296             if elt != self.dom: d = self.GetElementNSdict(elt.parentNode)
00297             for a in _attrs(elt):
00298                 if a.namespaceURI == XMLNS.BASE:
00299                     if a.localName == "xmlns":
00300                         d[''] = a.nodeValue
00301                     else:
00302                         d[a.localName] = a.nodeValue
00303             self.ns_cache[id(elt)] = d
00304         return d.copy()
00305 
00306     def GetDomAndReader(self):
00307         '''Returns a tuple containing the dom and reader objects. (dom, reader)
00308         Unless keepdom is true, the dom and reader objects will go out of scope
00309         when the ParsedSoap instance is deleted. If keepdom is true, the reader
00310         object is needed to properly clean up the dom tree with
00311         reader.releaseNode(dom).
00312         '''
00313         return (self.dom, self.reader)
00314 
00315     def IsAFault(self):
00316         '''Is this a fault message?
00317         '''
00318         e = self.body_root
00319         if not e: return 0
00320         return e.namespaceURI == SOAP.ENV and e.localName == 'Fault'
00321 
00322     def Parse(self, how):
00323         '''Parse the message.
00324         '''
00325         if type(how) == types.ClassType: how = how.typecode
00326         return how.parse(self.body_root, self)
00327 
00328     def WhatMustIUnderstand(self):
00329         '''Return a list of (uri,localname) tuples for all elements in the
00330         header that have mustUnderstand set.
00331         '''
00332         return [ ( E.namespaceURI, E.localName )
00333                 for E in self.header_elements if _find_mu(E) == "1" ]
00334 
00335     def WhatActorsArePresent(self):
00336         '''Return a list of URI's of all the actor attributes found in
00337         the header.  The special actor "next" is ignored.
00338         '''
00339         results = []
00340         for E in self.header_elements:
00341             a = _find_actor(E)
00342             if a not in [ None, SOAP.ACTOR_NEXT ]: results.append(a)
00343         return results
00344 
00345     def ParseHeaderElements(self, ofwhat):
00346         '''Returns a dictionary of pyobjs.
00347         ofhow -- list of typecodes w/matching nspname/pname to the header_elements.
00348         '''
00349         d = {}
00350         lenofwhat = len(ofwhat)
00351         c, crange = self.header_elements[:], range(len(self.header_elements))
00352         for i,what in [ (i, ofwhat[i]) for i in range(lenofwhat) ]:
00353             if isinstance(what, AnyElement): 
00354                 raise EvaluateException, 'not supporting <any> as child of SOAP-ENC:Header'
00355 
00356             v = []
00357             occurs = 0
00358             namespaceURI,tagName = what.nspname,what.pname
00359             for j,c_elt in [ (j, c[j]) for j in crange if c[j] ]:
00360                 prefix,name = SplitQName(c_elt.tagName)
00361                 nsuri = _resolve_prefix(c_elt, prefix)
00362                 if tagName == name and namespaceURI == nsuri:
00363                     pyobj = what.parse(c_elt, self)
00364                 else:
00365                     continue
00366                 v.append(pyobj)
00367                 c[j] = None
00368             if what.minOccurs > len(v) > what.maxOccurs:
00369                raise EvaluateException, 'number of occurances(%d) doesnt fit constraints (%d,%s)'\
00370                    %(len(v),what.minOccurs,what.maxOccurs)
00371             if what.maxOccurs == 1:
00372                 if len(v) == 0: v = None
00373                 else: v = v[0]
00374             d[(what.nspname,what.pname)] = v
00375         return d
00376 
00377 
if __name__ == '__main__':
    # Running the module directly just shows the copyright text.
    print(_copyright)

Generated on Wed Oct 20 2010 11:12:16 for APBS by  doxygen 1.7.2