View Javadoc

1   // ========================================================================
2   // Copyright (c) 2004-2009 Mort Bay Consulting Pty. Ltd.
3   // ------------------------------------------------------------------------
4   // All rights reserved. This program and the accompanying materials
5   // are made available under the terms of the Eclipse Public License v1.0
6   // and Apache License v2.0 which accompanies this distribution.
7   // The Eclipse Public License is available at 
8   // http://www.eclipse.org/legal/epl-v10.html
9   // The Apache License v2.0 is available at
10  // http://www.opensource.org/licenses/apache2.0.php
11  // You may elect to redistribute this code under either of these licenses. 
12  // ========================================================================
13  
14  package org.eclipse.jetty.xml;
15  
16  import java.io.File;
17  import java.io.IOException;
18  import java.io.InputStream;
19  import java.net.URL;
20  import java.util.AbstractList;
21  import java.util.ArrayList;
22  import java.util.HashMap;
23  import java.util.Iterator;
24  import java.util.Map;
25  import java.util.NoSuchElementException;
26  import java.util.Stack;
27  import java.util.StringTokenizer;
28  
29  import javax.xml.parsers.SAXParser;
30  import javax.xml.parsers.SAXParserFactory;
31  
32  import org.eclipse.jetty.util.LazyList;
33  import org.eclipse.jetty.util.log.Log;
34  import org.eclipse.jetty.util.log.Logger;
35  import org.eclipse.jetty.util.resource.Resource;
36  import org.xml.sax.Attributes;
37  import org.xml.sax.ContentHandler;
38  import org.xml.sax.InputSource;
39  import org.xml.sax.SAXException;
40  import org.xml.sax.SAXParseException;
41  import org.xml.sax.XMLReader;
42  import org.xml.sax.helpers.DefaultHandler;
43  
44  /*--------------------------------------------------------------*/
/**
 * XML Parser wrapper. This class wraps any standard JAXP1.1 parser with convenient error and
 * entity handlers and a mini dom-like document tree.
 * <P>
 * By default, the parser is created as a validating parser only if xerces is present. This can be
 * configured by setting the "org.eclipse.jetty.xml.XmlParser.Validating" system property.
 */
54  public class XmlParser
55  {
56      private static final Logger LOG = Log.getLogger(XmlParser.class);
57  
58      private Map<String,URL> _redirectMap = new HashMap<String,URL>();
59      private SAXParser _parser;
60      private Map<String,ContentHandler> _observerMap;
61      private Stack<ContentHandler> _observers = new Stack<ContentHandler>();
62      private String _xpath;
63      private Object _xpaths;
64      private String _dtd;
65  
66      /* ------------------------------------------------------------ */
67      /**
68       * Construct
69       */
70      public XmlParser()
71      {
72          SAXParserFactory factory = SAXParserFactory.newInstance();
73          boolean validating_dft = factory.getClass().toString().startsWith("org.apache.xerces.");
74          String validating_prop = System.getProperty("org.eclipse.jetty.xml.XmlParser.Validating", validating_dft ? "true" : "false");
75          boolean validating = Boolean.valueOf(validating_prop).booleanValue();
76          setValidating(validating);
77      }
78  
79      /* ------------------------------------------------------------ */
/**
 * Construct a parser with an explicitly chosen validation mode.
 *
 * @param validating true to create a validating parser, false for a non-validating one.
 */
public XmlParser(boolean validating)
{
    setValidating(validating);
}
87      
88      /* ------------------------------------------------------------ */
89      public void setValidating(boolean validating)
90      {
91          try
92          {
93              SAXParserFactory factory = SAXParserFactory.newInstance();
94              factory.setValidating(validating);
95              _parser = factory.newSAXParser();
96              
97              try
98              {
99                  if (validating)
100                     _parser.getXMLReader().setFeature("http://apache.org/xml/features/validation/schema", validating);
101             }
102             catch (Exception e)
103             {
104                 if (validating)
105                     LOG.warn("Schema validation may not be supported: ", e);
106                 else
107                     LOG.ignore(e);
108             }
109 
110             _parser.getXMLReader().setFeature("http://xml.org/sax/features/validation", validating);
111             _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespaces", true);
112             _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes", false);  
113             try
114             {
115                 if (validating)
116                     _parser.getXMLReader().setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", validating);
117             }
118             catch (Exception e)
119             {
120                 LOG.warn(e.getMessage());
121             }
122         }
123         catch (Exception e)
124         {
125             LOG.warn(Log.EXCEPTION, e);
126             throw new Error(e.toString());
127         }
128     }
129     
130     /* ------------------------------------------------------------ */
131     /**
132      * @param name
133      * @param entity
134      */
135     public synchronized void redirectEntity(String name, URL entity)
136     {
137         if (entity != null)
138             _redirectMap.put(name, entity);
139     }
140 
141     /* ------------------------------------------------------------ */
142     /**
143      * 
144      * @return Returns the xpath.
145      */
146     public String getXpath()
147     {
148         return _xpath;
149     }
150 
151     /* ------------------------------------------------------------ */
152     /**
153      * Set an XPath A very simple subset of xpath is supported to select a partial tree. Currently
154      * only path like "/node1/nodeA | /node1/nodeB" are supported.
155      * 
156      * @param xpath The xpath to set.
157      */
158     public void setXpath(String xpath)
159     {
160         _xpath = xpath;
161         StringTokenizer tok = new StringTokenizer(xpath, "| ");
162         while (tok.hasMoreTokens())
163             _xpaths = LazyList.add(_xpaths, tok.nextToken());
164     }
165 
166     /* ------------------------------------------------------------ */
167     public String getDTD()
168     {
169         return _dtd;
170     }
171 
172     /* ------------------------------------------------------------ */
173     /**
174      * Add a ContentHandler. Add an additional _content handler that is triggered on a tag name. SAX
175      * events are passed to the ContentHandler provided from a matching start element to the
176      * corresponding end element. Only a single _content handler can be registered against each tag.
177      * 
178      * @param trigger Tag local or q name.
179      * @param observer SAX ContentHandler
180      */
181     public synchronized void addContentHandler(String trigger, ContentHandler observer)
182     {
183         if (_observerMap == null)
184             _observerMap = new HashMap();
185         _observerMap.put(trigger, observer);
186     }
187 
188     /* ------------------------------------------------------------ */
189     public synchronized Node parse(InputSource source) throws IOException, SAXException
190     {
191         _dtd=null;
192         Handler handler = new Handler();
193         XMLReader reader = _parser.getXMLReader();
194         reader.setContentHandler(handler);
195         reader.setErrorHandler(handler);
196         reader.setEntityResolver(handler);
197         if (LOG.isDebugEnabled())
198             LOG.debug("parsing: sid=" + source.getSystemId() + ",pid=" + source.getPublicId());
199         _parser.parse(source, handler);
200         if (handler._error != null)
201             throw handler._error;
202         Node doc = (Node) handler._top.get(0);
203         handler.clear();
204         return doc;
205     }
206 
207     /* ------------------------------------------------------------ */
208     /**
209      * Parse String URL.
210      */
211     public synchronized Node parse(String url) throws IOException, SAXException
212     {
213         if (LOG.isDebugEnabled())
214             LOG.debug("parse: " + url);
215         return parse(new InputSource(url));
216     }
217 
218     /* ------------------------------------------------------------ */
219     /**
220      * Parse File.
221      */
222     public synchronized Node parse(File file) throws IOException, SAXException
223     {
224         if (LOG.isDebugEnabled())
225             LOG.debug("parse: " + file);
226         return parse(new InputSource(Resource.toURL(file).toString()));
227     }
228 
229     /* ------------------------------------------------------------ */
230     /**
231      * Parse InputStream.
232      */
233     public synchronized Node parse(InputStream in) throws IOException, SAXException
234     {
235         _dtd=null;
236         Handler handler = new Handler();
237         XMLReader reader = _parser.getXMLReader();
238         reader.setContentHandler(handler);
239         reader.setErrorHandler(handler);
240         reader.setEntityResolver(handler);
241         _parser.parse(new InputSource(in), handler);
242         if (handler._error != null)
243             throw handler._error;
244         Node doc = (Node) handler._top.get(0);
245         handler.clear();
246         return doc;
247     }
248 
249     /* ------------------------------------------------------------ */
250     /* ------------------------------------------------------------ */
251     private class NoopHandler extends DefaultHandler
252     {
253         Handler _next;
254         int _depth;
255 
256         NoopHandler(Handler next)
257         {
258             this._next = next;
259         }
260 
261         /* ------------------------------------------------------------ */
262         public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
263         {
264             _depth++;
265         }
266 
267         /* ------------------------------------------------------------ */
268         public void endElement(String uri, String localName, String qName) throws SAXException
269         {
270             if (_depth == 0)
271                 _parser.getXMLReader().setContentHandler(_next);
272             else
273                 _depth--;
274         }
275     }
276     
277     /* ------------------------------------------------------------ */
278     /* ------------------------------------------------------------ */
279     private class Handler extends DefaultHandler
280     {
281         Node _top = new Node(null, null, null);
282         SAXParseException _error;
283         private Node _context = _top;
284         private NoopHandler _noop;
285 
286         Handler()
287         {
288             _noop = new NoopHandler(this);
289         }
290 
291         /* ------------------------------------------------------------ */
292         void clear()
293         {
294             _top = null;
295             _error = null;
296             _context = null;
297         }
298 
299         /* ------------------------------------------------------------ */
300         public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
301         {
302             String name = null;
303             if (_parser.isNamespaceAware())
304                 name = localName;
305 
306             if (name == null || "".equals(name))
307                 name = qName;
308 
309             Node node = new Node(_context, name, attrs);
310             
311 
312             // check if the node matches any xpaths set?
313             if (_xpaths != null)
314             {
315                 String path = node.getPath();
316                 boolean match = false;
317                 for (int i = LazyList.size(_xpaths); !match && i-- > 0;)
318                 {
319                     String xpath = (String) LazyList.get(_xpaths, i);
320 
321                     match = path.equals(xpath) || xpath.startsWith(path) && xpath.length() > path.length() && xpath.charAt(path.length()) == '/';
322                 }
323 
324                 if (match)
325                 {
326                     _context.add(node);
327                     _context = node;
328                 }
329                 else
330                 {
331                     _parser.getXMLReader().setContentHandler(_noop);
332                 }
333             }
334             else
335             {
336                 _context.add(node);
337                 _context = node;
338             }
339 
340             ContentHandler observer = null;
341             if (_observerMap != null)
342                 observer = (ContentHandler) _observerMap.get(name);
343             _observers.push(observer);
344 
345             for (int i = 0; i < _observers.size(); i++)
346                 if (_observers.get(i) != null)
347                     ((ContentHandler) _observers.get(i)).startElement(uri, localName, qName, attrs);
348         }
349 
350         /* ------------------------------------------------------------ */
351         public void endElement(String uri, String localName, String qName) throws SAXException
352         {
353             _context = _context._parent;
354             for (int i = 0; i < _observers.size(); i++)
355                 if (_observers.get(i) != null)
356                     ((ContentHandler) _observers.get(i)).endElement(uri, localName, qName);
357             _observers.pop();
358         }
359 
360         /* ------------------------------------------------------------ */
361         public void ignorableWhitespace(char buf[], int offset, int len) throws SAXException
362         {
363             for (int i = 0; i < _observers.size(); i++)
364                 if (_observers.get(i) != null)
365                     ((ContentHandler) _observers.get(i)).ignorableWhitespace(buf, offset, len);
366         }
367 
368         /* ------------------------------------------------------------ */
369         public void characters(char buf[], int offset, int len) throws SAXException
370         {
371             _context.add(new String(buf, offset, len));
372             for (int i = 0; i < _observers.size(); i++)
373                 if (_observers.get(i) != null)
374                     ((ContentHandler) _observers.get(i)).characters(buf, offset, len);
375         }
376 
377         /* ------------------------------------------------------------ */
378         public void warning(SAXParseException ex)
379         {
380             LOG.debug(Log.EXCEPTION, ex);
381             LOG.warn("WARNING@" + getLocationString(ex) + " : " + ex.toString());
382         }
383 
384         /* ------------------------------------------------------------ */
385         public void error(SAXParseException ex) throws SAXException
386         {
387             // Save error and continue to report other errors
388             if (_error == null)
389                 _error = ex;
390             LOG.debug(Log.EXCEPTION, ex);
391             LOG.warn("ERROR@" + getLocationString(ex) + " : " + ex.toString());
392         }
393 
394         /* ------------------------------------------------------------ */
395         public void fatalError(SAXParseException ex) throws SAXException
396         {
397             _error = ex;
398             LOG.debug(Log.EXCEPTION, ex);
399             LOG.warn("FATAL@" + getLocationString(ex) + " : " + ex.toString());
400             throw ex;
401         }
402 
403         /* ------------------------------------------------------------ */
404         private String getLocationString(SAXParseException ex)
405         {
406             return ex.getSystemId() + " line:" + ex.getLineNumber() + " col:" + ex.getColumnNumber();
407         }
408 
409         /* ------------------------------------------------------------ */
410         public InputSource resolveEntity(String pid, String sid)
411         {
412             if (LOG.isDebugEnabled())
413                 LOG.debug("resolveEntity(" + pid + ", " + sid + ")");
414             
415             if (sid!=null && sid.endsWith(".dtd"))
416                 _dtd=sid;
417             
418             URL entity = null;
419             if (pid != null)
420                 entity = (URL) _redirectMap.get(pid);
421             if (entity == null)
422                 entity = (URL) _redirectMap.get(sid);
423             if (entity == null)
424             {
425                 String dtd = sid;
426                 if (dtd.lastIndexOf('/') >= 0)
427                     dtd = dtd.substring(dtd.lastIndexOf('/') + 1);
428 
429                 if (LOG.isDebugEnabled())
430                     LOG.debug("Can't exact match entity in redirect map, trying " + dtd);
431                 entity = (URL) _redirectMap.get(dtd);
432             }
433 
434             if (entity != null)
435             {
436                 try
437                 {
438                     InputStream in = entity.openStream();
439                     if (LOG.isDebugEnabled())
440                         LOG.debug("Redirected entity " + sid + " --> " + entity);
441                     InputSource is = new InputSource(in);
442                     is.setSystemId(sid);
443                     return is;
444                 }
445                 catch (IOException e)
446                 {
447                     LOG.ignore(e);
448                 }
449             }
450             return null;
451         }
452     }
453 
454     /* ------------------------------------------------------------ */
455     /* ------------------------------------------------------------ */
456     /**
457      * XML Attribute.
458      */
459     public static class Attribute
460     {
461         private String _name;
462         private String _value;
463 
464         Attribute(String n, String v)
465         {
466             _name = n;
467             _value = v;
468         }
469 
470         public String getName()
471         {
472             return _name;
473         }
474 
475         public String getValue()
476         {
477             return _value;
478         }
479     }
480 
481     /* ------------------------------------------------------------ */
482     /* ------------------------------------------------------------ */
483     /**
484      * XML Node. Represents an XML element with optional attributes and ordered content.
485      */
486     public static class Node extends AbstractList<Object>
487     {
488         Node _parent;
489         private ArrayList<Object> _list;
490         private String _tag;
491         private Attribute[] _attrs;
492         private boolean _lastString = false;
493         private String _path;
494 
495         /* ------------------------------------------------------------ */
496         Node(Node parent, String tag, Attributes attrs)
497         {
498             _parent = parent;
499             _tag = tag;
500 
501             if (attrs != null)
502             {
503                 _attrs = new Attribute[attrs.getLength()];
504                 for (int i = 0; i < attrs.getLength(); i++)
505                 {
506                     String name = attrs.getLocalName(i);
507                     if (name == null || name.equals(""))
508                         name = attrs.getQName(i);
509                     _attrs[i] = new Attribute(name, attrs.getValue(i));
510                 }
511             }
512         }
513 
514         /* ------------------------------------------------------------ */
515         public Node getParent()
516         {
517             return _parent;
518         }
519 
520         /* ------------------------------------------------------------ */
521         public String getTag()
522         {
523             return _tag;
524         }
525 
526         /* ------------------------------------------------------------ */
527         public String getPath()
528         {
529             if (_path == null)
530             {
531                 if (getParent() != null && getParent().getTag() != null)
532                     _path = getParent().getPath() + "/" + _tag;
533                 else
534                     _path = "/" + _tag;
535             }
536             return _path;
537         }
538 
539         /* ------------------------------------------------------------ */
540         /**
541          * Get an array of element attributes.
542          */
543         public Attribute[] getAttributes()
544         {
545             return _attrs;
546         }
547 
548         /* ------------------------------------------------------------ */
549         /**
550          * Get an element attribute.
551          * 
552          * @return attribute or null.
553          */
554         public String getAttribute(String name)
555         {
556             return getAttribute(name, null);
557         }
558 
559         /* ------------------------------------------------------------ */
560         /**
561          * Get an element attribute.
562          * 
563          * @return attribute or null.
564          */
565         public String getAttribute(String name, String dft)
566         {
567             if (_attrs == null || name == null)
568                 return dft;
569             for (int i = 0; i < _attrs.length; i++)
570                 if (name.equals(_attrs[i].getName()))
571                     return _attrs[i].getValue();
572             return dft;
573         }
574 
575         /* ------------------------------------------------------------ */
576         /**
577          * Get the number of children nodes.
578          */
579         public int size()
580         {
581             if (_list != null)
582                 return _list.size();
583             return 0;
584         }
585 
586         /* ------------------------------------------------------------ */
587         /**
588          * Get the ith child node or content.
589          * 
590          * @return Node or String.
591          */
592         public Object get(int i)
593         {
594             if (_list != null)
595                 return _list.get(i);
596             return null;
597         }
598 
599         /* ------------------------------------------------------------ */
600         /**
601          * Get the first child node with the tag.
602          * 
603          * @param tag
604          * @return Node or null.
605          */
606         public Node get(String tag)
607         {
608             if (_list != null)
609             {
610                 for (int i = 0; i < _list.size(); i++)
611                 {
612                     Object o = _list.get(i);
613                     if (o instanceof Node)
614                     {
615                         Node n = (Node) o;
616                         if (tag.equals(n._tag))
617                             return n;
618                     }
619                 }
620             }
621             return null;
622         }
623 
624         /* ------------------------------------------------------------ */
625         @Override
626         public void add(int i, Object o)
627         {
628             if (_list == null)
629                 _list = new ArrayList<Object>();
630             if (o instanceof String)
631             {
632                 if (_lastString)
633                 {
634                     int last = _list.size() - 1;
635                     _list.set(last, (String) _list.get(last) + o);
636                 }
637                 else
638                     _list.add(i, o);
639                 _lastString = true;
640             }
641             else
642             {
643                 _lastString = false;
644                 _list.add(i, o);
645             }
646         }
647 
648         /* ------------------------------------------------------------ */
649         public void clear()
650         {
651             if (_list != null)
652                 _list.clear();
653             _list = null;
654         }
655 
656         /* ------------------------------------------------------------ */
657         /**
658          * Get a tag as a string.
659          * 
660          * @param tag The tag to get
661          * @param tags IF true, tags are included in the value.
662          * @param trim If true, trim the value.
663          * @return results of get(tag).toString(tags).
664          */
665         public String getString(String tag, boolean tags, boolean trim)
666         {
667             Node node = get(tag);
668             if (node == null)
669                 return null;
670             String s = node.toString(tags);
671             if (s != null && trim)
672                 s = s.trim();
673             return s;
674         }
675 
676         /* ------------------------------------------------------------ */
677         public synchronized String toString()
678         {
679             return toString(true);
680         }
681 
682         /* ------------------------------------------------------------ */
683         /**
684          * Convert to a string.
685          * 
686          * @param tag If false, only _content is shown.
687          */
688         public synchronized String toString(boolean tag)
689         {
690             StringBuilder buf = new StringBuilder();
691             toString(buf, tag);
692             return buf.toString();
693         }
694 
695         /* ------------------------------------------------------------ */
696         /**
697          * Convert to a string.
698          * 
699          * @param tag If false, only _content is shown.
700          */
701         public synchronized String toString(boolean tag, boolean trim)
702         {
703             String s = toString(tag);
704             if (s != null && trim)
705                 s = s.trim();
706             return s;
707         }
708 
709         /* ------------------------------------------------------------ */
710         private synchronized void toString(StringBuilder buf, boolean tag)
711         {
712             if (tag)
713             {
714                 buf.append("<");
715                 buf.append(_tag);
716 
717                 if (_attrs != null)
718                 {
719                     for (int i = 0; i < _attrs.length; i++)
720                     {
721                         buf.append(' ');
722                         buf.append(_attrs[i].getName());
723                         buf.append("=\"");
724                         buf.append(_attrs[i].getValue());
725                         buf.append("\"");
726                     }
727                 }
728             }
729 
730             if (_list != null)
731             {
732                 if (tag)
733                     buf.append(">");
734                 for (int i = 0; i < _list.size(); i++)
735                 {
736                     Object o = _list.get(i);
737                     if (o == null)
738                         continue;
739                     if (o instanceof Node)
740                         ((Node) o).toString(buf, tag);
741                     else
742                         buf.append(o.toString());
743                 }
744                 if (tag)
745                 {
746                     buf.append("</");
747                     buf.append(_tag);
748                     buf.append(">");
749                 }
750             }
751             else if (tag)
752                 buf.append("/>");
753         }
754 
755         /* ------------------------------------------------------------ */
756         /**
757          * Iterator over named child nodes.
758          * 
759          * @param tag The tag of the nodes.
760          * @return Iterator over all child nodes with the specified tag.
761          */
762         public Iterator<Node> iterator(final String tag)
763         {
764             return new Iterator<Node>()
765             {
766                 int c = 0;
767                 Node _node;
768 
769                 /* -------------------------------------------------- */
770                 public boolean hasNext()
771                 {
772                     if (_node != null)
773                         return true;
774                     while (_list != null && c < _list.size())
775                     {
776                         Object o = _list.get(c);
777                         if (o instanceof Node)
778                         {
779                             Node n = (Node) o;
780                             if (tag.equals(n._tag))
781                             {
782                                 _node = n;
783                                 return true;
784                             }
785                         }
786                         c++;
787                     }
788                     return false;
789                 }
790 
791                 /* -------------------------------------------------- */
792                 public Node next()
793                 {
794                     try
795                     {
796                         if (hasNext())
797                             return _node;
798                         throw new NoSuchElementException();
799                     }
800                     finally
801                     {
802                         _node = null;
803                         c++;
804                     }
805                 }
806 
807                 /* -------------------------------------------------- */
808                 public void remove()
809                 {
810                     throw new UnsupportedOperationException("Not supported");
811                 }
812             };
813         }
814     }
815 }