View Javadoc

1   //
2   //  ========================================================================
3   //  Copyright (c) 1995-2013 Mort Bay Consulting Pty. Ltd.
4   //  ------------------------------------------------------------------------
5   //  All rights reserved. This program and the accompanying materials
6   //  are made available under the terms of the Eclipse Public License v1.0
7   //  and Apache License v2.0 which accompanies this distribution.
8   //
9   //      The Eclipse Public License is available at
10  //      http://www.eclipse.org/legal/epl-v10.html
11  //
12  //      The Apache License v2.0 is available at
13  //      http://www.opensource.org/licenses/apache2.0.php
14  //
15  //  You may elect to redistribute this code under either of these licenses.
16  //  ========================================================================
17  //
18  
19  package org.eclipse.jetty.xml;
20  
21  import java.io.File;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.net.URL;
25  import java.util.AbstractList;
26  import java.util.ArrayList;
27  import java.util.HashMap;
28  import java.util.Iterator;
29  import java.util.Map;
30  import java.util.NoSuchElementException;
31  import java.util.Stack;
32  import java.util.StringTokenizer;
33  
34  import javax.xml.parsers.SAXParser;
35  import javax.xml.parsers.SAXParserFactory;
36  
37  import org.eclipse.jetty.util.LazyList;
38  import org.eclipse.jetty.util.log.Log;
39  import org.eclipse.jetty.util.log.Logger;
40  import org.eclipse.jetty.util.resource.Resource;
41  import org.xml.sax.Attributes;
42  import org.xml.sax.ContentHandler;
43  import org.xml.sax.InputSource;
44  import org.xml.sax.SAXException;
45  import org.xml.sax.SAXParseException;
46  import org.xml.sax.XMLReader;
47  import org.xml.sax.helpers.DefaultHandler;
48  
49  /*--------------------------------------------------------------*/
50  /**
51   * XML Parser wrapper. This class wraps any standard JAXP1.1 parser with convenient error and
52   * entity handlers and a mini dom-like document tree.
53   * <P>
54   * By default, the parser is created as a validating parser only if xerces is present. This can be
55   * configured by setting the "org.eclipse.jetty.xml.XmlParser.Validating" system property.
56   *
57   *
58   */
59  public class XmlParser
60  {
61      private static final Logger LOG = Log.getLogger(XmlParser.class);
62  
63      private Map<String,URL> _redirectMap = new HashMap<String,URL>();
64      private SAXParser _parser;
65      private Map<String,ContentHandler> _observerMap;
66      private Stack<ContentHandler> _observers = new Stack<ContentHandler>();
67      private String _xpath;
68      private Object _xpaths;
69      private String _dtd;
70  
71      /* ------------------------------------------------------------ */
72      /**
73       * Construct
74       */
75      public XmlParser()
76      {
77          SAXParserFactory factory = SAXParserFactory.newInstance();
78          boolean validating_dft = factory.getClass().toString().startsWith("org.apache.xerces.");
79          String validating_prop = System.getProperty("org.eclipse.jetty.xml.XmlParser.Validating", validating_dft ? "true" : "false");
80          boolean validating = Boolean.valueOf(validating_prop).booleanValue();
81          setValidating(validating);
82      }
83  
84      /* ------------------------------------------------------------ */
85      /**
86       * Constructor.
87       */
88      public XmlParser(boolean validating)
89      {
90          setValidating(validating);
91      }
92  
93      /* ------------------------------------------------------------ */
94      public void setValidating(boolean validating)
95      {
96          try
97          {
98              SAXParserFactory factory = SAXParserFactory.newInstance();
99              factory.setValidating(validating);
100             _parser = factory.newSAXParser();
101 
102             try
103             {
104                 if (validating)
105                     _parser.getXMLReader().setFeature("http://apache.org/xml/features/validation/schema", validating);
106             }
107             catch (Exception e)
108             {
109                 if (validating)
110                     LOG.warn("Schema validation may not be supported: ", e);
111                 else
112                     LOG.ignore(e);
113             }
114 
115             _parser.getXMLReader().setFeature("http://xml.org/sax/features/validation", validating);
116             _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespaces", true);
117             _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes", false);
118             try
119             {
120                 if (validating)
121                     _parser.getXMLReader().setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", validating);
122             }
123             catch (Exception e)
124             {
125                 LOG.warn(e.getMessage());
126             }
127         }
128         catch (Exception e)
129         {
130             LOG.warn(Log.EXCEPTION, e);
131             throw new Error(e.toString());
132         }
133     }
134 
135     /* ------------------------------------------------------------ */
136     /**
137      * @param name
138      * @param entity
139      */
140     public synchronized void redirectEntity(String name, URL entity)
141     {
142         if (entity != null)
143             _redirectMap.put(name, entity);
144     }
145 
146     /* ------------------------------------------------------------ */
147     /**
148      *
149      * @return Returns the xpath.
150      */
151     public String getXpath()
152     {
153         return _xpath;
154     }
155 
156     /* ------------------------------------------------------------ */
157     /**
158      * Set an XPath A very simple subset of xpath is supported to select a partial tree. Currently
159      * only path like "/node1/nodeA | /node1/nodeB" are supported.
160      *
161      * @param xpath The xpath to set.
162      */
163     public void setXpath(String xpath)
164     {
165         _xpath = xpath;
166         StringTokenizer tok = new StringTokenizer(xpath, "| ");
167         while (tok.hasMoreTokens())
168             _xpaths = LazyList.add(_xpaths, tok.nextToken());
169     }
170 
171     /* ------------------------------------------------------------ */
172     public String getDTD()
173     {
174         return _dtd;
175     }
176 
177     /* ------------------------------------------------------------ */
178     /**
179      * Add a ContentHandler. Add an additional _content handler that is triggered on a tag name. SAX
180      * events are passed to the ContentHandler provided from a matching start element to the
181      * corresponding end element. Only a single _content handler can be registered against each tag.
182      *
183      * @param trigger Tag local or q name.
184      * @param observer SAX ContentHandler
185      */
186     public synchronized void addContentHandler(String trigger, ContentHandler observer)
187     {
188         if (_observerMap == null)
189             _observerMap = new HashMap();
190         _observerMap.put(trigger, observer);
191     }
192 
193     /* ------------------------------------------------------------ */
194     public synchronized Node parse(InputSource source) throws IOException, SAXException
195     {
196         _dtd=null;
197         Handler handler = new Handler();
198         XMLReader reader = _parser.getXMLReader();
199         reader.setContentHandler(handler);
200         reader.setErrorHandler(handler);
201         reader.setEntityResolver(handler);
202         if (LOG.isDebugEnabled())
203             LOG.debug("parsing: sid=" + source.getSystemId() + ",pid=" + source.getPublicId());
204         _parser.parse(source, handler);
205         if (handler._error != null)
206             throw handler._error;
207         Node doc = (Node) handler._top.get(0);
208         handler.clear();
209         return doc;
210     }
211 
212     /* ------------------------------------------------------------ */
213     /**
214      * Parse String URL.
215      */
216     public synchronized Node parse(String url) throws IOException, SAXException
217     {
218         if (LOG.isDebugEnabled())
219             LOG.debug("parse: " + url);
220         return parse(new InputSource(url));
221     }
222 
223     /* ------------------------------------------------------------ */
224     /**
225      * Parse File.
226      */
227     public synchronized Node parse(File file) throws IOException, SAXException
228     {
229         if (LOG.isDebugEnabled())
230             LOG.debug("parse: " + file);
231         return parse(new InputSource(Resource.toURL(file).toString()));
232     }
233 
234     /* ------------------------------------------------------------ */
235     /**
236      * Parse InputStream.
237      */
238     public synchronized Node parse(InputStream in) throws IOException, SAXException
239     {
240         _dtd=null;
241         Handler handler = new Handler();
242         XMLReader reader = _parser.getXMLReader();
243         reader.setContentHandler(handler);
244         reader.setErrorHandler(handler);
245         reader.setEntityResolver(handler);
246         _parser.parse(new InputSource(in), handler);
247         if (handler._error != null)
248             throw handler._error;
249         Node doc = (Node) handler._top.get(0);
250         handler.clear();
251         return doc;
252     }
253 
254     /* ------------------------------------------------------------ */
255     /* ------------------------------------------------------------ */
256     private class NoopHandler extends DefaultHandler
257     {
258         Handler _next;
259         int _depth;
260 
261         NoopHandler(Handler next)
262         {
263             this._next = next;
264         }
265 
266         /* ------------------------------------------------------------ */
267         public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
268         {
269             _depth++;
270         }
271 
272         /* ------------------------------------------------------------ */
273         public void endElement(String uri, String localName, String qName) throws SAXException
274         {
275             if (_depth == 0)
276                 _parser.getXMLReader().setContentHandler(_next);
277             else
278                 _depth--;
279         }
280     }
281 
282     /* ------------------------------------------------------------ */
283     /* ------------------------------------------------------------ */
284     private class Handler extends DefaultHandler
285     {
286         Node _top = new Node(null, null, null);
287         SAXParseException _error;
288         private Node _context = _top;
289         private NoopHandler _noop;
290 
291         Handler()
292         {
293             _noop = new NoopHandler(this);
294         }
295 
296         /* ------------------------------------------------------------ */
297         void clear()
298         {
299             _top = null;
300             _error = null;
301             _context = null;
302         }
303 
304         /* ------------------------------------------------------------ */
305         public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
306         {
307             String name = null;
308             if (_parser.isNamespaceAware())
309                 name = localName;
310 
311             if (name == null || "".equals(name))
312                 name = qName;
313 
314             Node node = new Node(_context, name, attrs);
315 
316 
317             // check if the node matches any xpaths set?
318             if (_xpaths != null)
319             {
320                 String path = node.getPath();
321                 boolean match = false;
322                 for (int i = LazyList.size(_xpaths); !match && i-- > 0;)
323                 {
324                     String xpath = (String) LazyList.get(_xpaths, i);
325 
326                     match = path.equals(xpath) || xpath.startsWith(path) && xpath.length() > path.length() && xpath.charAt(path.length()) == '/';
327                 }
328 
329                 if (match)
330                 {
331                     _context.add(node);
332                     _context = node;
333                 }
334                 else
335                 {
336                     _parser.getXMLReader().setContentHandler(_noop);
337                 }
338             }
339             else
340             {
341                 _context.add(node);
342                 _context = node;
343             }
344 
345             ContentHandler observer = null;
346             if (_observerMap != null)
347                 observer = (ContentHandler) _observerMap.get(name);
348             _observers.push(observer);
349 
350             for (int i = 0; i < _observers.size(); i++)
351                 if (_observers.get(i) != null)
352                     ((ContentHandler) _observers.get(i)).startElement(uri, localName, qName, attrs);
353         }
354 
355         /* ------------------------------------------------------------ */
356         public void endElement(String uri, String localName, String qName) throws SAXException
357         {
358             _context = _context._parent;
359             for (int i = 0; i < _observers.size(); i++)
360                 if (_observers.get(i) != null)
361                     ((ContentHandler) _observers.get(i)).endElement(uri, localName, qName);
362             _observers.pop();
363         }
364 
365         /* ------------------------------------------------------------ */
366         public void ignorableWhitespace(char buf[], int offset, int len) throws SAXException
367         {
368             for (int i = 0; i < _observers.size(); i++)
369                 if (_observers.get(i) != null)
370                     ((ContentHandler) _observers.get(i)).ignorableWhitespace(buf, offset, len);
371         }
372 
373         /* ------------------------------------------------------------ */
374         public void characters(char buf[], int offset, int len) throws SAXException
375         {
376             _context.add(new String(buf, offset, len));
377             for (int i = 0; i < _observers.size(); i++)
378                 if (_observers.get(i) != null)
379                     ((ContentHandler) _observers.get(i)).characters(buf, offset, len);
380         }
381 
382         /* ------------------------------------------------------------ */
383         public void warning(SAXParseException ex)
384         {
385             LOG.debug(Log.EXCEPTION, ex);
386             LOG.warn("WARNING@" + getLocationString(ex) + " : " + ex.toString());
387         }
388 
389         /* ------------------------------------------------------------ */
390         public void error(SAXParseException ex) throws SAXException
391         {
392             // Save error and continue to report other errors
393             if (_error == null)
394                 _error = ex;
395             LOG.debug(Log.EXCEPTION, ex);
396             LOG.warn("ERROR@" + getLocationString(ex) + " : " + ex.toString());
397         }
398 
399         /* ------------------------------------------------------------ */
400         public void fatalError(SAXParseException ex) throws SAXException
401         {
402             _error = ex;
403             LOG.debug(Log.EXCEPTION, ex);
404             LOG.warn("FATAL@" + getLocationString(ex) + " : " + ex.toString());
405             throw ex;
406         }
407 
408         /* ------------------------------------------------------------ */
409         private String getLocationString(SAXParseException ex)
410         {
411             return ex.getSystemId() + " line:" + ex.getLineNumber() + " col:" + ex.getColumnNumber();
412         }
413 
414         /* ------------------------------------------------------------ */
415         public InputSource resolveEntity(String pid, String sid)
416         {
417             if (LOG.isDebugEnabled())
418                 LOG.debug("resolveEntity(" + pid + ", " + sid + ")");
419 
420             if (sid!=null && sid.endsWith(".dtd"))
421                 _dtd=sid;
422 
423             URL entity = null;
424             if (pid != null)
425                 entity = (URL) _redirectMap.get(pid);
426             if (entity == null)
427                 entity = (URL) _redirectMap.get(sid);
428             if (entity == null)
429             {
430                 String dtd = sid;
431                 if (dtd.lastIndexOf('/') >= 0)
432                     dtd = dtd.substring(dtd.lastIndexOf('/') + 1);
433 
434                 if (LOG.isDebugEnabled())
435                     LOG.debug("Can't exact match entity in redirect map, trying " + dtd);
436                 entity = (URL) _redirectMap.get(dtd);
437             }
438 
439             if (entity != null)
440             {
441                 try
442                 {
443                     InputStream in = entity.openStream();
444                     if (LOG.isDebugEnabled())
445                         LOG.debug("Redirected entity " + sid + " --> " + entity);
446                     InputSource is = new InputSource(in);
447                     is.setSystemId(sid);
448                     return is;
449                 }
450                 catch (IOException e)
451                 {
452                     LOG.ignore(e);
453                 }
454             }
455             return null;
456         }
457     }
458 
459     /* ------------------------------------------------------------ */
460     /* ------------------------------------------------------------ */
461     /**
462      * XML Attribute.
463      */
464     public static class Attribute
465     {
466         private String _name;
467         private String _value;
468 
469         Attribute(String n, String v)
470         {
471             _name = n;
472             _value = v;
473         }
474 
475         public String getName()
476         {
477             return _name;
478         }
479 
480         public String getValue()
481         {
482             return _value;
483         }
484     }
485 
486     /* ------------------------------------------------------------ */
487     /* ------------------------------------------------------------ */
488     /**
489      * XML Node. Represents an XML element with optional attributes and ordered content.
490      */
491     public static class Node extends AbstractList<Object>
492     {
493         Node _parent;
494         private ArrayList<Object> _list;
495         private String _tag;
496         private Attribute[] _attrs;
497         private boolean _lastString = false;
498         private String _path;
499 
500         /* ------------------------------------------------------------ */
501         Node(Node parent, String tag, Attributes attrs)
502         {
503             _parent = parent;
504             _tag = tag;
505 
506             if (attrs != null)
507             {
508                 _attrs = new Attribute[attrs.getLength()];
509                 for (int i = 0; i < attrs.getLength(); i++)
510                 {
511                     String name = attrs.getLocalName(i);
512                     if (name == null || name.equals(""))
513                         name = attrs.getQName(i);
514                     _attrs[i] = new Attribute(name, attrs.getValue(i));
515                 }
516             }
517         }
518 
519         /* ------------------------------------------------------------ */
520         public Node getParent()
521         {
522             return _parent;
523         }
524 
525         /* ------------------------------------------------------------ */
526         public String getTag()
527         {
528             return _tag;
529         }
530 
531         /* ------------------------------------------------------------ */
532         public String getPath()
533         {
534             if (_path == null)
535             {
536                 if (getParent() != null && getParent().getTag() != null)
537                     _path = getParent().getPath() + "/" + _tag;
538                 else
539                     _path = "/" + _tag;
540             }
541             return _path;
542         }
543 
544         /* ------------------------------------------------------------ */
545         /**
546          * Get an array of element attributes.
547          */
548         public Attribute[] getAttributes()
549         {
550             return _attrs;
551         }
552 
553         /* ------------------------------------------------------------ */
554         /**
555          * Get an element attribute.
556          *
557          * @return attribute or null.
558          */
559         public String getAttribute(String name)
560         {
561             return getAttribute(name, null);
562         }
563 
564         /* ------------------------------------------------------------ */
565         /**
566          * Get an element attribute.
567          *
568          * @return attribute or null.
569          */
570         public String getAttribute(String name, String dft)
571         {
572             if (_attrs == null || name == null)
573                 return dft;
574             for (int i = 0; i < _attrs.length; i++)
575                 if (name.equals(_attrs[i].getName()))
576                     return _attrs[i].getValue();
577             return dft;
578         }
579 
580         /* ------------------------------------------------------------ */
581         /**
582          * Get the number of children nodes.
583          */
584         public int size()
585         {
586             if (_list != null)
587                 return _list.size();
588             return 0;
589         }
590 
591         /* ------------------------------------------------------------ */
592         /**
593          * Get the ith child node or content.
594          *
595          * @return Node or String.
596          */
597         public Object get(int i)
598         {
599             if (_list != null)
600                 return _list.get(i);
601             return null;
602         }
603 
604         /* ------------------------------------------------------------ */
605         /**
606          * Get the first child node with the tag.
607          *
608          * @param tag
609          * @return Node or null.
610          */
611         public Node get(String tag)
612         {
613             if (_list != null)
614             {
615                 for (int i = 0; i < _list.size(); i++)
616                 {
617                     Object o = _list.get(i);
618                     if (o instanceof Node)
619                     {
620                         Node n = (Node) o;
621                         if (tag.equals(n._tag))
622                             return n;
623                     }
624                 }
625             }
626             return null;
627         }
628 
629         /* ------------------------------------------------------------ */
630         @Override
631         public void add(int i, Object o)
632         {
633             if (_list == null)
634                 _list = new ArrayList<Object>();
635             if (o instanceof String)
636             {
637                 if (_lastString)
638                 {
639                     int last = _list.size() - 1;
640                     _list.set(last, (String) _list.get(last) + o);
641                 }
642                 else
643                     _list.add(i, o);
644                 _lastString = true;
645             }
646             else
647             {
648                 _lastString = false;
649                 _list.add(i, o);
650             }
651         }
652 
653         /* ------------------------------------------------------------ */
654         public void clear()
655         {
656             if (_list != null)
657                 _list.clear();
658             _list = null;
659         }
660 
661         /* ------------------------------------------------------------ */
662         /**
663          * Get a tag as a string.
664          *
665          * @param tag The tag to get
666          * @param tags IF true, tags are included in the value.
667          * @param trim If true, trim the value.
668          * @return results of get(tag).toString(tags).
669          */
670         public String getString(String tag, boolean tags, boolean trim)
671         {
672             Node node = get(tag);
673             if (node == null)
674                 return null;
675             String s = node.toString(tags);
676             if (s != null && trim)
677                 s = s.trim();
678             return s;
679         }
680 
681         /* ------------------------------------------------------------ */
682         public synchronized String toString()
683         {
684             return toString(true);
685         }
686 
687         /* ------------------------------------------------------------ */
688         /**
689          * Convert to a string.
690          *
691          * @param tag If false, only _content is shown.
692          */
693         public synchronized String toString(boolean tag)
694         {
695             StringBuilder buf = new StringBuilder();
696             toString(buf, tag);
697             return buf.toString();
698         }
699 
700         /* ------------------------------------------------------------ */
701         /**
702          * Convert to a string.
703          *
704          * @param tag If false, only _content is shown.
705          */
706         public synchronized String toString(boolean tag, boolean trim)
707         {
708             String s = toString(tag);
709             if (s != null && trim)
710                 s = s.trim();
711             return s;
712         }
713 
714         /* ------------------------------------------------------------ */
715         private synchronized void toString(StringBuilder buf, boolean tag)
716         {
717             if (tag)
718             {
719                 buf.append("<");
720                 buf.append(_tag);
721 
722                 if (_attrs != null)
723                 {
724                     for (int i = 0; i < _attrs.length; i++)
725                     {
726                         buf.append(' ');
727                         buf.append(_attrs[i].getName());
728                         buf.append("=\"");
729                         buf.append(_attrs[i].getValue());
730                         buf.append("\"");
731                     }
732                 }
733             }
734 
735             if (_list != null)
736             {
737                 if (tag)
738                     buf.append(">");
739                 for (int i = 0; i < _list.size(); i++)
740                 {
741                     Object o = _list.get(i);
742                     if (o == null)
743                         continue;
744                     if (o instanceof Node)
745                         ((Node) o).toString(buf, tag);
746                     else
747                         buf.append(o.toString());
748                 }
749                 if (tag)
750                 {
751                     buf.append("</");
752                     buf.append(_tag);
753                     buf.append(">");
754                 }
755             }
756             else if (tag)
757                 buf.append("/>");
758         }
759 
760         /* ------------------------------------------------------------ */
761         /**
762          * Iterator over named child nodes.
763          *
764          * @param tag The tag of the nodes.
765          * @return Iterator over all child nodes with the specified tag.
766          */
767         public Iterator<Node> iterator(final String tag)
768         {
769             return new Iterator<Node>()
770             {
771                 int c = 0;
772                 Node _node;
773 
774                 /* -------------------------------------------------- */
775                 public boolean hasNext()
776                 {
777                     if (_node != null)
778                         return true;
779                     while (_list != null && c < _list.size())
780                     {
781                         Object o = _list.get(c);
782                         if (o instanceof Node)
783                         {
784                             Node n = (Node) o;
785                             if (tag.equals(n._tag))
786                             {
787                                 _node = n;
788                                 return true;
789                             }
790                         }
791                         c++;
792                     }
793                     return false;
794                 }
795 
796                 /* -------------------------------------------------- */
797                 public Node next()
798                 {
799                     try
800                     {
801                         if (hasNext())
802                             return _node;
803                         throw new NoSuchElementException();
804                     }
805                     finally
806                     {
807                         _node = null;
808                         c++;
809                     }
810                 }
811 
812                 /* -------------------------------------------------- */
813                 public void remove()
814                 {
815                     throw new UnsupportedOperationException("Not supported");
816                 }
817             };
818         }
819     }
820 }