View Javadoc

1   //
2   //  ========================================================================
3   //  Copyright (c) 1995-2014 Mort Bay Consulting Pty. Ltd.
4   //  ------------------------------------------------------------------------
5   //  All rights reserved. This program and the accompanying materials
6   //  are made available under the terms of the Eclipse Public License v1.0
7   //  and Apache License v2.0 which accompanies this distribution.
8   //
9   //      The Eclipse Public License is available at
10  //      http://www.eclipse.org/legal/epl-v10.html
11  //
12  //      The Apache License v2.0 is available at
13  //      http://www.opensource.org/licenses/apache2.0.php
14  //
15  //  You may elect to redistribute this code under either of these licenses.
16  //  ========================================================================
17  //
18  
19  package org.eclipse.jetty.xml;
20  
21  import java.io.File;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.net.URL;
25  import java.util.AbstractList;
26  import java.util.ArrayList;
27  import java.util.HashMap;
28  import java.util.Iterator;
29  import java.util.Map;
30  import java.util.NoSuchElementException;
31  import java.util.Stack;
32  import java.util.StringTokenizer;
33  
34  import javax.xml.parsers.SAXParser;
35  import javax.xml.parsers.SAXParserFactory;
36  
37  import org.eclipse.jetty.util.LazyList;
38  import org.eclipse.jetty.util.log.Log;
39  import org.eclipse.jetty.util.log.Logger;
40  import org.eclipse.jetty.util.resource.Resource;
41  import org.xml.sax.Attributes;
42  import org.xml.sax.ContentHandler;
43  import org.xml.sax.InputSource;
44  import org.xml.sax.SAXException;
45  import org.xml.sax.SAXParseException;
46  import org.xml.sax.XMLReader;
47  import org.xml.sax.helpers.DefaultHandler;
48  
49  /*--------------------------------------------------------------*/
50  /**
51   * XML Parser wrapper. This class wraps any standard JAXP1.1 parser with convenient error and
52   * entity handlers and a mini dom-like document tree.
53   * <P>
54   * By default, the parser is created as a validating parser only if xerces is present. This can be
55   * configured by setting the "org.eclipse.jetty.xml.XmlParser.Validating" system property.
56   *
57   *
58   */
59  public class XmlParser
60  {
61      private static final Logger LOG = Log.getLogger(XmlParser.class);
62  
63      private Map<String,URL> _redirectMap = new HashMap<String,URL>();
64      private SAXParser _parser;
65      private Map<String,ContentHandler> _observerMap;
66      private Stack<ContentHandler> _observers = new Stack<ContentHandler>();
67      private String _xpath;
68      private Object _xpaths;
69      private String _dtd;
70  
71      /* ------------------------------------------------------------ */
72      /**
73       * Construct
74       */
75      public XmlParser()
76      {
77          SAXParserFactory factory = SAXParserFactory.newInstance();
78          boolean validating_dft = factory.getClass().toString().startsWith("org.apache.xerces.");
79          String validating_prop = System.getProperty("org.eclipse.jetty.xml.XmlParser.Validating", validating_dft ? "true" : "false");
80          boolean validating = Boolean.valueOf(validating_prop).booleanValue();
81          setValidating(validating);
82      }
83  
84      /* ------------------------------------------------------------ */
85      /**
86       * Constructor.
87       */
88      public XmlParser(boolean validating)
89      {
90          setValidating(validating);
91      }
92  
93      /* ------------------------------------------------------------ */
94      public void setValidating(boolean validating)
95      {
96          try
97          {
98              SAXParserFactory factory = SAXParserFactory.newInstance();
99              factory.setValidating(validating);
100             _parser = factory.newSAXParser();
101 
102             try
103             {
104                 if (validating)
105                     _parser.getXMLReader().setFeature("http://apache.org/xml/features/validation/schema", validating);
106             }
107             catch (Exception e)
108             {
109                 if (validating)
110                     LOG.warn("Schema validation may not be supported: ", e);
111                 else
112                     LOG.ignore(e);
113             }
114 
115             _parser.getXMLReader().setFeature("http://xml.org/sax/features/validation", validating);
116             _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespaces", true);
117             _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes", false);
118             try
119             {
120                 if (validating)
121                     _parser.getXMLReader().setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", validating);
122             }
123             catch (Exception e)
124             {
125                 LOG.warn(e.getMessage());
126             }
127         }
128         catch (Exception e)
129         {
130             LOG.warn(Log.EXCEPTION, e);
131             throw new Error(e.toString());
132         }
133     }
134 
135     /* ------------------------------------------------------------ */
136     public boolean isValidating()
137     {
138         return _parser.isValidating();
139     }
140     
141     /* ------------------------------------------------------------ */
142     /**
143      * @param name
144      * @param entity
145      */
146     public synchronized void redirectEntity(String name, URL entity)
147     {
148         if (entity != null)
149             _redirectMap.put(name, entity);
150     }
151 
152     /* ------------------------------------------------------------ */
153     /**
154      *
155      * @return Returns the xpath.
156      */
157     public String getXpath()
158     {
159         return _xpath;
160     }
161 
162     /* ------------------------------------------------------------ */
163     /**
164      * Set an XPath A very simple subset of xpath is supported to select a partial tree. Currently
165      * only path like "/node1/nodeA | /node1/nodeB" are supported.
166      *
167      * @param xpath The xpath to set.
168      */
169     public void setXpath(String xpath)
170     {
171         _xpath = xpath;
172         StringTokenizer tok = new StringTokenizer(xpath, "| ");
173         while (tok.hasMoreTokens())
174             _xpaths = LazyList.add(_xpaths, tok.nextToken());
175     }
176 
177     /* ------------------------------------------------------------ */
178     public String getDTD()
179     {
180         return _dtd;
181     }
182 
183     /* ------------------------------------------------------------ */
184     /**
185      * Add a ContentHandler. Add an additional _content handler that is triggered on a tag name. SAX
186      * events are passed to the ContentHandler provided from a matching start element to the
187      * corresponding end element. Only a single _content handler can be registered against each tag.
188      *
189      * @param trigger Tag local or q name.
190      * @param observer SAX ContentHandler
191      */
192     public synchronized void addContentHandler(String trigger, ContentHandler observer)
193     {
194         if (_observerMap == null)
195             _observerMap = new HashMap<>();
196         _observerMap.put(trigger, observer);
197     }
198 
199     /* ------------------------------------------------------------ */
200     public synchronized Node parse(InputSource source) throws IOException, SAXException
201     {
202         _dtd=null;
203         Handler handler = new Handler();
204         XMLReader reader = _parser.getXMLReader();
205         reader.setContentHandler(handler);
206         reader.setErrorHandler(handler);
207         reader.setEntityResolver(handler);
208         if (LOG.isDebugEnabled())
209             LOG.debug("parsing: sid=" + source.getSystemId() + ",pid=" + source.getPublicId());
210         _parser.parse(source, handler);
211         if (handler._error != null)
212             throw handler._error;
213         Node doc = (Node) handler._top.get(0);
214         handler.clear();
215         return doc;
216     }
217 
218     /* ------------------------------------------------------------ */
219     /**
220      * Parse String URL.
221      */
222     public synchronized Node parse(String url) throws IOException, SAXException
223     {
224         if (LOG.isDebugEnabled())
225             LOG.debug("parse: " + url);
226         return parse(new InputSource(url));
227     }
228 
229     /* ------------------------------------------------------------ */
230     /**
231      * Parse File.
232      */
233     public synchronized Node parse(File file) throws IOException, SAXException
234     {
235         if (LOG.isDebugEnabled())
236             LOG.debug("parse: " + file);
237         return parse(new InputSource(Resource.toURL(file).toString()));
238     }
239 
240     /* ------------------------------------------------------------ */
241     /**
242      * Parse InputStream.
243      */
244     public synchronized Node parse(InputStream in) throws IOException, SAXException
245     {
246         _dtd=null;
247         Handler handler = new Handler();
248         XMLReader reader = _parser.getXMLReader();
249         reader.setContentHandler(handler);
250         reader.setErrorHandler(handler);
251         reader.setEntityResolver(handler);
252         _parser.parse(new InputSource(in), handler);
253         if (handler._error != null)
254             throw handler._error;
255         Node doc = (Node) handler._top.get(0);
256         handler.clear();
257         return doc;
258     }
259 
260 
261     /* ------------------------------------------------------------ */
262     protected InputSource resolveEntity(String pid, String sid)
263     {
264         if (LOG.isDebugEnabled())
265             LOG.debug("resolveEntity(" + pid + ", " + sid + ")");
266 
267         if (sid!=null && sid.endsWith(".dtd"))
268             _dtd=sid;
269 
270         URL entity = null;
271         if (pid != null)
272             entity = (URL) _redirectMap.get(pid);
273         if (entity == null)
274             entity = (URL) _redirectMap.get(sid);
275         if (entity == null)
276         {
277             String dtd = sid;
278             if (dtd.lastIndexOf('/') >= 0)
279                 dtd = dtd.substring(dtd.lastIndexOf('/') + 1);
280 
281             if (LOG.isDebugEnabled())
282                 LOG.debug("Can't exact match entity in redirect map, trying " + dtd);
283             entity = (URL) _redirectMap.get(dtd);
284         }
285 
286         if (entity != null)
287         {
288             try
289             {
290                 InputStream in = entity.openStream();
291                 if (LOG.isDebugEnabled())
292                     LOG.debug("Redirected entity " + sid + " --> " + entity);
293                 InputSource is = new InputSource(in);
294                 is.setSystemId(sid);
295                 return is;
296             }
297             catch (IOException e)
298             {
299                 LOG.ignore(e);
300             }
301         }
302         return null;
303     }
304     
305     /* ------------------------------------------------------------ */
306     /* ------------------------------------------------------------ */
307     private class NoopHandler extends DefaultHandler
308     {
309         Handler _next;
310         int _depth;
311 
312         NoopHandler(Handler next)
313         {
314             this._next = next;
315         }
316 
317         /* ------------------------------------------------------------ */
318         public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
319         {
320             _depth++;
321         }
322 
323         /* ------------------------------------------------------------ */
324         public void endElement(String uri, String localName, String qName) throws SAXException
325         {
326             if (_depth == 0)
327                 _parser.getXMLReader().setContentHandler(_next);
328             else
329                 _depth--;
330         }
331     }
332 
333     /* ------------------------------------------------------------ */
334     /* ------------------------------------------------------------ */
335     private class Handler extends DefaultHandler
336     {
337         Node _top = new Node(null, null, null);
338         SAXParseException _error;
339         private Node _context = _top;
340         private NoopHandler _noop;
341 
342         Handler()
343         {
344             _noop = new NoopHandler(this);
345         }
346 
347         /* ------------------------------------------------------------ */
348         void clear()
349         {
350             _top = null;
351             _error = null;
352             _context = null;
353         }
354 
355         /* ------------------------------------------------------------ */
356         public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
357         {
358             String name = null;
359             if (_parser.isNamespaceAware())
360                 name = localName;
361 
362             if (name == null || "".equals(name))
363                 name = qName;
364 
365             Node node = new Node(_context, name, attrs);
366 
367 
368             // check if the node matches any xpaths set?
369             if (_xpaths != null)
370             {
371                 String path = node.getPath();
372                 boolean match = false;
373                 for (int i = LazyList.size(_xpaths); !match && i-- > 0;)
374                 {
375                     String xpath = (String) LazyList.get(_xpaths, i);
376 
377                     match = path.equals(xpath) || xpath.startsWith(path) && xpath.length() > path.length() && xpath.charAt(path.length()) == '/';
378                 }
379 
380                 if (match)
381                 {
382                     _context.add(node);
383                     _context = node;
384                 }
385                 else
386                 {
387                     _parser.getXMLReader().setContentHandler(_noop);
388                 }
389             }
390             else
391             {
392                 _context.add(node);
393                 _context = node;
394             }
395 
396             ContentHandler observer = null;
397             if (_observerMap != null)
398                 observer = (ContentHandler) _observerMap.get(name);
399             _observers.push(observer);
400 
401             for (int i = 0; i < _observers.size(); i++)
402                 if (_observers.get(i) != null)
403                     ((ContentHandler) _observers.get(i)).startElement(uri, localName, qName, attrs);
404         }
405 
406         /* ------------------------------------------------------------ */
407         public void endElement(String uri, String localName, String qName) throws SAXException
408         {
409             _context = _context._parent;
410             for (int i = 0; i < _observers.size(); i++)
411                 if (_observers.get(i) != null)
412                     ((ContentHandler) _observers.get(i)).endElement(uri, localName, qName);
413             _observers.pop();
414         }
415 
416         /* ------------------------------------------------------------ */
417         public void ignorableWhitespace(char buf[], int offset, int len) throws SAXException
418         {
419             for (int i = 0; i < _observers.size(); i++)
420                 if (_observers.get(i) != null)
421                     ((ContentHandler) _observers.get(i)).ignorableWhitespace(buf, offset, len);
422         }
423 
424         /* ------------------------------------------------------------ */
425         public void characters(char buf[], int offset, int len) throws SAXException
426         {
427             _context.add(new String(buf, offset, len));
428             for (int i = 0; i < _observers.size(); i++)
429                 if (_observers.get(i) != null)
430                     ((ContentHandler) _observers.get(i)).characters(buf, offset, len);
431         }
432 
433         /* ------------------------------------------------------------ */
434         public void warning(SAXParseException ex)
435         {
436             LOG.debug(Log.EXCEPTION, ex);
437             LOG.warn("WARNING@" + getLocationString(ex) + " : " + ex.toString());
438         }
439 
440         /* ------------------------------------------------------------ */
441         public void error(SAXParseException ex) throws SAXException
442         {
443             // Save error and continue to report other errors
444             if (_error == null)
445                 _error = ex;
446             LOG.debug(Log.EXCEPTION, ex);
447             LOG.warn("ERROR@" + getLocationString(ex) + " : " + ex.toString());
448         }
449 
450         /* ------------------------------------------------------------ */
451         public void fatalError(SAXParseException ex) throws SAXException
452         {
453             _error = ex;
454             LOG.debug(Log.EXCEPTION, ex);
455             LOG.warn("FATAL@" + getLocationString(ex) + " : " + ex.toString());
456             throw ex;
457         }
458 
459         /* ------------------------------------------------------------ */
460         private String getLocationString(SAXParseException ex)
461         {
462             return ex.getSystemId() + " line:" + ex.getLineNumber() + " col:" + ex.getColumnNumber();
463         }
464 
465         /* ------------------------------------------------------------ */
466         public InputSource resolveEntity(String pid, String sid)
467         {
468             return XmlParser.this.resolveEntity(pid,sid);
469         }
470     }
471 
472     /* ------------------------------------------------------------ */
473     /* ------------------------------------------------------------ */
474     /**
475      * XML Attribute.
476      */
477     public static class Attribute
478     {
479         private String _name;
480         private String _value;
481 
482         Attribute(String n, String v)
483         {
484             _name = n;
485             _value = v;
486         }
487 
488         public String getName()
489         {
490             return _name;
491         }
492 
493         public String getValue()
494         {
495             return _value;
496         }
497     }
498 
499     /* ------------------------------------------------------------ */
500     /* ------------------------------------------------------------ */
501     /**
502      * XML Node. Represents an XML element with optional attributes and ordered content.
503      */
504     public static class Node extends AbstractList<Object>
505     {
506         Node _parent;
507         private ArrayList<Object> _list;
508         private String _tag;
509         private Attribute[] _attrs;
510         private boolean _lastString = false;
511         private String _path;
512 
513         /* ------------------------------------------------------------ */
514         Node(Node parent, String tag, Attributes attrs)
515         {
516             _parent = parent;
517             _tag = tag;
518 
519             if (attrs != null)
520             {
521                 _attrs = new Attribute[attrs.getLength()];
522                 for (int i = 0; i < attrs.getLength(); i++)
523                 {
524                     String name = attrs.getLocalName(i);
525                     if (name == null || name.equals(""))
526                         name = attrs.getQName(i);
527                     _attrs[i] = new Attribute(name, attrs.getValue(i));
528                 }
529             }
530         }
531 
532         /* ------------------------------------------------------------ */
533         public Node getParent()
534         {
535             return _parent;
536         }
537 
538         /* ------------------------------------------------------------ */
539         public String getTag()
540         {
541             return _tag;
542         }
543 
544         /* ------------------------------------------------------------ */
545         public String getPath()
546         {
547             if (_path == null)
548             {
549                 if (getParent() != null && getParent().getTag() != null)
550                     _path = getParent().getPath() + "/" + _tag;
551                 else
552                     _path = "/" + _tag;
553             }
554             return _path;
555         }
556 
557         /* ------------------------------------------------------------ */
558         /**
559          * Get an array of element attributes.
560          */
561         public Attribute[] getAttributes()
562         {
563             return _attrs;
564         }
565 
566         /* ------------------------------------------------------------ */
567         /**
568          * Get an element attribute.
569          *
570          * @return attribute or null.
571          */
572         public String getAttribute(String name)
573         {
574             return getAttribute(name, null);
575         }
576 
577         /* ------------------------------------------------------------ */
578         /**
579          * Get an element attribute.
580          *
581          * @return attribute or null.
582          */
583         public String getAttribute(String name, String dft)
584         {
585             if (_attrs == null || name == null)
586                 return dft;
587             for (int i = 0; i < _attrs.length; i++)
588                 if (name.equals(_attrs[i].getName()))
589                     return _attrs[i].getValue();
590             return dft;
591         }
592 
593         /* ------------------------------------------------------------ */
594         /**
595          * Get the number of children nodes.
596          */
597         public int size()
598         {
599             if (_list != null)
600                 return _list.size();
601             return 0;
602         }
603 
604         /* ------------------------------------------------------------ */
605         /**
606          * Get the ith child node or content.
607          *
608          * @return Node or String.
609          */
610         public Object get(int i)
611         {
612             if (_list != null)
613                 return _list.get(i);
614             return null;
615         }
616 
617         /* ------------------------------------------------------------ */
618         /**
619          * Get the first child node with the tag.
620          *
621          * @param tag
622          * @return Node or null.
623          */
624         public Node get(String tag)
625         {
626             if (_list != null)
627             {
628                 for (int i = 0; i < _list.size(); i++)
629                 {
630                     Object o = _list.get(i);
631                     if (o instanceof Node)
632                     {
633                         Node n = (Node) o;
634                         if (tag.equals(n._tag))
635                             return n;
636                     }
637                 }
638             }
639             return null;
640         }
641 
642         /* ------------------------------------------------------------ */
643         @Override
644         public void add(int i, Object o)
645         {
646             if (_list == null)
647                 _list = new ArrayList<Object>();
648             if (o instanceof String)
649             {
650                 if (_lastString)
651                 {
652                     int last = _list.size() - 1;
653                     _list.set(last, (String) _list.get(last) + o);
654                 }
655                 else
656                     _list.add(i, o);
657                 _lastString = true;
658             }
659             else
660             {
661                 _lastString = false;
662                 _list.add(i, o);
663             }
664         }
665 
666         /* ------------------------------------------------------------ */
667         public void clear()
668         {
669             if (_list != null)
670                 _list.clear();
671             _list = null;
672         }
673 
674         /* ------------------------------------------------------------ */
675         /**
676          * Get a tag as a string.
677          *
678          * @param tag The tag to get
679          * @param tags IF true, tags are included in the value.
680          * @param trim If true, trim the value.
681          * @return results of get(tag).toString(tags).
682          */
683         public String getString(String tag, boolean tags, boolean trim)
684         {
685             Node node = get(tag);
686             if (node == null)
687                 return null;
688             String s = node.toString(tags);
689             if (s != null && trim)
690                 s = s.trim();
691             return s;
692         }
693 
694         /* ------------------------------------------------------------ */
695         public synchronized String toString()
696         {
697             return toString(true);
698         }
699 
700         /* ------------------------------------------------------------ */
701         /**
702          * Convert to a string.
703          *
704          * @param tag If false, only _content is shown.
705          */
706         public synchronized String toString(boolean tag)
707         {
708             StringBuilder buf = new StringBuilder();
709             toString(buf, tag);
710             return buf.toString();
711         }
712 
713         /* ------------------------------------------------------------ */
714         /**
715          * Convert to a string.
716          *
717          * @param tag If false, only _content is shown.
718          */
719         public synchronized String toString(boolean tag, boolean trim)
720         {
721             String s = toString(tag);
722             if (s != null && trim)
723                 s = s.trim();
724             return s;
725         }
726 
727         /* ------------------------------------------------------------ */
728         private synchronized void toString(StringBuilder buf, boolean tag)
729         {
730             if (tag)
731             {
732                 buf.append("<");
733                 buf.append(_tag);
734 
735                 if (_attrs != null)
736                 {
737                     for (int i = 0; i < _attrs.length; i++)
738                     {
739                         buf.append(' ');
740                         buf.append(_attrs[i].getName());
741                         buf.append("=\"");
742                         buf.append(_attrs[i].getValue());
743                         buf.append("\"");
744                     }
745                 }
746             }
747 
748             if (_list != null)
749             {
750                 if (tag)
751                     buf.append(">");
752                 for (int i = 0; i < _list.size(); i++)
753                 {
754                     Object o = _list.get(i);
755                     if (o == null)
756                         continue;
757                     if (o instanceof Node)
758                         ((Node) o).toString(buf, tag);
759                     else
760                         buf.append(o.toString());
761                 }
762                 if (tag)
763                 {
764                     buf.append("</");
765                     buf.append(_tag);
766                     buf.append(">");
767                 }
768             }
769             else if (tag)
770                 buf.append("/>");
771         }
772 
773         /* ------------------------------------------------------------ */
774         /**
775          * Iterator over named child nodes.
776          *
777          * @param tag The tag of the nodes.
778          * @return Iterator over all child nodes with the specified tag.
779          */
780         public Iterator<Node> iterator(final String tag)
781         {
782             return new Iterator<Node>()
783             {
784                 int c = 0;
785                 Node _node;
786 
787                 /* -------------------------------------------------- */
788                 public boolean hasNext()
789                 {
790                     if (_node != null)
791                         return true;
792                     while (_list != null && c < _list.size())
793                     {
794                         Object o = _list.get(c);
795                         if (o instanceof Node)
796                         {
797                             Node n = (Node) o;
798                             if (tag.equals(n._tag))
799                             {
800                                 _node = n;
801                                 return true;
802                             }
803                         }
804                         c++;
805                     }
806                     return false;
807                 }
808 
809                 /* -------------------------------------------------- */
810                 public Node next()
811                 {
812                     try
813                     {
814                         if (hasNext())
815                             return _node;
816                         throw new NoSuchElementException();
817                     }
818                     finally
819                     {
820                         _node = null;
821                         c++;
822                     }
823                 }
824 
825                 /* -------------------------------------------------- */
826                 public void remove()
827                 {
828                     throw new UnsupportedOperationException("Not supported");
829                 }
830             };
831         }
832     }
833 }