View Javadoc

1   //
2   //  ========================================================================
3   //  Copyright (c) 1995-2015 Mort Bay Consulting Pty. Ltd.
4   //  ------------------------------------------------------------------------
5   //  All rights reserved. This program and the accompanying materials
6   //  are made available under the terms of the Eclipse Public License v1.0
7   //  and Apache License v2.0 which accompanies this distribution.
8   //
9   //      The Eclipse Public License is available at
10  //      http://www.eclipse.org/legal/epl-v10.html
11  //
12  //      The Apache License v2.0 is available at
13  //      http://www.opensource.org/licenses/apache2.0.php
14  //
15  //  You may elect to redistribute this code under either of these licenses.
16  //  ========================================================================
17  //
18  
19  package org.eclipse.jetty.xml;
20  
21  import java.io.File;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.net.URL;
25  import java.util.AbstractList;
26  import java.util.ArrayList;
27  import java.util.HashMap;
28  import java.util.Iterator;
29  import java.util.Map;
30  import java.util.NoSuchElementException;
31  import java.util.Stack;
32  import java.util.StringTokenizer;
33  
34  import javax.xml.parsers.SAXParser;
35  import javax.xml.parsers.SAXParserFactory;
36  
37  import org.eclipse.jetty.util.LazyList;
38  import org.eclipse.jetty.util.log.Log;
39  import org.eclipse.jetty.util.log.Logger;
40  import org.eclipse.jetty.util.resource.Resource;
41  import org.xml.sax.Attributes;
42  import org.xml.sax.ContentHandler;
43  import org.xml.sax.InputSource;
44  import org.xml.sax.SAXException;
45  import org.xml.sax.SAXParseException;
46  import org.xml.sax.XMLReader;
47  import org.xml.sax.helpers.DefaultHandler;
48  
49  /*--------------------------------------------------------------*/
50  /**
51   * XML Parser wrapper. This class wraps any standard JAXP1.1 parser with convenient error and
52   * entity handlers and a mini dom-like document tree.
53   * <p>
54   * By default, the parser is created as a validating parser only if xerces is present. This can be
55   * configured by setting the "org.eclipse.jetty.xml.XmlParser.Validating" system property.
56   */
57  public class XmlParser
58  {
59      private static final Logger LOG = Log.getLogger(XmlParser.class);
60  
61      private Map<String,URL> _redirectMap = new HashMap<String,URL>();
62      private SAXParser _parser;
63      private Map<String,ContentHandler> _observerMap;
64      private Stack<ContentHandler> _observers = new Stack<ContentHandler>();
65      private String _xpath;
66      private Object _xpaths;
67      private String _dtd;
68  
69      /* ------------------------------------------------------------ */
70      /**
71       * Construct
72       */
73      public XmlParser()
74      {
75          SAXParserFactory factory = SAXParserFactory.newInstance();
76          boolean validating_dft = factory.getClass().toString().startsWith("org.apache.xerces.");
77          String validating_prop = System.getProperty("org.eclipse.jetty.xml.XmlParser.Validating", validating_dft ? "true" : "false");
78          boolean validating = Boolean.valueOf(validating_prop).booleanValue();
79          setValidating(validating);
80      }
81  
82      /* ------------------------------------------------------------ */
83      public XmlParser(boolean validating)
84      {
85          setValidating(validating);
86      }
87  
88      /* ------------------------------------------------------------ */
89      public void setValidating(boolean validating)
90      {
91          try
92          {
93              SAXParserFactory factory = SAXParserFactory.newInstance();
94              factory.setValidating(validating);
95              _parser = factory.newSAXParser();
96  
97              try
98              {
99                  if (validating)
100                     _parser.getXMLReader().setFeature("http://apache.org/xml/features/validation/schema", validating);
101             }
102             catch (Exception e)
103             {
104                 if (validating)
105                     LOG.warn("Schema validation may not be supported: ", e);
106                 else
107                     LOG.ignore(e);
108             }
109 
110             _parser.getXMLReader().setFeature("http://xml.org/sax/features/validation", validating);
111             _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespaces", true);
112             _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes", false);
113             try
114             {
115                 if (validating)
116                     _parser.getXMLReader().setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", validating);
117             }
118             catch (Exception e)
119             {
120                 LOG.warn(e.getMessage());
121             }
122         }
123         catch (Exception e)
124         {
125             LOG.warn(Log.EXCEPTION, e);
126             throw new Error(e.toString());
127         }
128     }
129 
130     /* ------------------------------------------------------------ */
131     public boolean isValidating()
132     {
133         return _parser.isValidating();
134     }
135     
136     /* ------------------------------------------------------------ */
137     public synchronized void redirectEntity(String name, URL entity)
138     {
139         if (entity != null)
140             _redirectMap.put(name, entity);
141     }
142 
143     /* ------------------------------------------------------------ */
144     /**
145      *
146      * @return Returns the xpath.
147      */
148     public String getXpath()
149     {
150         return _xpath;
151     }
152 
153     /* ------------------------------------------------------------ */
154     /**
155      * Set an XPath A very simple subset of xpath is supported to select a partial tree. Currently
156      * only path like "/node1/nodeA | /node1/nodeB" are supported.
157      *
158      * @param xpath The xpath to set.
159      */
160     public void setXpath(String xpath)
161     {
162         _xpath = xpath;
163         StringTokenizer tok = new StringTokenizer(xpath, "| ");
164         while (tok.hasMoreTokens())
165             _xpaths = LazyList.add(_xpaths, tok.nextToken());
166     }
167 
168     /* ------------------------------------------------------------ */
169     public String getDTD()
170     {
171         return _dtd;
172     }
173 
174     /* ------------------------------------------------------------ */
175     /**
176      * Add a ContentHandler. Add an additional _content handler that is triggered on a tag name. SAX
177      * events are passed to the ContentHandler provided from a matching start element to the
178      * corresponding end element. Only a single _content handler can be registered against each tag.
179      *
180      * @param trigger Tag local or q name.
181      * @param observer SAX ContentHandler
182      */
183     public synchronized void addContentHandler(String trigger, ContentHandler observer)
184     {
185         if (_observerMap == null)
186             _observerMap = new HashMap<>();
187         _observerMap.put(trigger, observer);
188     }
189 
190     /* ------------------------------------------------------------ */
191     public synchronized Node parse(InputSource source) throws IOException, SAXException
192     {
193         _dtd=null;
194         Handler handler = new Handler();
195         XMLReader reader = _parser.getXMLReader();
196         reader.setContentHandler(handler);
197         reader.setErrorHandler(handler);
198         reader.setEntityResolver(handler);
199         if (LOG.isDebugEnabled())
200             LOG.debug("parsing: sid=" + source.getSystemId() + ",pid=" + source.getPublicId());
201         _parser.parse(source, handler);
202         if (handler._error != null)
203             throw handler._error;
204         Node doc = (Node) handler._top.get(0);
205         handler.clear();
206         return doc;
207     }
208 
209     /* ------------------------------------------------------------ */
210     /**
211      * Parse String URL.
212      * @param url the url to the xml to parse
213      * @return the root node of the xml
214      * @throws IOException if unable to load the xml
215      * @throws SAXException if unable to parse the xml
216      */
217     public synchronized Node parse(String url) throws IOException, SAXException
218     {
219         if (LOG.isDebugEnabled())
220             LOG.debug("parse: " + url);
221         return parse(new InputSource(url));
222     }
223 
224     /* ------------------------------------------------------------ */
225     /**
226      * Parse File.
227      * @param file the file to the xml to parse 
228      * @return the root node of the xml
229      * @throws IOException if unable to load the xml
230      * @throws SAXException if unable to parse the xml
231      */
232     public synchronized Node parse(File file) throws IOException, SAXException
233     {
234         if (LOG.isDebugEnabled())
235             LOG.debug("parse: " + file);
236         return parse(new InputSource(Resource.toURL(file).toString()));
237     }
238 
239     /* ------------------------------------------------------------ */
240     /**
241      * Parse InputStream.
242      * @param in the input stream of the xml to parse
243      * @return the root node of the xml
244      * @throws IOException if unable to load the xml
245      * @throws SAXException if unable to parse the xml
246      */
247     public synchronized Node parse(InputStream in) throws IOException, SAXException
248     {
249         _dtd=null;
250         Handler handler = new Handler();
251         XMLReader reader = _parser.getXMLReader();
252         reader.setContentHandler(handler);
253         reader.setErrorHandler(handler);
254         reader.setEntityResolver(handler);
255         _parser.parse(new InputSource(in), handler);
256         if (handler._error != null)
257             throw handler._error;
258         Node doc = (Node) handler._top.get(0);
259         handler.clear();
260         return doc;
261     }
262 
263 
264     /* ------------------------------------------------------------ */
265     protected InputSource resolveEntity(String pid, String sid)
266     {
267         if (LOG.isDebugEnabled())
268             LOG.debug("resolveEntity(" + pid + ", " + sid + ")");
269 
270         if (sid!=null && sid.endsWith(".dtd"))
271             _dtd=sid;
272 
273         URL entity = null;
274         if (pid != null)
275             entity = (URL) _redirectMap.get(pid);
276         if (entity == null)
277             entity = (URL) _redirectMap.get(sid);
278         if (entity == null)
279         {
280             String dtd = sid;
281             if (dtd.lastIndexOf('/') >= 0)
282                 dtd = dtd.substring(dtd.lastIndexOf('/') + 1);
283 
284             if (LOG.isDebugEnabled())
285                 LOG.debug("Can't exact match entity in redirect map, trying " + dtd);
286             entity = (URL) _redirectMap.get(dtd);
287         }
288 
289         if (entity != null)
290         {
291             try
292             {
293                 InputStream in = entity.openStream();
294                 if (LOG.isDebugEnabled())
295                     LOG.debug("Redirected entity " + sid + " --> " + entity);
296                 InputSource is = new InputSource(in);
297                 is.setSystemId(sid);
298                 return is;
299             }
300             catch (IOException e)
301             {
302                 LOG.ignore(e);
303             }
304         }
305         return null;
306     }
307     
308     /* ------------------------------------------------------------ */
309     /* ------------------------------------------------------------ */
310     private class NoopHandler extends DefaultHandler
311     {
312         Handler _next;
313         int _depth;
314 
315         NoopHandler(Handler next)
316         {
317             this._next = next;
318         }
319 
320         /* ------------------------------------------------------------ */
321         public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
322         {
323             _depth++;
324         }
325 
326         /* ------------------------------------------------------------ */
327         public void endElement(String uri, String localName, String qName) throws SAXException
328         {
329             if (_depth == 0)
330                 _parser.getXMLReader().setContentHandler(_next);
331             else
332                 _depth--;
333         }
334     }
335 
336     /* ------------------------------------------------------------ */
337     /* ------------------------------------------------------------ */
338     private class Handler extends DefaultHandler
339     {
340         Node _top = new Node(null, null, null);
341         SAXParseException _error;
342         private Node _context = _top;
343         private NoopHandler _noop;
344 
345         Handler()
346         {
347             _noop = new NoopHandler(this);
348         }
349 
350         /* ------------------------------------------------------------ */
351         void clear()
352         {
353             _top = null;
354             _error = null;
355             _context = null;
356         }
357 
358         /* ------------------------------------------------------------ */
359         public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
360         {
361             String name = null;
362             if (_parser.isNamespaceAware())
363                 name = localName;
364 
365             if (name == null || "".equals(name))
366                 name = qName;
367 
368             Node node = new Node(_context, name, attrs);
369 
370 
371             // check if the node matches any xpaths set?
372             if (_xpaths != null)
373             {
374                 String path = node.getPath();
375                 boolean match = false;
376                 for (int i = LazyList.size(_xpaths); !match && i-- > 0;)
377                 {
378                     String xpath = (String) LazyList.get(_xpaths, i);
379 
380                     match = path.equals(xpath) || xpath.startsWith(path) && xpath.length() > path.length() && xpath.charAt(path.length()) == '/';
381                 }
382 
383                 if (match)
384                 {
385                     _context.add(node);
386                     _context = node;
387                 }
388                 else
389                 {
390                     _parser.getXMLReader().setContentHandler(_noop);
391                 }
392             }
393             else
394             {
395                 _context.add(node);
396                 _context = node;
397             }
398 
399             ContentHandler observer = null;
400             if (_observerMap != null)
401                 observer = (ContentHandler) _observerMap.get(name);
402             _observers.push(observer);
403 
404             for (int i = 0; i < _observers.size(); i++)
405                 if (_observers.get(i) != null)
406                     ((ContentHandler) _observers.get(i)).startElement(uri, localName, qName, attrs);
407         }
408 
409         /* ------------------------------------------------------------ */
410         public void endElement(String uri, String localName, String qName) throws SAXException
411         {
412             _context = _context._parent;
413             for (int i = 0; i < _observers.size(); i++)
414                 if (_observers.get(i) != null)
415                     ((ContentHandler) _observers.get(i)).endElement(uri, localName, qName);
416             _observers.pop();
417         }
418 
419         /* ------------------------------------------------------------ */
420         public void ignorableWhitespace(char buf[], int offset, int len) throws SAXException
421         {
422             for (int i = 0; i < _observers.size(); i++)
423                 if (_observers.get(i) != null)
424                     ((ContentHandler) _observers.get(i)).ignorableWhitespace(buf, offset, len);
425         }
426 
427         /* ------------------------------------------------------------ */
428         public void characters(char buf[], int offset, int len) throws SAXException
429         {
430             _context.add(new String(buf, offset, len));
431             for (int i = 0; i < _observers.size(); i++)
432                 if (_observers.get(i) != null)
433                     ((ContentHandler) _observers.get(i)).characters(buf, offset, len);
434         }
435 
436         /* ------------------------------------------------------------ */
437         public void warning(SAXParseException ex)
438         {
439             LOG.debug(Log.EXCEPTION, ex);
440             LOG.warn("WARNING@" + getLocationString(ex) + " : " + ex.toString());
441         }
442 
443         /* ------------------------------------------------------------ */
444         public void error(SAXParseException ex) throws SAXException
445         {
446             // Save error and continue to report other errors
447             if (_error == null)
448                 _error = ex;
449             LOG.debug(Log.EXCEPTION, ex);
450             LOG.warn("ERROR@" + getLocationString(ex) + " : " + ex.toString());
451         }
452 
453         /* ------------------------------------------------------------ */
454         public void fatalError(SAXParseException ex) throws SAXException
455         {
456             _error = ex;
457             LOG.debug(Log.EXCEPTION, ex);
458             LOG.warn("FATAL@" + getLocationString(ex) + " : " + ex.toString());
459             throw ex;
460         }
461 
462         /* ------------------------------------------------------------ */
463         private String getLocationString(SAXParseException ex)
464         {
465             return ex.getSystemId() + " line:" + ex.getLineNumber() + " col:" + ex.getColumnNumber();
466         }
467 
468         /* ------------------------------------------------------------ */
469         public InputSource resolveEntity(String pid, String sid)
470         {
471             return XmlParser.this.resolveEntity(pid,sid);
472         }
473     }
474 
475     /* ------------------------------------------------------------ */
476     /* ------------------------------------------------------------ */
477     /**
478      * XML Attribute.
479      */
480     public static class Attribute
481     {
482         private String _name;
483         private String _value;
484 
485         Attribute(String n, String v)
486         {
487             _name = n;
488             _value = v;
489         }
490 
491         public String getName()
492         {
493             return _name;
494         }
495 
496         public String getValue()
497         {
498             return _value;
499         }
500     }
501 
502     /* ------------------------------------------------------------ */
503     /* ------------------------------------------------------------ */
504     /**
505      * XML Node. Represents an XML element with optional attributes and ordered content.
506      */
507     public static class Node extends AbstractList<Object>
508     {
509         Node _parent;
510         private ArrayList<Object> _list;
511         private String _tag;
512         private Attribute[] _attrs;
513         private boolean _lastString = false;
514         private String _path;
515 
516         /* ------------------------------------------------------------ */
517         Node(Node parent, String tag, Attributes attrs)
518         {
519             _parent = parent;
520             _tag = tag;
521 
522             if (attrs != null)
523             {
524                 _attrs = new Attribute[attrs.getLength()];
525                 for (int i = 0; i < attrs.getLength(); i++)
526                 {
527                     String name = attrs.getLocalName(i);
528                     if (name == null || name.equals(""))
529                         name = attrs.getQName(i);
530                     _attrs[i] = new Attribute(name, attrs.getValue(i));
531                 }
532             }
533         }
534 
535         /* ------------------------------------------------------------ */
536         public Node getParent()
537         {
538             return _parent;
539         }
540 
541         /* ------------------------------------------------------------ */
542         public String getTag()
543         {
544             return _tag;
545         }
546 
547         /* ------------------------------------------------------------ */
548         public String getPath()
549         {
550             if (_path == null)
551             {
552                 if (getParent() != null && getParent().getTag() != null)
553                     _path = getParent().getPath() + "/" + _tag;
554                 else
555                     _path = "/" + _tag;
556             }
557             return _path;
558         }
559 
560         /* ------------------------------------------------------------ */
561         /**
562          * Get an array of element attributes.
563          * @return the attributes
564          */
565         public Attribute[] getAttributes()
566         {
567             return _attrs;
568         }
569 
570         /* ------------------------------------------------------------ */
571         /**
572          * Get an element attribute.
573          * 
574          * @param name the name of the attribute 
575          * @return attribute or null.
576          */
577         public String getAttribute(String name)
578         {
579             return getAttribute(name, null);
580         }
581 
582         /* ------------------------------------------------------------ */
583         /**
584          * Get an element attribute.
585          * 
586          * @param name the name of the element 
587          * @param dft the default value
588          * @return attribute or null.
589          */
590         public String getAttribute(String name, String dft)
591         {
592             if (_attrs == null || name == null)
593                 return dft;
594             for (int i = 0; i < _attrs.length; i++)
595                 if (name.equals(_attrs[i].getName()))
596                     return _attrs[i].getValue();
597             return dft;
598         }
599 
600         /* ------------------------------------------------------------ */
601         /**
602          * Get the number of children nodes.
603          */
604         public int size()
605         {
606             if (_list != null)
607                 return _list.size();
608             return 0;
609         }
610 
611         /* ------------------------------------------------------------ */
612         /**
613          * Get the ith child node or content.
614          *
615          * @return Node or String.
616          */
617         public Object get(int i)
618         {
619             if (_list != null)
620                 return _list.get(i);
621             return null;
622         }
623 
624         /* ------------------------------------------------------------ */
625         /**
626          * Get the first child node with the tag.
627          *
628          * @param tag the name of the tag
629          * @return Node or null.
630          */
631         public Node get(String tag)
632         {
633             if (_list != null)
634             {
635                 for (int i = 0; i < _list.size(); i++)
636                 {
637                     Object o = _list.get(i);
638                     if (o instanceof Node)
639                     {
640                         Node n = (Node) o;
641                         if (tag.equals(n._tag))
642                             return n;
643                     }
644                 }
645             }
646             return null;
647         }
648 
649         /* ------------------------------------------------------------ */
650         @Override
651         public void add(int i, Object o)
652         {
653             if (_list == null)
654                 _list = new ArrayList<Object>();
655             if (o instanceof String)
656             {
657                 if (_lastString)
658                 {
659                     int last = _list.size() - 1;
660                     _list.set(last, (String) _list.get(last) + o);
661                 }
662                 else
663                     _list.add(i, o);
664                 _lastString = true;
665             }
666             else
667             {
668                 _lastString = false;
669                 _list.add(i, o);
670             }
671         }
672 
673         /* ------------------------------------------------------------ */
674         public void clear()
675         {
676             if (_list != null)
677                 _list.clear();
678             _list = null;
679         }
680 
681         /* ------------------------------------------------------------ */
682         /**
683          * Get a tag as a string.
684          *
685          * @param tag The tag to get
686          * @param tags IF true, tags are included in the value.
687          * @param trim If true, trim the value.
688          * @return results of get(tag).toString(tags).
689          */
690         public String getString(String tag, boolean tags, boolean trim)
691         {
692             Node node = get(tag);
693             if (node == null)
694                 return null;
695             String s = node.toString(tags);
696             if (s != null && trim)
697                 s = s.trim();
698             return s;
699         }
700 
701         /* ------------------------------------------------------------ */
702         public synchronized String toString()
703         {
704             return toString(true);
705         }
706 
707         /* ------------------------------------------------------------ */
708         /**
709          * Convert to a string.
710          *
711          * @param tag If false, only _content is shown.
712          * @return the string value
713          */
714         public synchronized String toString(boolean tag)
715         {
716             StringBuilder buf = new StringBuilder();
717             toString(buf, tag);
718             return buf.toString();
719         }
720 
721         /* ------------------------------------------------------------ */
722         /**
723          * Convert to a string.
724          *
725          * @param tag If false, only _content is shown.
726          * @param trim true to trim the content
727          * @return the trimmed content
728          */
729         public synchronized String toString(boolean tag, boolean trim)
730         {
731             String s = toString(tag);
732             if (s != null && trim)
733                 s = s.trim();
734             return s;
735         }
736 
737         /* ------------------------------------------------------------ */
738         private synchronized void toString(StringBuilder buf, boolean tag)
739         {
740             if (tag)
741             {
742                 buf.append("<");
743                 buf.append(_tag);
744 
745                 if (_attrs != null)
746                 {
747                     for (int i = 0; i < _attrs.length; i++)
748                     {
749                         buf.append(' ');
750                         buf.append(_attrs[i].getName());
751                         buf.append("=\"");
752                         buf.append(_attrs[i].getValue());
753                         buf.append("\"");
754                     }
755                 }
756             }
757 
758             if (_list != null)
759             {
760                 if (tag)
761                     buf.append(">");
762                 for (int i = 0; i < _list.size(); i++)
763                 {
764                     Object o = _list.get(i);
765                     if (o == null)
766                         continue;
767                     if (o instanceof Node)
768                         ((Node) o).toString(buf, tag);
769                     else
770                         buf.append(o.toString());
771                 }
772                 if (tag)
773                 {
774                     buf.append("</");
775                     buf.append(_tag);
776                     buf.append(">");
777                 }
778             }
779             else if (tag)
780                 buf.append("/>");
781         }
782 
783         /* ------------------------------------------------------------ */
784         /**
785          * Iterator over named child nodes.
786          *
787          * @param tag The tag of the nodes.
788          * @return Iterator over all child nodes with the specified tag.
789          */
790         public Iterator<Node> iterator(final String tag)
791         {
792             return new Iterator<Node>()
793             {
794                 int c = 0;
795                 Node _node;
796 
797                 /* -------------------------------------------------- */
798                 public boolean hasNext()
799                 {
800                     if (_node != null)
801                         return true;
802                     while (_list != null && c < _list.size())
803                     {
804                         Object o = _list.get(c);
805                         if (o instanceof Node)
806                         {
807                             Node n = (Node) o;
808                             if (tag.equals(n._tag))
809                             {
810                                 _node = n;
811                                 return true;
812                             }
813                         }
814                         c++;
815                     }
816                     return false;
817                 }
818 
819                 /* -------------------------------------------------- */
820                 public Node next()
821                 {
822                     try
823                     {
824                         if (hasNext())
825                             return _node;
826                         throw new NoSuchElementException();
827                     }
828                     finally
829                     {
830                         _node = null;
831                         c++;
832                     }
833                 }
834 
835                 /* -------------------------------------------------- */
836                 public void remove()
837                 {
838                     throw new UnsupportedOperationException("Not supported");
839                 }
840             };
841         }
842     }
843 }