View Javadoc

1   // ========================================================================
2   // Copyright (c) 2004-2009 Mort Bay Consulting Pty. Ltd.
3   // ------------------------------------------------------------------------
4   // All rights reserved. This program and the accompanying materials
5   // are made available under the terms of the Eclipse Public License v1.0
6   // and Apache License v2.0 which accompanies this distribution.
7   // The Eclipse Public License is available at 
8   // http://www.eclipse.org/legal/epl-v10.html
9   // The Apache License v2.0 is available at
10  // http://www.opensource.org/licenses/apache2.0.php
11  // You may elect to redistribute this code under either of these licenses. 
12  // ========================================================================
13  
14  package org.eclipse.jetty.xml;
15  
16  import java.io.File;
17  import java.io.IOException;
18  import java.io.InputStream;
19  import java.net.URL;
20  import java.util.AbstractList;
21  import java.util.ArrayList;
22  import java.util.HashMap;
23  import java.util.Iterator;
24  import java.util.Map;
25  import java.util.NoSuchElementException;
26  import java.util.Stack;
27  import java.util.StringTokenizer;
28  
29  import javax.xml.parsers.SAXParser;
30  import javax.xml.parsers.SAXParserFactory;
31  
32  import org.eclipse.jetty.util.LazyList;
33  import org.eclipse.jetty.util.log.Log;
34  import org.xml.sax.Attributes;
35  import org.xml.sax.ContentHandler;
36  import org.xml.sax.InputSource;
37  import org.xml.sax.SAXException;
38  import org.xml.sax.SAXParseException;
39  import org.xml.sax.XMLReader;
40  import org.xml.sax.helpers.DefaultHandler;
41  
42  /*--------------------------------------------------------------*/
43  /**
 * XML Parser wrapper. This class wraps any standard JAXP 1.1 parser with convenient error and
 * entity handlers and a mini DOM-like document tree.
46   * <P>
47   * By default, the parser is created as a validating parser only if xerces is present. This can be 
48   * configured by setting the "org.eclipse.jetty.xml.XmlParser.Validating" system property.
49   * 
50   * 
51   */
52  public class XmlParser
53  {
    // Entity redirections consulted by the entity resolver. Lookups are tried by public id,
    // then by system id, then by the text after the last '/' of the system id.
    private Map<String,URL> _redirectMap = new HashMap<String,URL>();
    // Underlying JAXP parser; (re)created by setValidating().
    private SAXParser _parser;
    // Trigger tag name -> observer. Created lazily by addContentHandler().
    private Map<String,ContentHandler> _observerMap;
    // Observers pushed by Handler.startElement and popped by Handler.endElement; a null entry
    // stands for an element that has no observer registered.
    private Stack<ContentHandler> _observers = new Stack<ContentHandler>();
    // Expression most recently passed to setXpath().
    private String _xpath;
    // LazyList holding the individual paths tokenized from the xpath expression.
    private Object _xpaths;
    // System id of the most recently resolved entity ending in ".dtd"; reset to null by parse().
    private String _dtd;
61  
62      /* ------------------------------------------------------------ */
63      /**
64       * Construct
65       */
66      public XmlParser()
67      {
68          SAXParserFactory factory = SAXParserFactory.newInstance();
69          boolean validating_dft = factory.getClass().toString().startsWith("org.apache.xerces.");
70          String validating_prop = System.getProperty("org.eclipse.jetty.xml.XmlParser.Validating", validating_dft ? "true" : "false");
71          boolean validating = Boolean.valueOf(validating_prop).booleanValue();
72  
73          setValidating(validating);
74      }
75  
76      /* ------------------------------------------------------------ */
77      /**
78       * Constructor.
79       */
80      public XmlParser(boolean validating)
81      {
82          setValidating(validating);
83      }
84      
85      /* ------------------------------------------------------------ */
86      public void setValidating(boolean validating)
87      {
88          try
89          {
90              SAXParserFactory factory = SAXParserFactory.newInstance();
91              factory.setValidating(validating);
92              _parser = factory.newSAXParser();
93              
94              try
95              {
96                  if (validating)
97                      _parser.getXMLReader().setFeature("http://apache.org/xml/features/validation/schema", validating);
98              }
99              catch (Exception e)
100             {
101                 if (validating)
102                     Log.warn("Schema validation may not be supported: ", e);
103                 else
104                     Log.ignore(e);
105             }
106 
107             _parser.getXMLReader().setFeature("http://xml.org/sax/features/validation", validating);
108             _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespaces", true);
109             _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes", false);  
110             _parser.getXMLReader().setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", validating);
111         }
112         catch (Exception e)
113         {
114             Log.warn(Log.EXCEPTION, e);
115             throw new Error(e.toString());
116         }
117     }
118     
119     /* ------------------------------------------------------------ */
120     /**
121      * @param name
122      * @param entity
123      */
124     public synchronized void redirectEntity(String name, URL entity)
125     {
126         if (entity != null)
127             _redirectMap.put(name, entity);
128     }
129 
130     /* ------------------------------------------------------------ */
131     /**
132      * 
133      * @return Returns the xpath.
134      */
135     public String getXpath()
136     {
137         return _xpath;
138     }
139 
140     /* ------------------------------------------------------------ */
141     /**
142      * Set an XPath A very simple subset of xpath is supported to select a partial tree. Currently
143      * only path like "/node1/nodeA | /node1/nodeB" are supported.
144      * 
145      * @param xpath The xpath to set.
146      */
147     public void setXpath(String xpath)
148     {
149         _xpath = xpath;
150         StringTokenizer tok = new StringTokenizer(xpath, "| ");
151         while (tok.hasMoreTokens())
152             _xpaths = LazyList.add(_xpaths, tok.nextToken());
153     }
154 
155     /* ------------------------------------------------------------ */
156     public String getDTD()
157     {
158         return _dtd;
159     }
160 
161     /* ------------------------------------------------------------ */
162     /**
163      * Add a ContentHandler. Add an additional _content handler that is triggered on a tag name. SAX
164      * events are passed to the ContentHandler provided from a matching start element to the
165      * corresponding end element. Only a single _content handler can be registered against each tag.
166      * 
167      * @param trigger Tag local or q name.
168      * @param observer SAX ContentHandler
169      */
170     public synchronized void addContentHandler(String trigger, ContentHandler observer)
171     {
172         if (_observerMap == null)
173             _observerMap = new HashMap();
174         _observerMap.put(trigger, observer);
175     }
176 
177     /* ------------------------------------------------------------ */
178     public synchronized Node parse(InputSource source) throws IOException, SAXException
179     {
180         _dtd=null;
181         Handler handler = new Handler();
182         XMLReader reader = _parser.getXMLReader();
183         reader.setContentHandler(handler);
184         reader.setErrorHandler(handler);
185         reader.setEntityResolver(handler);
186         if (Log.isDebugEnabled())
187             Log.debug("parsing: sid=" + source.getSystemId() + ",pid=" + source.getPublicId());
188         _parser.parse(source, handler);
189         if (handler._error != null)
190             throw handler._error;
191         Node doc = (Node) handler._top.get(0);
192         handler.clear();
193         return doc;
194     }
195 
196     /* ------------------------------------------------------------ */
197     /**
198      * Parse String URL.
199      */
200     public synchronized Node parse(String url) throws IOException, SAXException
201     {
202         if (Log.isDebugEnabled())
203             Log.debug("parse: " + url);
204         return parse(new InputSource(url));
205     }
206 
207     /* ------------------------------------------------------------ */
208     /**
209      * Parse File.
210      */
211     public synchronized Node parse(File file) throws IOException, SAXException
212     {
213         if (Log.isDebugEnabled())
214             Log.debug("parse: " + file);
215         return parse(new InputSource(file.toURL().toString()));
216     }
217 
218     /* ------------------------------------------------------------ */
219     /**
220      * Parse InputStream.
221      */
222     public synchronized Node parse(InputStream in) throws IOException, SAXException
223     {
224         _dtd=null;
225         Handler handler = new Handler();
226         XMLReader reader = _parser.getXMLReader();
227         reader.setContentHandler(handler);
228         reader.setErrorHandler(handler);
229         reader.setEntityResolver(handler);
230         _parser.parse(new InputSource(in), handler);
231         if (handler._error != null)
232             throw handler._error;
233         Node doc = (Node) handler._top.get(0);
234         handler.clear();
235         return doc;
236     }
237 
238     /* ------------------------------------------------------------ */
239     /* ------------------------------------------------------------ */
240     private class NoopHandler extends DefaultHandler
241     {
242         Handler _next;
243         int _depth;
244 
245         NoopHandler(Handler next)
246         {
247             this._next = next;
248         }
249 
250         /* ------------------------------------------------------------ */
251         public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
252         {
253             _depth++;
254         }
255 
256         /* ------------------------------------------------------------ */
257         public void endElement(String uri, String localName, String qName) throws SAXException
258         {
259             if (_depth == 0)
260                 _parser.getXMLReader().setContentHandler(_next);
261             else
262                 _depth--;
263         }
264     }
265     
266     /* ------------------------------------------------------------ */
267     /* ------------------------------------------------------------ */
268     private class Handler extends DefaultHandler
269     {
270         Node _top = new Node(null, null, null);
271         SAXParseException _error;
272         private Node _context = _top;
273         private NoopHandler _noop;
274 
275         Handler()
276         {
277             _noop = new NoopHandler(this);
278         }
279 
280         /* ------------------------------------------------------------ */
281         void clear()
282         {
283             _top = null;
284             _error = null;
285             _context = null;
286         }
287 
288         /* ------------------------------------------------------------ */
289         public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
290         {
291             String name = (uri == null || uri.equals("")) ? qName : localName;
292             Node node = new Node(_context, name, attrs);
293             
294 
295             // check if the node matches any xpaths set?
296             if (_xpaths != null)
297             {
298                 String path = node.getPath();
299                 boolean match = false;
300                 for (int i = LazyList.size(_xpaths); !match && i-- > 0;)
301                 {
302                     String xpath = (String) LazyList.get(_xpaths, i);
303 
304                     match = path.equals(xpath) || xpath.startsWith(path) && xpath.length() > path.length() && xpath.charAt(path.length()) == '/';
305                 }
306 
307                 if (match)
308                 {
309                     _context.add(node);
310                     _context = node;
311                 }
312                 else
313                 {
314                     _parser.getXMLReader().setContentHandler(_noop);
315                 }
316             }
317             else
318             {
319                 _context.add(node);
320                 _context = node;
321             }
322 
323             ContentHandler observer = null;
324             if (_observerMap != null)
325                 observer = (ContentHandler) _observerMap.get(name);
326             _observers.push(observer);
327 
328             for (int i = 0; i < _observers.size(); i++)
329                 if (_observers.get(i) != null)
330                     ((ContentHandler) _observers.get(i)).startElement(uri, localName, qName, attrs);
331         }
332 
333         /* ------------------------------------------------------------ */
334         public void endElement(String uri, String localName, String qName) throws SAXException
335         {
336             _context = _context._parent;
337             for (int i = 0; i < _observers.size(); i++)
338                 if (_observers.get(i) != null)
339                     ((ContentHandler) _observers.get(i)).endElement(uri, localName, qName);
340             _observers.pop();
341         }
342 
343         /* ------------------------------------------------------------ */
344         public void ignorableWhitespace(char buf[], int offset, int len) throws SAXException
345         {
346             for (int i = 0; i < _observers.size(); i++)
347                 if (_observers.get(i) != null)
348                     ((ContentHandler) _observers.get(i)).ignorableWhitespace(buf, offset, len);
349         }
350 
351         /* ------------------------------------------------------------ */
352         public void characters(char buf[], int offset, int len) throws SAXException
353         {
354             _context.add(new String(buf, offset, len));
355             for (int i = 0; i < _observers.size(); i++)
356                 if (_observers.get(i) != null)
357                     ((ContentHandler) _observers.get(i)).characters(buf, offset, len);
358         }
359 
360         /* ------------------------------------------------------------ */
361         public void warning(SAXParseException ex)
362         {
363             Log.debug(Log.EXCEPTION, ex);
364             Log.warn("WARNING@" + getLocationString(ex) + " : " + ex.toString());
365         }
366 
367         /* ------------------------------------------------------------ */
368         public void error(SAXParseException ex) throws SAXException
369         {
370             // Save error and continue to report other errors
371             if (_error == null)
372                 _error = ex;
373             Log.debug(Log.EXCEPTION, ex);
374             Log.warn("ERROR@" + getLocationString(ex) + " : " + ex.toString());
375         }
376 
377         /* ------------------------------------------------------------ */
378         public void fatalError(SAXParseException ex) throws SAXException
379         {
380             _error = ex;
381             Log.debug(Log.EXCEPTION, ex);
382             Log.warn("FATAL@" + getLocationString(ex) + " : " + ex.toString());
383             throw ex;
384         }
385 
386         /* ------------------------------------------------------------ */
387         private String getLocationString(SAXParseException ex)
388         {
389             return ex.getSystemId() + " line:" + ex.getLineNumber() + " col:" + ex.getColumnNumber();
390         }
391 
392         /* ------------------------------------------------------------ */
393         public InputSource resolveEntity(String pid, String sid)
394         {
395             if (Log.isDebugEnabled())
396                 Log.debug("resolveEntity(" + pid + ", " + sid + ")");
397             
398             if (sid!=null && sid.endsWith(".dtd"))
399                 _dtd=sid;
400             
401             URL entity = null;
402             if (pid != null)
403                 entity = (URL) _redirectMap.get(pid);
404             if (entity == null)
405                 entity = (URL) _redirectMap.get(sid);
406             if (entity == null)
407             {
408                 String dtd = sid;
409                 if (dtd.lastIndexOf('/') >= 0)
410                     dtd = dtd.substring(dtd.lastIndexOf('/') + 1);
411 
412                 if (Log.isDebugEnabled())
413                     Log.debug("Can't exact match entity in redirect map, trying " + dtd);
414                 entity = (URL) _redirectMap.get(dtd);
415             }
416 
417             if (entity != null)
418             {
419                 try
420                 {
421                     InputStream in = entity.openStream();
422                     if (Log.isDebugEnabled())
423                         Log.debug("Redirected entity " + sid + " --> " + entity);
424                     InputSource is = new InputSource(in);
425                     is.setSystemId(sid);
426                     return is;
427                 }
428                 catch (IOException e)
429                 {
430                     Log.ignore(e);
431                 }
432             }
433             return null;
434         }
435     }
436 
437     /* ------------------------------------------------------------ */
438     /* ------------------------------------------------------------ */
439     /**
440      * XML Attribute.
441      */
442     public static class Attribute
443     {
444         private String _name;
445         private String _value;
446 
447         Attribute(String n, String v)
448         {
449             _name = n;
450             _value = v;
451         }
452 
453         public String getName()
454         {
455             return _name;
456         }
457 
458         public String getValue()
459         {
460             return _value;
461         }
462     }
463 
464     /* ------------------------------------------------------------ */
465     /* ------------------------------------------------------------ */
466     /**
467      * XML Node. Represents an XML element with optional attributes and ordered content.
468      */
469     public static class Node extends AbstractList
470     {
471         Node _parent;
472         private ArrayList _list;
473         private String _tag;
474         private Attribute[] _attrs;
475         private boolean _lastString = false;
476         private String _path;
477 
478         /* ------------------------------------------------------------ */
479         Node(Node parent, String tag, Attributes attrs)
480         {
481             _parent = parent;
482             _tag = tag;
483 
484             if (attrs != null)
485             {
486                 _attrs = new Attribute[attrs.getLength()];
487                 for (int i = 0; i < attrs.getLength(); i++)
488                 {
489                     String name = attrs.getLocalName(i);
490                     if (name == null || name.equals(""))
491                         name = attrs.getQName(i);
492                     _attrs[i] = new Attribute(name, attrs.getValue(i));
493                 }
494             }
495         }
496 
497         /* ------------------------------------------------------------ */
498         public Node getParent()
499         {
500             return _parent;
501         }
502 
503         /* ------------------------------------------------------------ */
504         public String getTag()
505         {
506             return _tag;
507         }
508 
509         /* ------------------------------------------------------------ */
510         public String getPath()
511         {
512             if (_path == null)
513             {
514                 if (getParent() != null && getParent().getTag() != null)
515                     _path = getParent().getPath() + "/" + _tag;
516                 else
517                     _path = "/" + _tag;
518             }
519             return _path;
520         }
521 
522         /* ------------------------------------------------------------ */
523         /**
524          * Get an array of element attributes.
525          */
526         public Attribute[] getAttributes()
527         {
528             return _attrs;
529         }
530 
531         /* ------------------------------------------------------------ */
532         /**
533          * Get an element attribute.
534          * 
535          * @return attribute or null.
536          */
537         public String getAttribute(String name)
538         {
539             return getAttribute(name, null);
540         }
541 
542         /* ------------------------------------------------------------ */
543         /**
544          * Get an element attribute.
545          * 
546          * @return attribute or null.
547          */
548         public String getAttribute(String name, String dft)
549         {
550             if (_attrs == null || name == null)
551                 return dft;
552             for (int i = 0; i < _attrs.length; i++)
553                 if (name.equals(_attrs[i].getName()))
554                     return _attrs[i].getValue();
555             return dft;
556         }
557 
558         /* ------------------------------------------------------------ */
559         /**
560          * Get the number of children nodes.
561          */
562         public int size()
563         {
564             if (_list != null)
565                 return _list.size();
566             return 0;
567         }
568 
569         /* ------------------------------------------------------------ */
570         /**
571          * Get the ith child node or content.
572          * 
573          * @return Node or String.
574          */
575         public Object get(int i)
576         {
577             if (_list != null)
578                 return _list.get(i);
579             return null;
580         }
581 
582         /* ------------------------------------------------------------ */
583         /**
584          * Get the first child node with the tag.
585          * 
586          * @param tag
587          * @return Node or null.
588          */
589         public Node get(String tag)
590         {
591             if (_list != null)
592             {
593                 for (int i = 0; i < _list.size(); i++)
594                 {
595                     Object o = _list.get(i);
596                     if (o instanceof Node)
597                     {
598                         Node n = (Node) o;
599                         if (tag.equals(n._tag))
600                             return n;
601                     }
602                 }
603             }
604             return null;
605         }
606 
607         /* ------------------------------------------------------------ */
608         @Override
609         public void add(int i, Object o)
610         {
611             if (_list == null)
612                 _list = new ArrayList();
613             if (o instanceof String)
614             {
615                 if (_lastString)
616                 {
617                     int last = _list.size() - 1;
618                     _list.set(last, (String) _list.get(last) + o);
619                 }
620                 else
621                     _list.add(i, o);
622                 _lastString = true;
623             }
624             else
625             {
626                 _lastString = false;
627                 _list.add(i, o);
628             }
629         }
630 
631         /* ------------------------------------------------------------ */
632         public void clear()
633         {
634             if (_list != null)
635                 _list.clear();
636             _list = null;
637         }
638 
639         /* ------------------------------------------------------------ */
640         /**
641          * Get a tag as a string.
642          * 
643          * @param tag The tag to get
644          * @param tags IF true, tags are included in the value.
645          * @param trim If true, trim the value.
646          * @return results of get(tag).toString(tags).
647          */
648         public String getString(String tag, boolean tags, boolean trim)
649         {
650             Node node = get(tag);
651             if (node == null)
652                 return null;
653             String s = node.toString(tags);
654             if (s != null && trim)
655                 s = s.trim();
656             return s;
657         }
658 
659         /* ------------------------------------------------------------ */
660         public synchronized String toString()
661         {
662             return toString(true);
663         }
664 
665         /* ------------------------------------------------------------ */
666         /**
667          * Convert to a string.
668          * 
669          * @param tag If false, only _content is shown.
670          */
671         public synchronized String toString(boolean tag)
672         {
673             StringBuilder buf = new StringBuilder();
674             toString(buf, tag);
675             return buf.toString();
676         }
677 
678         /* ------------------------------------------------------------ */
679         /**
680          * Convert to a string.
681          * 
682          * @param tag If false, only _content is shown.
683          */
684         public synchronized String toString(boolean tag, boolean trim)
685         {
686             String s = toString(tag);
687             if (s != null && trim)
688                 s = s.trim();
689             return s;
690         }
691 
692         /* ------------------------------------------------------------ */
693         private synchronized void toString(StringBuilder buf, boolean tag)
694         {
695             if (tag)
696             {
697                 buf.append("<");
698                 buf.append(_tag);
699 
700                 if (_attrs != null)
701                 {
702                     for (int i = 0; i < _attrs.length; i++)
703                     {
704                         buf.append(' ');
705                         buf.append(_attrs[i].getName());
706                         buf.append("=\"");
707                         buf.append(_attrs[i].getValue());
708                         buf.append("\"");
709                     }
710                 }
711             }
712 
713             if (_list != null)
714             {
715                 if (tag)
716                     buf.append(">");
717                 for (int i = 0; i < _list.size(); i++)
718                 {
719                     Object o = _list.get(i);
720                     if (o == null)
721                         continue;
722                     if (o instanceof Node)
723                         ((Node) o).toString(buf, tag);
724                     else
725                         buf.append(o.toString());
726                 }
727                 if (tag)
728                 {
729                     buf.append("</");
730                     buf.append(_tag);
731                     buf.append(">");
732                 }
733             }
734             else if (tag)
735                 buf.append("/>");
736         }
737 
738         /* ------------------------------------------------------------ */
739         /**
740          * Iterator over named child nodes.
741          * 
742          * @param tag The tag of the nodes.
743          * @return Iterator over all child nodes with the specified tag.
744          */
745         public Iterator<Node> iterator(final String tag)
746         {
747             return new Iterator<Node>()
748             {
749                 int c = 0;
750                 Node _node;
751 
752                 /* -------------------------------------------------- */
753                 public boolean hasNext()
754                 {
755                     if (_node != null)
756                         return true;
757                     while (_list != null && c < _list.size())
758                     {
759                         Object o = _list.get(c);
760                         if (o instanceof Node)
761                         {
762                             Node n = (Node) o;
763                             if (tag.equals(n._tag))
764                             {
765                                 _node = n;
766                                 return true;
767                             }
768                         }
769                         c++;
770                     }
771                     return false;
772                 }
773 
774                 /* -------------------------------------------------- */
775                 public Node next()
776                 {
777                     try
778                     {
779                         if (hasNext())
780                             return _node;
781                         throw new NoSuchElementException();
782                     }
783                     finally
784                     {
785                         _node = null;
786                         c++;
787                     }
788                 }
789 
790                 /* -------------------------------------------------- */
791                 public void remove()
792                 {
793                     throw new UnsupportedOperationException("Not supported");
794                 }
795             };
796         }
797     }
798 }