View Javadoc

1   // ========================================================================
2   // Copyright (c) 2004-2009 Mort Bay Consulting Pty. Ltd.
3   // ------------------------------------------------------------------------
4   // All rights reserved. This program and the accompanying materials
5   // are made available under the terms of the Eclipse Public License v1.0
6   // and Apache License v2.0 which accompanies this distribution.
7   // The Eclipse Public License is available at 
8   // http://www.eclipse.org/legal/epl-v10.html
9   // The Apache License v2.0 is available at
10  // http://www.opensource.org/licenses/apache2.0.php
11  // You may elect to redistribute this code under either of these licenses. 
12  // ========================================================================
13  
14  package org.eclipse.jetty.xml;
15  
16  import java.io.File;
17  import java.io.IOException;
18  import java.io.InputStream;
19  import java.net.URL;
20  import java.util.AbstractList;
21  import java.util.ArrayList;
22  import java.util.HashMap;
23  import java.util.Iterator;
24  import java.util.Map;
25  import java.util.NoSuchElementException;
26  import java.util.Stack;
27  import java.util.StringTokenizer;
28  
29  import javax.xml.parsers.SAXParser;
30  import javax.xml.parsers.SAXParserFactory;
31  
32  import org.eclipse.jetty.util.LazyList;
33  import org.eclipse.jetty.util.log.Log;
34  import org.eclipse.jetty.util.resource.Resource;
35  import org.xml.sax.Attributes;
36  import org.xml.sax.ContentHandler;
37  import org.xml.sax.InputSource;
38  import org.xml.sax.SAXException;
39  import org.xml.sax.SAXParseException;
40  import org.xml.sax.XMLReader;
41  import org.xml.sax.helpers.DefaultHandler;
42  
43  /*--------------------------------------------------------------*/
44  /**
45   * XML Parser wrapper. This class wraps any standard JAXP1.1 parser with convenient error and
46   * entity handlers and a mini dom-like document tree.
47   * <P>
48   * By default, the parser is created as a validating parser only if xerces is present. This can be 
49   * configured by setting the "org.eclipse.jetty.xml.XmlParser.Validating" system property.
50   * 
51   * 
52   */
53  public class XmlParser
54  {
55      private Map<String,URL> _redirectMap = new HashMap<String,URL>();
56      private SAXParser _parser;
57      private Map<String,ContentHandler> _observerMap;
58      private Stack<ContentHandler> _observers = new Stack<ContentHandler>();
59      private String _xpath;
60      private Object _xpaths;
61      private String _dtd;
62  
63      /* ------------------------------------------------------------ */
64      /**
65       * Construct
66       */
67      public XmlParser()
68      {
69          SAXParserFactory factory = SAXParserFactory.newInstance();
70          boolean validating_dft = factory.getClass().toString().startsWith("org.apache.xerces.");
71          String validating_prop = System.getProperty("org.eclipse.jetty.xml.XmlParser.Validating", validating_dft ? "true" : "false");
72          boolean validating = Boolean.valueOf(validating_prop).booleanValue();
73          setValidating(validating);
74      }
75  
76      /* ------------------------------------------------------------ */
77      /**
78       * Constructor.
79       */
80      public XmlParser(boolean validating)
81      {
82          setValidating(validating);
83      }
84      
85      /* ------------------------------------------------------------ */
86      public void setValidating(boolean validating)
87      {
88          try
89          {
90              SAXParserFactory factory = SAXParserFactory.newInstance();
91              factory.setValidating(validating);
92              _parser = factory.newSAXParser();
93              
94              try
95              {
96                  if (validating)
97                      _parser.getXMLReader().setFeature("http://apache.org/xml/features/validation/schema", validating);
98              }
99              catch (Exception e)
100             {
101                 if (validating)
102                     Log.warn("Schema validation may not be supported: ", e);
103                 else
104                     Log.ignore(e);
105             }
106 
107             _parser.getXMLReader().setFeature("http://xml.org/sax/features/validation", validating);
108             _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespaces", true);
109             _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes", false);  
110             try
111             {
112                 if (validating)
113                     _parser.getXMLReader().setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", validating);
114             }
115             catch (Exception e)
116             {
117                 Log.warn(e.getMessage());
118             }
119         }
120         catch (Exception e)
121         {
122             Log.warn(Log.EXCEPTION, e);
123             throw new Error(e.toString());
124         }
125     }
126     
127     /* ------------------------------------------------------------ */
128     /**
129      * @param name
130      * @param entity
131      */
132     public synchronized void redirectEntity(String name, URL entity)
133     {
134         if (entity != null)
135             _redirectMap.put(name, entity);
136     }
137 
138     /* ------------------------------------------------------------ */
139     /**
140      * 
141      * @return Returns the xpath.
142      */
143     public String getXpath()
144     {
145         return _xpath;
146     }
147 
148     /* ------------------------------------------------------------ */
149     /**
150      * Set an XPath A very simple subset of xpath is supported to select a partial tree. Currently
151      * only path like "/node1/nodeA | /node1/nodeB" are supported.
152      * 
153      * @param xpath The xpath to set.
154      */
155     public void setXpath(String xpath)
156     {
157         _xpath = xpath;
158         StringTokenizer tok = new StringTokenizer(xpath, "| ");
159         while (tok.hasMoreTokens())
160             _xpaths = LazyList.add(_xpaths, tok.nextToken());
161     }
162 
163     /* ------------------------------------------------------------ */
164     public String getDTD()
165     {
166         return _dtd;
167     }
168 
169     /* ------------------------------------------------------------ */
170     /**
171      * Add a ContentHandler. Add an additional _content handler that is triggered on a tag name. SAX
172      * events are passed to the ContentHandler provided from a matching start element to the
173      * corresponding end element. Only a single _content handler can be registered against each tag.
174      * 
175      * @param trigger Tag local or q name.
176      * @param observer SAX ContentHandler
177      */
178     public synchronized void addContentHandler(String trigger, ContentHandler observer)
179     {
180         if (_observerMap == null)
181             _observerMap = new HashMap();
182         _observerMap.put(trigger, observer);
183     }
184 
185     /* ------------------------------------------------------------ */
186     public synchronized Node parse(InputSource source) throws IOException, SAXException
187     {
188         _dtd=null;
189         Handler handler = new Handler();
190         XMLReader reader = _parser.getXMLReader();
191         reader.setContentHandler(handler);
192         reader.setErrorHandler(handler);
193         reader.setEntityResolver(handler);
194         if (Log.isDebugEnabled())
195             Log.debug("parsing: sid=" + source.getSystemId() + ",pid=" + source.getPublicId());
196         _parser.parse(source, handler);
197         if (handler._error != null)
198             throw handler._error;
199         Node doc = (Node) handler._top.get(0);
200         handler.clear();
201         return doc;
202     }
203 
204     /* ------------------------------------------------------------ */
205     /**
206      * Parse String URL.
207      */
208     public synchronized Node parse(String url) throws IOException, SAXException
209     {
210         if (Log.isDebugEnabled())
211             Log.debug("parse: " + url);
212         return parse(new InputSource(url));
213     }
214 
215     /* ------------------------------------------------------------ */
216     /**
217      * Parse File.
218      */
219     public synchronized Node parse(File file) throws IOException, SAXException
220     {
221         if (Log.isDebugEnabled())
222             Log.debug("parse: " + file);
223         return parse(new InputSource(Resource.toURL(file).toString()));
224     }
225 
226     /* ------------------------------------------------------------ */
227     /**
228      * Parse InputStream.
229      */
230     public synchronized Node parse(InputStream in) throws IOException, SAXException
231     {
232         _dtd=null;
233         Handler handler = new Handler();
234         XMLReader reader = _parser.getXMLReader();
235         reader.setContentHandler(handler);
236         reader.setErrorHandler(handler);
237         reader.setEntityResolver(handler);
238         _parser.parse(new InputSource(in), handler);
239         if (handler._error != null)
240             throw handler._error;
241         Node doc = (Node) handler._top.get(0);
242         handler.clear();
243         return doc;
244     }
245 
246     /* ------------------------------------------------------------ */
247     /* ------------------------------------------------------------ */
248     private class NoopHandler extends DefaultHandler
249     {
250         Handler _next;
251         int _depth;
252 
253         NoopHandler(Handler next)
254         {
255             this._next = next;
256         }
257 
258         /* ------------------------------------------------------------ */
259         public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
260         {
261             _depth++;
262         }
263 
264         /* ------------------------------------------------------------ */
265         public void endElement(String uri, String localName, String qName) throws SAXException
266         {
267             if (_depth == 0)
268                 _parser.getXMLReader().setContentHandler(_next);
269             else
270                 _depth--;
271         }
272     }
273     
274     /* ------------------------------------------------------------ */
275     /* ------------------------------------------------------------ */
276     private class Handler extends DefaultHandler
277     {
278         Node _top = new Node(null, null, null);
279         SAXParseException _error;
280         private Node _context = _top;
281         private NoopHandler _noop;
282 
283         Handler()
284         {
285             _noop = new NoopHandler(this);
286         }
287 
288         /* ------------------------------------------------------------ */
289         void clear()
290         {
291             _top = null;
292             _error = null;
293             _context = null;
294         }
295 
296         /* ------------------------------------------------------------ */
297         public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
298         {
299             String name = null;
300             if (_parser.isNamespaceAware())
301                 name = localName;
302 
303             if (name == null || "".equals(name))
304                 name = qName;
305 
306             Node node = new Node(_context, name, attrs);
307             
308 
309             // check if the node matches any xpaths set?
310             if (_xpaths != null)
311             {
312                 String path = node.getPath();
313                 boolean match = false;
314                 for (int i = LazyList.size(_xpaths); !match && i-- > 0;)
315                 {
316                     String xpath = (String) LazyList.get(_xpaths, i);
317 
318                     match = path.equals(xpath) || xpath.startsWith(path) && xpath.length() > path.length() && xpath.charAt(path.length()) == '/';
319                 }
320 
321                 if (match)
322                 {
323                     _context.add(node);
324                     _context = node;
325                 }
326                 else
327                 {
328                     _parser.getXMLReader().setContentHandler(_noop);
329                 }
330             }
331             else
332             {
333                 _context.add(node);
334                 _context = node;
335             }
336 
337             ContentHandler observer = null;
338             if (_observerMap != null)
339                 observer = (ContentHandler) _observerMap.get(name);
340             _observers.push(observer);
341 
342             for (int i = 0; i < _observers.size(); i++)
343                 if (_observers.get(i) != null)
344                     ((ContentHandler) _observers.get(i)).startElement(uri, localName, qName, attrs);
345         }
346 
347         /* ------------------------------------------------------------ */
348         public void endElement(String uri, String localName, String qName) throws SAXException
349         {
350             _context = _context._parent;
351             for (int i = 0; i < _observers.size(); i++)
352                 if (_observers.get(i) != null)
353                     ((ContentHandler) _observers.get(i)).endElement(uri, localName, qName);
354             _observers.pop();
355         }
356 
357         /* ------------------------------------------------------------ */
358         public void ignorableWhitespace(char buf[], int offset, int len) throws SAXException
359         {
360             for (int i = 0; i < _observers.size(); i++)
361                 if (_observers.get(i) != null)
362                     ((ContentHandler) _observers.get(i)).ignorableWhitespace(buf, offset, len);
363         }
364 
365         /* ------------------------------------------------------------ */
366         public void characters(char buf[], int offset, int len) throws SAXException
367         {
368             _context.add(new String(buf, offset, len));
369             for (int i = 0; i < _observers.size(); i++)
370                 if (_observers.get(i) != null)
371                     ((ContentHandler) _observers.get(i)).characters(buf, offset, len);
372         }
373 
374         /* ------------------------------------------------------------ */
375         public void warning(SAXParseException ex)
376         {
377             Log.debug(Log.EXCEPTION, ex);
378             Log.warn("WARNING@" + getLocationString(ex) + " : " + ex.toString());
379         }
380 
381         /* ------------------------------------------------------------ */
382         public void error(SAXParseException ex) throws SAXException
383         {
384             // Save error and continue to report other errors
385             if (_error == null)
386                 _error = ex;
387             Log.debug(Log.EXCEPTION, ex);
388             Log.warn("ERROR@" + getLocationString(ex) + " : " + ex.toString());
389         }
390 
391         /* ------------------------------------------------------------ */
392         public void fatalError(SAXParseException ex) throws SAXException
393         {
394             _error = ex;
395             Log.debug(Log.EXCEPTION, ex);
396             Log.warn("FATAL@" + getLocationString(ex) + " : " + ex.toString());
397             throw ex;
398         }
399 
400         /* ------------------------------------------------------------ */
401         private String getLocationString(SAXParseException ex)
402         {
403             return ex.getSystemId() + " line:" + ex.getLineNumber() + " col:" + ex.getColumnNumber();
404         }
405 
406         /* ------------------------------------------------------------ */
407         public InputSource resolveEntity(String pid, String sid)
408         {
409             if (Log.isDebugEnabled())
410                 Log.debug("resolveEntity(" + pid + ", " + sid + ")");
411             
412             if (sid!=null && sid.endsWith(".dtd"))
413                 _dtd=sid;
414             
415             URL entity = null;
416             if (pid != null)
417                 entity = (URL) _redirectMap.get(pid);
418             if (entity == null)
419                 entity = (URL) _redirectMap.get(sid);
420             if (entity == null)
421             {
422                 String dtd = sid;
423                 if (dtd.lastIndexOf('/') >= 0)
424                     dtd = dtd.substring(dtd.lastIndexOf('/') + 1);
425 
426                 if (Log.isDebugEnabled())
427                     Log.debug("Can't exact match entity in redirect map, trying " + dtd);
428                 entity = (URL) _redirectMap.get(dtd);
429             }
430 
431             if (entity != null)
432             {
433                 try
434                 {
435                     InputStream in = entity.openStream();
436                     if (Log.isDebugEnabled())
437                         Log.debug("Redirected entity " + sid + " --> " + entity);
438                     InputSource is = new InputSource(in);
439                     is.setSystemId(sid);
440                     return is;
441                 }
442                 catch (IOException e)
443                 {
444                     Log.ignore(e);
445                 }
446             }
447             return null;
448         }
449     }
450 
451     /* ------------------------------------------------------------ */
452     /* ------------------------------------------------------------ */
453     /**
454      * XML Attribute.
455      */
456     public static class Attribute
457     {
458         private String _name;
459         private String _value;
460 
461         Attribute(String n, String v)
462         {
463             _name = n;
464             _value = v;
465         }
466 
467         public String getName()
468         {
469             return _name;
470         }
471 
472         public String getValue()
473         {
474             return _value;
475         }
476     }
477 
478     /* ------------------------------------------------------------ */
479     /* ------------------------------------------------------------ */
480     /**
481      * XML Node. Represents an XML element with optional attributes and ordered content.
482      */
483     public static class Node extends AbstractList<Object>
484     {
485         Node _parent;
486         private ArrayList<Object> _list;
487         private String _tag;
488         private Attribute[] _attrs;
489         private boolean _lastString = false;
490         private String _path;
491 
492         /* ------------------------------------------------------------ */
493         Node(Node parent, String tag, Attributes attrs)
494         {
495             _parent = parent;
496             _tag = tag;
497 
498             if (attrs != null)
499             {
500                 _attrs = new Attribute[attrs.getLength()];
501                 for (int i = 0; i < attrs.getLength(); i++)
502                 {
503                     String name = attrs.getLocalName(i);
504                     if (name == null || name.equals(""))
505                         name = attrs.getQName(i);
506                     _attrs[i] = new Attribute(name, attrs.getValue(i));
507                 }
508             }
509         }
510 
511         /* ------------------------------------------------------------ */
512         public Node getParent()
513         {
514             return _parent;
515         }
516 
517         /* ------------------------------------------------------------ */
518         public String getTag()
519         {
520             return _tag;
521         }
522 
523         /* ------------------------------------------------------------ */
524         public String getPath()
525         {
526             if (_path == null)
527             {
528                 if (getParent() != null && getParent().getTag() != null)
529                     _path = getParent().getPath() + "/" + _tag;
530                 else
531                     _path = "/" + _tag;
532             }
533             return _path;
534         }
535 
536         /* ------------------------------------------------------------ */
537         /**
538          * Get an array of element attributes.
539          */
540         public Attribute[] getAttributes()
541         {
542             return _attrs;
543         }
544 
545         /* ------------------------------------------------------------ */
546         /**
547          * Get an element attribute.
548          * 
549          * @return attribute or null.
550          */
551         public String getAttribute(String name)
552         {
553             return getAttribute(name, null);
554         }
555 
556         /* ------------------------------------------------------------ */
557         /**
558          * Get an element attribute.
559          * 
560          * @return attribute or null.
561          */
562         public String getAttribute(String name, String dft)
563         {
564             if (_attrs == null || name == null)
565                 return dft;
566             for (int i = 0; i < _attrs.length; i++)
567                 if (name.equals(_attrs[i].getName()))
568                     return _attrs[i].getValue();
569             return dft;
570         }
571 
572         /* ------------------------------------------------------------ */
573         /**
574          * Get the number of children nodes.
575          */
576         public int size()
577         {
578             if (_list != null)
579                 return _list.size();
580             return 0;
581         }
582 
583         /* ------------------------------------------------------------ */
584         /**
585          * Get the ith child node or content.
586          * 
587          * @return Node or String.
588          */
589         public Object get(int i)
590         {
591             if (_list != null)
592                 return _list.get(i);
593             return null;
594         }
595 
596         /* ------------------------------------------------------------ */
597         /**
598          * Get the first child node with the tag.
599          * 
600          * @param tag
601          * @return Node or null.
602          */
603         public Node get(String tag)
604         {
605             if (_list != null)
606             {
607                 for (int i = 0; i < _list.size(); i++)
608                 {
609                     Object o = _list.get(i);
610                     if (o instanceof Node)
611                     {
612                         Node n = (Node) o;
613                         if (tag.equals(n._tag))
614                             return n;
615                     }
616                 }
617             }
618             return null;
619         }
620 
621         /* ------------------------------------------------------------ */
622         @Override
623         public void add(int i, Object o)
624         {
625             if (_list == null)
626                 _list = new ArrayList<Object>();
627             if (o instanceof String)
628             {
629                 if (_lastString)
630                 {
631                     int last = _list.size() - 1;
632                     _list.set(last, (String) _list.get(last) + o);
633                 }
634                 else
635                     _list.add(i, o);
636                 _lastString = true;
637             }
638             else
639             {
640                 _lastString = false;
641                 _list.add(i, o);
642             }
643         }
644 
645         /* ------------------------------------------------------------ */
646         public void clear()
647         {
648             if (_list != null)
649                 _list.clear();
650             _list = null;
651         }
652 
653         /* ------------------------------------------------------------ */
654         /**
655          * Get a tag as a string.
656          * 
657          * @param tag The tag to get
658          * @param tags IF true, tags are included in the value.
659          * @param trim If true, trim the value.
660          * @return results of get(tag).toString(tags).
661          */
662         public String getString(String tag, boolean tags, boolean trim)
663         {
664             Node node = get(tag);
665             if (node == null)
666                 return null;
667             String s = node.toString(tags);
668             if (s != null && trim)
669                 s = s.trim();
670             return s;
671         }
672 
673         /* ------------------------------------------------------------ */
674         public synchronized String toString()
675         {
676             return toString(true);
677         }
678 
679         /* ------------------------------------------------------------ */
680         /**
681          * Convert to a string.
682          * 
683          * @param tag If false, only _content is shown.
684          */
685         public synchronized String toString(boolean tag)
686         {
687             StringBuilder buf = new StringBuilder();
688             toString(buf, tag);
689             return buf.toString();
690         }
691 
692         /* ------------------------------------------------------------ */
693         /**
694          * Convert to a string.
695          * 
696          * @param tag If false, only _content is shown.
697          */
698         public synchronized String toString(boolean tag, boolean trim)
699         {
700             String s = toString(tag);
701             if (s != null && trim)
702                 s = s.trim();
703             return s;
704         }
705 
706         /* ------------------------------------------------------------ */
707         private synchronized void toString(StringBuilder buf, boolean tag)
708         {
709             if (tag)
710             {
711                 buf.append("<");
712                 buf.append(_tag);
713 
714                 if (_attrs != null)
715                 {
716                     for (int i = 0; i < _attrs.length; i++)
717                     {
718                         buf.append(' ');
719                         buf.append(_attrs[i].getName());
720                         buf.append("=\"");
721                         buf.append(_attrs[i].getValue());
722                         buf.append("\"");
723                     }
724                 }
725             }
726 
727             if (_list != null)
728             {
729                 if (tag)
730                     buf.append(">");
731                 for (int i = 0; i < _list.size(); i++)
732                 {
733                     Object o = _list.get(i);
734                     if (o == null)
735                         continue;
736                     if (o instanceof Node)
737                         ((Node) o).toString(buf, tag);
738                     else
739                         buf.append(o.toString());
740                 }
741                 if (tag)
742                 {
743                     buf.append("</");
744                     buf.append(_tag);
745                     buf.append(">");
746                 }
747             }
748             else if (tag)
749                 buf.append("/>");
750         }
751 
752         /* ------------------------------------------------------------ */
753         /**
754          * Iterator over named child nodes.
755          * 
756          * @param tag The tag of the nodes.
757          * @return Iterator over all child nodes with the specified tag.
758          */
759         public Iterator<Node> iterator(final String tag)
760         {
761             return new Iterator<Node>()
762             {
763                 int c = 0;
764                 Node _node;
765 
766                 /* -------------------------------------------------- */
767                 public boolean hasNext()
768                 {
769                     if (_node != null)
770                         return true;
771                     while (_list != null && c < _list.size())
772                     {
773                         Object o = _list.get(c);
774                         if (o instanceof Node)
775                         {
776                             Node n = (Node) o;
777                             if (tag.equals(n._tag))
778                             {
779                                 _node = n;
780                                 return true;
781                             }
782                         }
783                         c++;
784                     }
785                     return false;
786                 }
787 
788                 /* -------------------------------------------------- */
789                 public Node next()
790                 {
791                     try
792                     {
793                         if (hasNext())
794                             return _node;
795                         throw new NoSuchElementException();
796                     }
797                     finally
798                     {
799                         _node = null;
800                         c++;
801                     }
802                 }
803 
804                 /* -------------------------------------------------- */
805                 public void remove()
806                 {
807                     throw new UnsupportedOperationException("Not supported");
808                 }
809             };
810         }
811     }
812 }