1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.eclipse.jetty.xml;
20
21 import java.io.File;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.net.URL;
25 import java.util.AbstractList;
26 import java.util.ArrayList;
27 import java.util.HashMap;
28 import java.util.Iterator;
29 import java.util.Map;
30 import java.util.NoSuchElementException;
31 import java.util.Stack;
32 import java.util.StringTokenizer;
33
34 import javax.xml.parsers.SAXParser;
35 import javax.xml.parsers.SAXParserFactory;
36
37 import org.eclipse.jetty.util.LazyList;
38 import org.eclipse.jetty.util.log.Log;
39 import org.eclipse.jetty.util.log.Logger;
40 import org.eclipse.jetty.util.resource.Resource;
41 import org.xml.sax.Attributes;
42 import org.xml.sax.ContentHandler;
43 import org.xml.sax.InputSource;
44 import org.xml.sax.SAXException;
45 import org.xml.sax.SAXParseException;
46 import org.xml.sax.XMLReader;
47 import org.xml.sax.helpers.DefaultHandler;
48
49
50
51
52
53
54
55
56
57
58
59 public class XmlParser
60 {
61 private static final Logger LOG = Log.getLogger(XmlParser.class);
62
63 private Map<String,URL> _redirectMap = new HashMap<String,URL>();
64 private SAXParser _parser;
65 private Map<String,ContentHandler> _observerMap;
66 private Stack<ContentHandler> _observers = new Stack<ContentHandler>();
67 private String _xpath;
68 private Object _xpaths;
69 private String _dtd;
70
71
72
73
74
75 public XmlParser()
76 {
77 SAXParserFactory factory = SAXParserFactory.newInstance();
78 boolean validating_dft = factory.getClass().toString().startsWith("org.apache.xerces.");
79 String validating_prop = System.getProperty("org.eclipse.jetty.xml.XmlParser.Validating", validating_dft ? "true" : "false");
80 boolean validating = Boolean.valueOf(validating_prop).booleanValue();
81 setValidating(validating);
82 }
83
84
85
86
87
/**
 * Creates a parser with an explicitly chosen validation mode.
 *
 * @param validating {@code true} to build a validating parser
 */
public XmlParser(boolean validating)
{
    this.setValidating(validating);
}
92
93
94 public void setValidating(boolean validating)
95 {
96 try
97 {
98 SAXParserFactory factory = SAXParserFactory.newInstance();
99 factory.setValidating(validating);
100 _parser = factory.newSAXParser();
101
102 try
103 {
104 if (validating)
105 _parser.getXMLReader().setFeature("http://apache.org/xml/features/validation/schema", validating);
106 }
107 catch (Exception e)
108 {
109 if (validating)
110 LOG.warn("Schema validation may not be supported: ", e);
111 else
112 LOG.ignore(e);
113 }
114
115 _parser.getXMLReader().setFeature("http://xml.org/sax/features/validation", validating);
116 _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespaces", true);
117 _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes", false);
118 try
119 {
120 if (validating)
121 _parser.getXMLReader().setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", validating);
122 }
123 catch (Exception e)
124 {
125 LOG.warn(e.getMessage());
126 }
127 }
128 catch (Exception e)
129 {
130 LOG.warn(Log.EXCEPTION, e);
131 throw new Error(e.toString());
132 }
133 }
134
135
136 public boolean isValidating()
137 {
138 return _parser.isValidating();
139 }
140
141
142
143
144
145
146 public synchronized void redirectEntity(String name, URL entity)
147 {
148 if (entity != null)
149 _redirectMap.put(name, entity);
150 }
151
152
153
154
155
156
157 public String getXpath()
158 {
159 return _xpath;
160 }
161
162
163
164
165
166
167
168
169 public void setXpath(String xpath)
170 {
171 _xpath = xpath;
172 StringTokenizer tok = new StringTokenizer(xpath, "| ");
173 while (tok.hasMoreTokens())
174 _xpaths = LazyList.add(_xpaths, tok.nextToken());
175 }
176
177
178 public String getDTD()
179 {
180 return _dtd;
181 }
182
183
184
185
186
187
188
189
190
191
192 public synchronized void addContentHandler(String trigger, ContentHandler observer)
193 {
194 if (_observerMap == null)
195 _observerMap = new HashMap<>();
196 _observerMap.put(trigger, observer);
197 }
198
199
200 public synchronized Node parse(InputSource source) throws IOException, SAXException
201 {
202 _dtd=null;
203 Handler handler = new Handler();
204 XMLReader reader = _parser.getXMLReader();
205 reader.setContentHandler(handler);
206 reader.setErrorHandler(handler);
207 reader.setEntityResolver(handler);
208 if (LOG.isDebugEnabled())
209 LOG.debug("parsing: sid=" + source.getSystemId() + ",pid=" + source.getPublicId());
210 _parser.parse(source, handler);
211 if (handler._error != null)
212 throw handler._error;
213 Node doc = (Node) handler._top.get(0);
214 handler.clear();
215 return doc;
216 }
217
218
219
220
221
222 public synchronized Node parse(String url) throws IOException, SAXException
223 {
224 if (LOG.isDebugEnabled())
225 LOG.debug("parse: " + url);
226 return parse(new InputSource(url));
227 }
228
229
230
231
232
233 public synchronized Node parse(File file) throws IOException, SAXException
234 {
235 if (LOG.isDebugEnabled())
236 LOG.debug("parse: " + file);
237 return parse(new InputSource(Resource.toURL(file).toString()));
238 }
239
240
241
242
243
244 public synchronized Node parse(InputStream in) throws IOException, SAXException
245 {
246 _dtd=null;
247 Handler handler = new Handler();
248 XMLReader reader = _parser.getXMLReader();
249 reader.setContentHandler(handler);
250 reader.setErrorHandler(handler);
251 reader.setEntityResolver(handler);
252 _parser.parse(new InputSource(in), handler);
253 if (handler._error != null)
254 throw handler._error;
255 Node doc = (Node) handler._top.get(0);
256 handler.clear();
257 return doc;
258 }
259
260
261
262 protected InputSource resolveEntity(String pid, String sid)
263 {
264 if (LOG.isDebugEnabled())
265 LOG.debug("resolveEntity(" + pid + ", " + sid + ")");
266
267 if (sid!=null && sid.endsWith(".dtd"))
268 _dtd=sid;
269
270 URL entity = null;
271 if (pid != null)
272 entity = (URL) _redirectMap.get(pid);
273 if (entity == null)
274 entity = (URL) _redirectMap.get(sid);
275 if (entity == null)
276 {
277 String dtd = sid;
278 if (dtd.lastIndexOf('/') >= 0)
279 dtd = dtd.substring(dtd.lastIndexOf('/') + 1);
280
281 if (LOG.isDebugEnabled())
282 LOG.debug("Can't exact match entity in redirect map, trying " + dtd);
283 entity = (URL) _redirectMap.get(dtd);
284 }
285
286 if (entity != null)
287 {
288 try
289 {
290 InputStream in = entity.openStream();
291 if (LOG.isDebugEnabled())
292 LOG.debug("Redirected entity " + sid + " --> " + entity);
293 InputSource is = new InputSource(in);
294 is.setSystemId(sid);
295 return is;
296 }
297 catch (IOException e)
298 {
299 LOG.ignore(e);
300 }
301 }
302 return null;
303 }
304
305
306
307 private class NoopHandler extends DefaultHandler
308 {
309 Handler _next;
310 int _depth;
311
312 NoopHandler(Handler next)
313 {
314 this._next = next;
315 }
316
317
318 public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
319 {
320 _depth++;
321 }
322
323
324 public void endElement(String uri, String localName, String qName) throws SAXException
325 {
326 if (_depth == 0)
327 _parser.getXMLReader().setContentHandler(_next);
328 else
329 _depth--;
330 }
331 }
332
333
334
335 private class Handler extends DefaultHandler
336 {
337 Node _top = new Node(null, null, null);
338 SAXParseException _error;
339 private Node _context = _top;
340 private NoopHandler _noop;
341
342 Handler()
343 {
344 _noop = new NoopHandler(this);
345 }
346
347
348 void clear()
349 {
350 _top = null;
351 _error = null;
352 _context = null;
353 }
354
355
356 public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
357 {
358 String name = null;
359 if (_parser.isNamespaceAware())
360 name = localName;
361
362 if (name == null || "".equals(name))
363 name = qName;
364
365 Node node = new Node(_context, name, attrs);
366
367
368
369 if (_xpaths != null)
370 {
371 String path = node.getPath();
372 boolean match = false;
373 for (int i = LazyList.size(_xpaths); !match && i-- > 0;)
374 {
375 String xpath = (String) LazyList.get(_xpaths, i);
376
377 match = path.equals(xpath) || xpath.startsWith(path) && xpath.length() > path.length() && xpath.charAt(path.length()) == '/';
378 }
379
380 if (match)
381 {
382 _context.add(node);
383 _context = node;
384 }
385 else
386 {
387 _parser.getXMLReader().setContentHandler(_noop);
388 }
389 }
390 else
391 {
392 _context.add(node);
393 _context = node;
394 }
395
396 ContentHandler observer = null;
397 if (_observerMap != null)
398 observer = (ContentHandler) _observerMap.get(name);
399 _observers.push(observer);
400
401 for (int i = 0; i < _observers.size(); i++)
402 if (_observers.get(i) != null)
403 ((ContentHandler) _observers.get(i)).startElement(uri, localName, qName, attrs);
404 }
405
406
407 public void endElement(String uri, String localName, String qName) throws SAXException
408 {
409 _context = _context._parent;
410 for (int i = 0; i < _observers.size(); i++)
411 if (_observers.get(i) != null)
412 ((ContentHandler) _observers.get(i)).endElement(uri, localName, qName);
413 _observers.pop();
414 }
415
416
417 public void ignorableWhitespace(char buf[], int offset, int len) throws SAXException
418 {
419 for (int i = 0; i < _observers.size(); i++)
420 if (_observers.get(i) != null)
421 ((ContentHandler) _observers.get(i)).ignorableWhitespace(buf, offset, len);
422 }
423
424
425 public void characters(char buf[], int offset, int len) throws SAXException
426 {
427 _context.add(new String(buf, offset, len));
428 for (int i = 0; i < _observers.size(); i++)
429 if (_observers.get(i) != null)
430 ((ContentHandler) _observers.get(i)).characters(buf, offset, len);
431 }
432
433
434 public void warning(SAXParseException ex)
435 {
436 LOG.debug(Log.EXCEPTION, ex);
437 LOG.warn("WARNING@" + getLocationString(ex) + " : " + ex.toString());
438 }
439
440
441 public void error(SAXParseException ex) throws SAXException
442 {
443
444 if (_error == null)
445 _error = ex;
446 LOG.debug(Log.EXCEPTION, ex);
447 LOG.warn("ERROR@" + getLocationString(ex) + " : " + ex.toString());
448 }
449
450
451 public void fatalError(SAXParseException ex) throws SAXException
452 {
453 _error = ex;
454 LOG.debug(Log.EXCEPTION, ex);
455 LOG.warn("FATAL@" + getLocationString(ex) + " : " + ex.toString());
456 throw ex;
457 }
458
459
460 private String getLocationString(SAXParseException ex)
461 {
462 return ex.getSystemId() + " line:" + ex.getLineNumber() + " col:" + ex.getColumnNumber();
463 }
464
465
466 public InputSource resolveEntity(String pid, String sid)
467 {
468 return XmlParser.this.resolveEntity(pid,sid);
469 }
470 }
471
472
473
474
475
476
477 public static class Attribute
478 {
479 private String _name;
480 private String _value;
481
482 Attribute(String n, String v)
483 {
484 _name = n;
485 _value = v;
486 }
487
488 public String getName()
489 {
490 return _name;
491 }
492
493 public String getValue()
494 {
495 return _value;
496 }
497 }
498
499
500
501
502
503
504 public static class Node extends AbstractList<Object>
505 {
506 Node _parent;
507 private ArrayList<Object> _list;
508 private String _tag;
509 private Attribute[] _attrs;
510 private boolean _lastString = false;
511 private String _path;
512
513
514 Node(Node parent, String tag, Attributes attrs)
515 {
516 _parent = parent;
517 _tag = tag;
518
519 if (attrs != null)
520 {
521 _attrs = new Attribute[attrs.getLength()];
522 for (int i = 0; i < attrs.getLength(); i++)
523 {
524 String name = attrs.getLocalName(i);
525 if (name == null || name.equals(""))
526 name = attrs.getQName(i);
527 _attrs[i] = new Attribute(name, attrs.getValue(i));
528 }
529 }
530 }
531
532
533 public Node getParent()
534 {
535 return _parent;
536 }
537
538
539 public String getTag()
540 {
541 return _tag;
542 }
543
544
545 public String getPath()
546 {
547 if (_path == null)
548 {
549 if (getParent() != null && getParent().getTag() != null)
550 _path = getParent().getPath() + "/" + _tag;
551 else
552 _path = "/" + _tag;
553 }
554 return _path;
555 }
556
557
558
559
560
561 public Attribute[] getAttributes()
562 {
563 return _attrs;
564 }
565
566
567
568
569
570
571
572 public String getAttribute(String name)
573 {
574 return getAttribute(name, null);
575 }
576
577
578
579
580
581
582
583 public String getAttribute(String name, String dft)
584 {
585 if (_attrs == null || name == null)
586 return dft;
587 for (int i = 0; i < _attrs.length; i++)
588 if (name.equals(_attrs[i].getName()))
589 return _attrs[i].getValue();
590 return dft;
591 }
592
593
594
595
596
597 public int size()
598 {
599 if (_list != null)
600 return _list.size();
601 return 0;
602 }
603
604
605
606
607
608
609
610 public Object get(int i)
611 {
612 if (_list != null)
613 return _list.get(i);
614 return null;
615 }
616
617
618
619
620
621
622
623
624 public Node get(String tag)
625 {
626 if (_list != null)
627 {
628 for (int i = 0; i < _list.size(); i++)
629 {
630 Object o = _list.get(i);
631 if (o instanceof Node)
632 {
633 Node n = (Node) o;
634 if (tag.equals(n._tag))
635 return n;
636 }
637 }
638 }
639 return null;
640 }
641
642
643 @Override
644 public void add(int i, Object o)
645 {
646 if (_list == null)
647 _list = new ArrayList<Object>();
648 if (o instanceof String)
649 {
650 if (_lastString)
651 {
652 int last = _list.size() - 1;
653 _list.set(last, (String) _list.get(last) + o);
654 }
655 else
656 _list.add(i, o);
657 _lastString = true;
658 }
659 else
660 {
661 _lastString = false;
662 _list.add(i, o);
663 }
664 }
665
666
667 public void clear()
668 {
669 if (_list != null)
670 _list.clear();
671 _list = null;
672 }
673
674
675
676
677
678
679
680
681
682
683 public String getString(String tag, boolean tags, boolean trim)
684 {
685 Node node = get(tag);
686 if (node == null)
687 return null;
688 String s = node.toString(tags);
689 if (s != null && trim)
690 s = s.trim();
691 return s;
692 }
693
694
695 public synchronized String toString()
696 {
697 return toString(true);
698 }
699
700
701
702
703
704
705
706 public synchronized String toString(boolean tag)
707 {
708 StringBuilder buf = new StringBuilder();
709 toString(buf, tag);
710 return buf.toString();
711 }
712
713
714
715
716
717
718
719 public synchronized String toString(boolean tag, boolean trim)
720 {
721 String s = toString(tag);
722 if (s != null && trim)
723 s = s.trim();
724 return s;
725 }
726
727
728 private synchronized void toString(StringBuilder buf, boolean tag)
729 {
730 if (tag)
731 {
732 buf.append("<");
733 buf.append(_tag);
734
735 if (_attrs != null)
736 {
737 for (int i = 0; i < _attrs.length; i++)
738 {
739 buf.append(' ');
740 buf.append(_attrs[i].getName());
741 buf.append("=\"");
742 buf.append(_attrs[i].getValue());
743 buf.append("\"");
744 }
745 }
746 }
747
748 if (_list != null)
749 {
750 if (tag)
751 buf.append(">");
752 for (int i = 0; i < _list.size(); i++)
753 {
754 Object o = _list.get(i);
755 if (o == null)
756 continue;
757 if (o instanceof Node)
758 ((Node) o).toString(buf, tag);
759 else
760 buf.append(o.toString());
761 }
762 if (tag)
763 {
764 buf.append("</");
765 buf.append(_tag);
766 buf.append(">");
767 }
768 }
769 else if (tag)
770 buf.append("/>");
771 }
772
773
774
775
776
777
778
779
780 public Iterator<Node> iterator(final String tag)
781 {
782 return new Iterator<Node>()
783 {
784 int c = 0;
785 Node _node;
786
787
788 public boolean hasNext()
789 {
790 if (_node != null)
791 return true;
792 while (_list != null && c < _list.size())
793 {
794 Object o = _list.get(c);
795 if (o instanceof Node)
796 {
797 Node n = (Node) o;
798 if (tag.equals(n._tag))
799 {
800 _node = n;
801 return true;
802 }
803 }
804 c++;
805 }
806 return false;
807 }
808
809
810 public Node next()
811 {
812 try
813 {
814 if (hasNext())
815 return _node;
816 throw new NoSuchElementException();
817 }
818 finally
819 {
820 _node = null;
821 c++;
822 }
823 }
824
825
826 public void remove()
827 {
828 throw new UnsupportedOperationException("Not supported");
829 }
830 };
831 }
832 }
833 }