1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.eclipse.jetty.xml;
20
21 import java.io.File;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.net.URL;
25 import java.util.AbstractList;
26 import java.util.ArrayList;
27 import java.util.HashMap;
28 import java.util.Iterator;
29 import java.util.Map;
30 import java.util.NoSuchElementException;
31 import java.util.Stack;
32 import java.util.StringTokenizer;
33
34 import javax.xml.parsers.SAXParser;
35 import javax.xml.parsers.SAXParserFactory;
36
37 import org.eclipse.jetty.util.LazyList;
38 import org.eclipse.jetty.util.log.Log;
39 import org.eclipse.jetty.util.log.Logger;
40 import org.eclipse.jetty.util.resource.Resource;
41 import org.xml.sax.Attributes;
42 import org.xml.sax.ContentHandler;
43 import org.xml.sax.InputSource;
44 import org.xml.sax.SAXException;
45 import org.xml.sax.SAXParseException;
46 import org.xml.sax.XMLReader;
47 import org.xml.sax.helpers.DefaultHandler;
48
49
50
51
52
53
54
55
56
57 public class XmlParser
58 {
59 private static final Logger LOG = Log.getLogger(XmlParser.class);
60
61 private Map<String,URL> _redirectMap = new HashMap<String,URL>();
62 private SAXParser _parser;
63 private Map<String,ContentHandler> _observerMap;
64 private Stack<ContentHandler> _observers = new Stack<ContentHandler>();
65 private String _xpath;
66 private Object _xpaths;
67 private String _dtd;
68
69
70
71
72
73 public XmlParser()
74 {
75 SAXParserFactory factory = SAXParserFactory.newInstance();
76 boolean validating_dft = factory.getClass().toString().startsWith("org.apache.xerces.");
77 String validating_prop = System.getProperty("org.eclipse.jetty.xml.XmlParser.Validating", validating_dft ? "true" : "false");
78 boolean validating = Boolean.valueOf(validating_prop).booleanValue();
79 setValidating(validating);
80 }
81
82
/**
 * Construct a parser with an explicit validation mode.
 *
 * @param validating true to create a validating parser
 */
public XmlParser(boolean validating)
{
    this.setValidating(validating);
}
87
88
89 public void setValidating(boolean validating)
90 {
91 try
92 {
93 SAXParserFactory factory = SAXParserFactory.newInstance();
94 factory.setValidating(validating);
95 _parser = factory.newSAXParser();
96
97 try
98 {
99 if (validating)
100 _parser.getXMLReader().setFeature("http://apache.org/xml/features/validation/schema", validating);
101 }
102 catch (Exception e)
103 {
104 if (validating)
105 LOG.warn("Schema validation may not be supported: ", e);
106 else
107 LOG.ignore(e);
108 }
109
110 _parser.getXMLReader().setFeature("http://xml.org/sax/features/validation", validating);
111 _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespaces", true);
112 _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes", false);
113 try
114 {
115 if (validating)
116 _parser.getXMLReader().setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", validating);
117 }
118 catch (Exception e)
119 {
120 LOG.warn(e.getMessage());
121 }
122 }
123 catch (Exception e)
124 {
125 LOG.warn(Log.EXCEPTION, e);
126 throw new Error(e.toString());
127 }
128 }
129
130
131 public boolean isValidating()
132 {
133 return _parser.isValidating();
134 }
135
136
137 public synchronized void redirectEntity(String name, URL entity)
138 {
139 if (entity != null)
140 _redirectMap.put(name, entity);
141 }
142
143
144
145
146
147
148 public String getXpath()
149 {
150 return _xpath;
151 }
152
153
154
155
156
157
158
159
160 public void setXpath(String xpath)
161 {
162 _xpath = xpath;
163 StringTokenizer tok = new StringTokenizer(xpath, "| ");
164 while (tok.hasMoreTokens())
165 _xpaths = LazyList.add(_xpaths, tok.nextToken());
166 }
167
168
169 public String getDTD()
170 {
171 return _dtd;
172 }
173
174
175
176
177
178
179
180
181
182
183 public synchronized void addContentHandler(String trigger, ContentHandler observer)
184 {
185 if (_observerMap == null)
186 _observerMap = new HashMap<>();
187 _observerMap.put(trigger, observer);
188 }
189
190
191 public synchronized Node parse(InputSource source) throws IOException, SAXException
192 {
193 _dtd=null;
194 Handler handler = new Handler();
195 XMLReader reader = _parser.getXMLReader();
196 reader.setContentHandler(handler);
197 reader.setErrorHandler(handler);
198 reader.setEntityResolver(handler);
199 if (LOG.isDebugEnabled())
200 LOG.debug("parsing: sid=" + source.getSystemId() + ",pid=" + source.getPublicId());
201 _parser.parse(source, handler);
202 if (handler._error != null)
203 throw handler._error;
204 Node doc = (Node) handler._top.get(0);
205 handler.clear();
206 return doc;
207 }
208
209
210
211
212
213
214
215
216
217 public synchronized Node parse(String url) throws IOException, SAXException
218 {
219 if (LOG.isDebugEnabled())
220 LOG.debug("parse: " + url);
221 return parse(new InputSource(url));
222 }
223
224
225
226
227
228
229
230
231
232 public synchronized Node parse(File file) throws IOException, SAXException
233 {
234 if (LOG.isDebugEnabled())
235 LOG.debug("parse: " + file);
236 return parse(new InputSource(Resource.toURL(file).toString()));
237 }
238
239
240
241
242
243
244
245
246
247 public synchronized Node parse(InputStream in) throws IOException, SAXException
248 {
249 _dtd=null;
250 Handler handler = new Handler();
251 XMLReader reader = _parser.getXMLReader();
252 reader.setContentHandler(handler);
253 reader.setErrorHandler(handler);
254 reader.setEntityResolver(handler);
255 _parser.parse(new InputSource(in), handler);
256 if (handler._error != null)
257 throw handler._error;
258 Node doc = (Node) handler._top.get(0);
259 handler.clear();
260 return doc;
261 }
262
263
264
265 protected InputSource resolveEntity(String pid, String sid)
266 {
267 if (LOG.isDebugEnabled())
268 LOG.debug("resolveEntity(" + pid + ", " + sid + ")");
269
270 if (sid!=null && sid.endsWith(".dtd"))
271 _dtd=sid;
272
273 URL entity = null;
274 if (pid != null)
275 entity = (URL) _redirectMap.get(pid);
276 if (entity == null)
277 entity = (URL) _redirectMap.get(sid);
278 if (entity == null)
279 {
280 String dtd = sid;
281 if (dtd.lastIndexOf('/') >= 0)
282 dtd = dtd.substring(dtd.lastIndexOf('/') + 1);
283
284 if (LOG.isDebugEnabled())
285 LOG.debug("Can't exact match entity in redirect map, trying " + dtd);
286 entity = (URL) _redirectMap.get(dtd);
287 }
288
289 if (entity != null)
290 {
291 try
292 {
293 InputStream in = entity.openStream();
294 if (LOG.isDebugEnabled())
295 LOG.debug("Redirected entity " + sid + " --> " + entity);
296 InputSource is = new InputSource(in);
297 is.setSystemId(sid);
298 return is;
299 }
300 catch (IOException e)
301 {
302 LOG.ignore(e);
303 }
304 }
305 return null;
306 }
307
308
309
310 private class NoopHandler extends DefaultHandler
311 {
312 Handler _next;
313 int _depth;
314
315 NoopHandler(Handler next)
316 {
317 this._next = next;
318 }
319
320
321 public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
322 {
323 _depth++;
324 }
325
326
327 public void endElement(String uri, String localName, String qName) throws SAXException
328 {
329 if (_depth == 0)
330 _parser.getXMLReader().setContentHandler(_next);
331 else
332 _depth--;
333 }
334 }
335
336
337
338 private class Handler extends DefaultHandler
339 {
340 Node _top = new Node(null, null, null);
341 SAXParseException _error;
342 private Node _context = _top;
343 private NoopHandler _noop;
344
345 Handler()
346 {
347 _noop = new NoopHandler(this);
348 }
349
350
351 void clear()
352 {
353 _top = null;
354 _error = null;
355 _context = null;
356 }
357
358
359 public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
360 {
361 String name = null;
362 if (_parser.isNamespaceAware())
363 name = localName;
364
365 if (name == null || "".equals(name))
366 name = qName;
367
368 Node node = new Node(_context, name, attrs);
369
370
371
372 if (_xpaths != null)
373 {
374 String path = node.getPath();
375 boolean match = false;
376 for (int i = LazyList.size(_xpaths); !match && i-- > 0;)
377 {
378 String xpath = (String) LazyList.get(_xpaths, i);
379
380 match = path.equals(xpath) || xpath.startsWith(path) && xpath.length() > path.length() && xpath.charAt(path.length()) == '/';
381 }
382
383 if (match)
384 {
385 _context.add(node);
386 _context = node;
387 }
388 else
389 {
390 _parser.getXMLReader().setContentHandler(_noop);
391 }
392 }
393 else
394 {
395 _context.add(node);
396 _context = node;
397 }
398
399 ContentHandler observer = null;
400 if (_observerMap != null)
401 observer = (ContentHandler) _observerMap.get(name);
402 _observers.push(observer);
403
404 for (int i = 0; i < _observers.size(); i++)
405 if (_observers.get(i) != null)
406 ((ContentHandler) _observers.get(i)).startElement(uri, localName, qName, attrs);
407 }
408
409
410 public void endElement(String uri, String localName, String qName) throws SAXException
411 {
412 _context = _context._parent;
413 for (int i = 0; i < _observers.size(); i++)
414 if (_observers.get(i) != null)
415 ((ContentHandler) _observers.get(i)).endElement(uri, localName, qName);
416 _observers.pop();
417 }
418
419
420 public void ignorableWhitespace(char buf[], int offset, int len) throws SAXException
421 {
422 for (int i = 0; i < _observers.size(); i++)
423 if (_observers.get(i) != null)
424 ((ContentHandler) _observers.get(i)).ignorableWhitespace(buf, offset, len);
425 }
426
427
428 public void characters(char buf[], int offset, int len) throws SAXException
429 {
430 _context.add(new String(buf, offset, len));
431 for (int i = 0; i < _observers.size(); i++)
432 if (_observers.get(i) != null)
433 ((ContentHandler) _observers.get(i)).characters(buf, offset, len);
434 }
435
436
437 public void warning(SAXParseException ex)
438 {
439 LOG.debug(Log.EXCEPTION, ex);
440 LOG.warn("WARNING@" + getLocationString(ex) + " : " + ex.toString());
441 }
442
443
444 public void error(SAXParseException ex) throws SAXException
445 {
446
447 if (_error == null)
448 _error = ex;
449 LOG.debug(Log.EXCEPTION, ex);
450 LOG.warn("ERROR@" + getLocationString(ex) + " : " + ex.toString());
451 }
452
453
454 public void fatalError(SAXParseException ex) throws SAXException
455 {
456 _error = ex;
457 LOG.debug(Log.EXCEPTION, ex);
458 LOG.warn("FATAL@" + getLocationString(ex) + " : " + ex.toString());
459 throw ex;
460 }
461
462
463 private String getLocationString(SAXParseException ex)
464 {
465 return ex.getSystemId() + " line:" + ex.getLineNumber() + " col:" + ex.getColumnNumber();
466 }
467
468
469 public InputSource resolveEntity(String pid, String sid)
470 {
471 return XmlParser.this.resolveEntity(pid,sid);
472 }
473 }
474
475
476
477
478
479
480 public static class Attribute
481 {
482 private String _name;
483 private String _value;
484
485 Attribute(String n, String v)
486 {
487 _name = n;
488 _value = v;
489 }
490
491 public String getName()
492 {
493 return _name;
494 }
495
496 public String getValue()
497 {
498 return _value;
499 }
500 }
501
502
503
504
505
506
507 public static class Node extends AbstractList<Object>
508 {
509 Node _parent;
510 private ArrayList<Object> _list;
511 private String _tag;
512 private Attribute[] _attrs;
513 private boolean _lastString = false;
514 private String _path;
515
516
517 Node(Node parent, String tag, Attributes attrs)
518 {
519 _parent = parent;
520 _tag = tag;
521
522 if (attrs != null)
523 {
524 _attrs = new Attribute[attrs.getLength()];
525 for (int i = 0; i < attrs.getLength(); i++)
526 {
527 String name = attrs.getLocalName(i);
528 if (name == null || name.equals(""))
529 name = attrs.getQName(i);
530 _attrs[i] = new Attribute(name, attrs.getValue(i));
531 }
532 }
533 }
534
535
536 public Node getParent()
537 {
538 return _parent;
539 }
540
541
542 public String getTag()
543 {
544 return _tag;
545 }
546
547
548 public String getPath()
549 {
550 if (_path == null)
551 {
552 if (getParent() != null && getParent().getTag() != null)
553 _path = getParent().getPath() + "/" + _tag;
554 else
555 _path = "/" + _tag;
556 }
557 return _path;
558 }
559
560
561
562
563
564
565 public Attribute[] getAttributes()
566 {
567 return _attrs;
568 }
569
570
571
572
573
574
575
576
577 public String getAttribute(String name)
578 {
579 return getAttribute(name, null);
580 }
581
582
583
584
585
586
587
588
589
590 public String getAttribute(String name, String dft)
591 {
592 if (_attrs == null || name == null)
593 return dft;
594 for (int i = 0; i < _attrs.length; i++)
595 if (name.equals(_attrs[i].getName()))
596 return _attrs[i].getValue();
597 return dft;
598 }
599
600
601
602
603
604 public int size()
605 {
606 if (_list != null)
607 return _list.size();
608 return 0;
609 }
610
611
612
613
614
615
616
617 public Object get(int i)
618 {
619 if (_list != null)
620 return _list.get(i);
621 return null;
622 }
623
624
625
626
627
628
629
630
631 public Node get(String tag)
632 {
633 if (_list != null)
634 {
635 for (int i = 0; i < _list.size(); i++)
636 {
637 Object o = _list.get(i);
638 if (o instanceof Node)
639 {
640 Node n = (Node) o;
641 if (tag.equals(n._tag))
642 return n;
643 }
644 }
645 }
646 return null;
647 }
648
649
650 @Override
651 public void add(int i, Object o)
652 {
653 if (_list == null)
654 _list = new ArrayList<Object>();
655 if (o instanceof String)
656 {
657 if (_lastString)
658 {
659 int last = _list.size() - 1;
660 _list.set(last, (String) _list.get(last) + o);
661 }
662 else
663 _list.add(i, o);
664 _lastString = true;
665 }
666 else
667 {
668 _lastString = false;
669 _list.add(i, o);
670 }
671 }
672
673
674 public void clear()
675 {
676 if (_list != null)
677 _list.clear();
678 _list = null;
679 }
680
681
682
683
684
685
686
687
688
689
690 public String getString(String tag, boolean tags, boolean trim)
691 {
692 Node node = get(tag);
693 if (node == null)
694 return null;
695 String s = node.toString(tags);
696 if (s != null && trim)
697 s = s.trim();
698 return s;
699 }
700
701
702 public synchronized String toString()
703 {
704 return toString(true);
705 }
706
707
708
709
710
711
712
713
714 public synchronized String toString(boolean tag)
715 {
716 StringBuilder buf = new StringBuilder();
717 toString(buf, tag);
718 return buf.toString();
719 }
720
721
722
723
724
725
726
727
728
729 public synchronized String toString(boolean tag, boolean trim)
730 {
731 String s = toString(tag);
732 if (s != null && trim)
733 s = s.trim();
734 return s;
735 }
736
737
738 private synchronized void toString(StringBuilder buf, boolean tag)
739 {
740 if (tag)
741 {
742 buf.append("<");
743 buf.append(_tag);
744
745 if (_attrs != null)
746 {
747 for (int i = 0; i < _attrs.length; i++)
748 {
749 buf.append(' ');
750 buf.append(_attrs[i].getName());
751 buf.append("=\"");
752 buf.append(_attrs[i].getValue());
753 buf.append("\"");
754 }
755 }
756 }
757
758 if (_list != null)
759 {
760 if (tag)
761 buf.append(">");
762 for (int i = 0; i < _list.size(); i++)
763 {
764 Object o = _list.get(i);
765 if (o == null)
766 continue;
767 if (o instanceof Node)
768 ((Node) o).toString(buf, tag);
769 else
770 buf.append(o.toString());
771 }
772 if (tag)
773 {
774 buf.append("</");
775 buf.append(_tag);
776 buf.append(">");
777 }
778 }
779 else if (tag)
780 buf.append("/>");
781 }
782
783
784
785
786
787
788
789
790 public Iterator<Node> iterator(final String tag)
791 {
792 return new Iterator<Node>()
793 {
794 int c = 0;
795 Node _node;
796
797
798 public boolean hasNext()
799 {
800 if (_node != null)
801 return true;
802 while (_list != null && c < _list.size())
803 {
804 Object o = _list.get(c);
805 if (o instanceof Node)
806 {
807 Node n = (Node) o;
808 if (tag.equals(n._tag))
809 {
810 _node = n;
811 return true;
812 }
813 }
814 c++;
815 }
816 return false;
817 }
818
819
820 public Node next()
821 {
822 try
823 {
824 if (hasNext())
825 return _node;
826 throw new NoSuchElementException();
827 }
828 finally
829 {
830 _node = null;
831 c++;
832 }
833 }
834
835
836 public void remove()
837 {
838 throw new UnsupportedOperationException("Not supported");
839 }
840 };
841 }
842 }
843 }