View Javadoc

1   // ========================================================================
2   // Copyright (c) 2004-2009 Mort Bay Consulting Pty. Ltd.
3   // ------------------------------------------------------------------------
4   // All rights reserved. This program and the accompanying materials
5   // are made available under the terms of the Eclipse Public License v1.0
6   // and Apache License v2.0 which accompanies this distribution.
7   // The Eclipse Public License is available at 
8   // http://www.eclipse.org/legal/epl-v10.html
9   // The Apache License v2.0 is available at
10  // http://www.opensource.org/licenses/apache2.0.php
11  // You may elect to redistribute this code under either of these licenses. 
12  // ========================================================================
13  
14  package org.eclipse.jetty.util;
15  
16  import java.io.UnsupportedEncodingException;
17  
18  
19  
20  /* ------------------------------------------------------------ */
21  /** URI Holder.
22   * This class assists with the decoding and encoding or HTTP URI's.
23   * It differs from the java.net.URL class as it does not provide
24   * communications ability, but it does assist with query string
25   * formatting.
26   * <P>UTF-8 encoding is used by default for % encoded characters. This
27   * may be overridden with the org.eclipse.jetty.util.URI.charset system property.
28   * @see UrlEncoded
29   * 
30   */
31  public class URIUtil
32      implements Cloneable
33  {
34      public static final String SLASH="/";
35      public static final String HTTP="http";
36      public static final String HTTP_COLON="http:";
37      public static final String HTTPS="https";
38      public static final String HTTPS_COLON="https:";
39  
40      // Use UTF-8 as per http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars
41      public static final String __CHARSET=System.getProperty("org.eclipse.jetty.util.URI.charset",StringUtil.__UTF8);
42      
43      private URIUtil()
44      {}
45      
46      /* ------------------------------------------------------------ */
47      /** Encode a URI path.
48       * This is the same encoding offered by URLEncoder, except that
49       * the '/' character is not encoded.
50       * @param path The path the encode
51       * @return The encoded path
52       */
53      public static String encodePath(String path)
54      {
55          if (path==null || path.length()==0)
56              return path;
57          
58          StringBuilder buf = encodePath(null,path);
59          return buf==null?path:buf.toString();
60      }
61          
62      /* ------------------------------------------------------------ */
63      /** Encode a URI path.
64       * @param path The path the encode
65       * @param buf StringBuilder to encode path into (or null)
66       * @return The StringBuilder or null if no substitutions required.
67       */
68      public static StringBuilder encodePath(StringBuilder buf, String path)
69      {
70          if (buf==null)
71          {
72          loop:
73              for (int i=0;i<path.length();i++)
74              {
75                  char c=path.charAt(i);
76                  switch(c)
77                  {
78                      case '%':
79                      case '?':
80                      case ';':
81                      case '#':
82                      case '\'':
83                      case '"':
84                      case '<':
85                      case '>':
86                      case ' ':
87                          buf=new StringBuilder(path.length()<<1);
88                          break loop;
89                  }
90              }
91              if (buf==null)
92                  return null;
93          }
94          
95          synchronized(buf)
96          {
97              for (int i=0;i<path.length();i++)
98              {
99                  char c=path.charAt(i);       
100                 switch(c)
101                 {
102                   case '%':
103                       buf.append("%25");
104                       continue;
105                   case '?':
106                       buf.append("%3F");
107                       continue;
108                   case ';':
109                       buf.append("%3B");
110                       continue;
111                   case '#':
112                       buf.append("%23");
113                       continue;
114                   case '"':
115                       buf.append("%22");
116                       continue;
117                   case '\'':
118                       buf.append("%27");
119                       continue;
120                   case '<':
121                       buf.append("%3C");
122                       continue;
123                   case '>':
124                       buf.append("%3E");
125                       continue;
126                   case ' ':
127                       buf.append("%20");
128                       continue;
129                   default:
130                       buf.append(c);
131                       continue;
132                 }
133             }
134         }
135 
136         return buf;
137     }
138     
139     /* ------------------------------------------------------------ */
140     /** Encode a URI path.
141      * @param path The path the encode
142      * @param buf StringBuilder to encode path into (or null)
143      * @param encode String of characters to encode. % is always encoded.
144      * @return The StringBuilder or null if no substitutions required.
145      */
146     public static StringBuilder encodeString(StringBuilder buf,
147                                              String path,
148                                              String encode)
149     {
150         if (buf==null)
151         {
152         loop:
153             for (int i=0;i<path.length();i++)
154             {
155                 char c=path.charAt(i);
156                 if (c=='%' || encode.indexOf(c)>=0)
157                 {    
158                     buf=new StringBuilder(path.length()<<1);
159                     break loop;
160                 }
161             }
162             if (buf==null)
163                 return null;
164         }
165         
166         synchronized(buf)
167         {
168             for (int i=0;i<path.length();i++)
169             {
170                 char c=path.charAt(i);
171                 if (c=='%' || encode.indexOf(c)>=0)
172                 {
173                     buf.append('%');
174                     StringUtil.append(buf,(byte)(0xff&c),16);
175                 }
176                 else
177                     buf.append(c);
178             }
179         }
180 
181         return buf;
182     }
183     
184     /* ------------------------------------------------------------ */
185     /* Decode a URI path.
186      * @param path The path the encode
187      * @param buf StringBuilder to encode path into
188      */
189     public static String decodePath(String path)
190     {
191         if (path==null)
192             return null;
193         char[] chars=null;
194         int n=0;
195         byte[] bytes=null;
196         int b=0;
197         
198         int len=path.length();
199         
200         for (int i=0;i<len;i++)
201         {
202             char c = path.charAt(i);
203 
204             if (c=='%' && (i+2)<len)
205             {
206                 if (chars==null)
207                 {
208                     chars=new char[len];
209                     bytes=new byte[len];
210                     path.getChars(0,i,chars,0);
211                 }
212                 bytes[b++]=(byte)(0xff&TypeUtil.parseInt(path,i+1,2,16));
213                 i+=2;
214                 continue;
215             }
216             else if (bytes==null)
217             {
218                 n++;
219                 continue;
220             }
221             
222             if (b>0)
223             {
224                 String s;
225                 try
226                 {
227                     s=new String(bytes,0,b,__CHARSET);
228                 }
229                 catch (UnsupportedEncodingException e)
230                 {       
231                     s=new String(bytes,0,b);
232                 }
233                 s.getChars(0,s.length(),chars,n);
234                 n+=s.length();
235                 b=0;
236             }
237             
238             chars[n++]=c;
239         }
240 
241         if (chars==null)
242             return path;
243 
244         if (b>0)
245         {
246             String s;
247             try
248             {
249                 s=new String(bytes,0,b,__CHARSET);
250             }
251             catch (UnsupportedEncodingException e)
252             {       
253                 s=new String(bytes,0,b);
254             }
255             s.getChars(0,s.length(),chars,n);
256             n+=s.length();
257         }
258         
259         return new String(chars,0,n);
260     }
261     
262     /* ------------------------------------------------------------ */
263     /* Decode a URI path.
264      * @param path The path the encode
265      * @param buf StringBuilder to encode path into
266      */
267     public static String decodePath(byte[] buf, int offset, int length)
268     {
269         byte[] bytes=null;
270         int n=0;
271         
272         for (int i=0;i<length;i++)
273         {
274             byte b = buf[i + offset];
275             
276             if (b=='%' && (i+2)<length)
277             {
278                 b=(byte)(0xff&TypeUtil.parseInt(buf,i+offset+1,2,16));
279                 i+=2;
280             }
281             else if (bytes==null)
282             {
283                 n++;
284                 continue;
285             }
286             
287             if (bytes==null)
288             {
289                 bytes=new byte[length];
290                 for (int j=0;j<n;j++)
291                     bytes[j]=buf[j + offset];
292             }
293             
294             bytes[n++]=b;
295         }
296 
297         if (bytes==null)
298             return StringUtil.toString(buf,offset,length,__CHARSET);
299         return StringUtil.toString(bytes,0,n,__CHARSET);
300     }
301 
302     
303     /* ------------------------------------------------------------ */
304     /** Add two URI path segments.
305      * Handles null and empty paths, path and query params (eg ?a=b or
306      * ;JSESSIONID=xxx) and avoids duplicate '/'
307      * @param p1 URI path segment (should be encoded)
308      * @param p2 URI path segment (should be encoded)
309      * @return Legally combined path segments.
310      */
311     public static String addPaths(String p1, String p2)
312     {
313         if (p1==null || p1.length()==0)
314         {
315             if (p1!=null && p2==null)
316                 return p1;
317             return p2;
318         }
319         if (p2==null || p2.length()==0)
320             return p1;
321         
322         int split=p1.indexOf(';');
323         if (split<0)
324             split=p1.indexOf('?');
325         if (split==0)
326             return p2+p1;
327         if (split<0)
328             split=p1.length();
329 
330         StringBuilder buf = new StringBuilder(p1.length()+p2.length()+2);
331         buf.append(p1);
332         
333         if (buf.charAt(split-1)=='/')
334         {
335             if (p2.startsWith(URIUtil.SLASH))
336             {
337                 buf.deleteCharAt(split-1);
338                 buf.insert(split-1,p2);
339             }
340             else
341                 buf.insert(split,p2);
342         }
343         else
344         {
345             if (p2.startsWith(URIUtil.SLASH))
346                 buf.insert(split,p2);
347             else
348             {
349                 buf.insert(split,'/');
350                 buf.insert(split+1,p2);
351             }
352         }
353 
354         return buf.toString();
355     }
356     
357     /* ------------------------------------------------------------ */
358     /** Return the parent Path.
359      * Treat a URI like a directory path and return the parent directory.
360      */
361     public static String parentPath(String p)
362     {
363         if (p==null || URIUtil.SLASH.equals(p))
364             return null;
365         int slash=p.lastIndexOf('/',p.length()-2);
366         if (slash>=0)
367             return p.substring(0,slash+1);
368         return null;
369     }
370     
371     /* ------------------------------------------------------------ */
372     /** Strip parameters from a path.
373      * Return path upto any semicolon parameters.
374      */
375     public static String stripPath(String path)
376     {
377         if (path==null)
378             return null;
379         int semi=path.indexOf(';');
380         if (semi<0)
381             return path;
382         return path.substring(0,semi);
383     }
384     
385     /* ------------------------------------------------------------ */
386     /** Convert a path to a cananonical form.
387      * All instances of "." and ".." are factored out.  Null is returned
388      * if the path tries to .. above its root.
389      * @param path 
390      * @return path or null.
391      */
392     public static String canonicalPath(String path)
393     {
394         if (path==null || path.length()==0)
395             return path;
396 
397         int end=path.length();
398         int start = path.lastIndexOf('/', end);
399 
400     search:
401         while (end>0)
402         {
403             switch(end-start)
404             {
405               case 2: // possible single dot
406                   if (path.charAt(start+1)!='.')
407                       break;
408                   break search;
409               case 3: // possible double dot
410                   if (path.charAt(start+1)!='.' || path.charAt(start+2)!='.')
411                       break;
412                   break search;
413             }
414             
415             end=start;
416             start=path.lastIndexOf('/',end-1);
417         }
418 
419         // If we have checked the entire string
420         if (start>=end)
421             return path;
422         
423         StringBuilder buf = new StringBuilder(path);
424         int delStart=-1;
425         int delEnd=-1;
426         int skip=0;
427         
428         while (end>0)
429         {
430             switch(end-start)
431             {       
432               case 2: // possible single dot
433                   if (buf.charAt(start+1)!='.')
434                   {
435                       if (skip>0 && --skip==0)
436                       {   
437                           delStart=start>=0?start:0;
438                           if(delStart>0 && delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
439                               delStart++;
440                       }
441                       break;
442                   }
443                   
444                   if(start<0 && buf.length()>2 && buf.charAt(1)=='/' && buf.charAt(2)=='/')
445                       break;
446                   
447                   if(delEnd<0)
448                       delEnd=end;
449                   delStart=start;
450                   if (delStart<0 || delStart==0&&buf.charAt(delStart)=='/')
451                   {
452                       delStart++;
453                       if (delEnd<buf.length() && buf.charAt(delEnd)=='/')
454                           delEnd++;
455                       break;
456                   }
457                   if (end==buf.length())
458                       delStart++;
459                   
460                   end=start--;
461                   while (start>=0 && buf.charAt(start)!='/')
462                       start--;
463                   continue;
464                   
465               case 3: // possible double dot
466                   if (buf.charAt(start+1)!='.' || buf.charAt(start+2)!='.')
467                   {
468                       if (skip>0 && --skip==0)
469                       {   delStart=start>=0?start:0;
470                           if(delStart>0 && delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
471                               delStart++;
472                       }
473                       break;
474                   }
475                   
476                   delStart=start;
477                   if (delEnd<0)
478                       delEnd=end;
479 
480                   skip++;
481                   end=start--;
482                   while (start>=0 && buf.charAt(start)!='/')
483                       start--;
484                   continue;
485 
486               default:
487                   if (skip>0 && --skip==0)
488                   {
489                       delStart=start>=0?start:0;
490                       if(delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
491                           delStart++;
492                   }
493             }     
494             
495             // Do the delete
496             if (skip<=0 && delStart>=0 && delEnd>=delStart)
497             {  
498                 buf.delete(delStart,delEnd);
499                 delStart=delEnd=-1;
500                 if (skip>0)
501                     delEnd=end;
502             }
503             
504             end=start--;
505             while (start>=0 && buf.charAt(start)!='/')
506                 start--;
507         }      
508 
509         // Too many ..
510         if (skip>0)
511             return null;
512         
513         // Do the delete
514         if (delEnd>=0)
515             buf.delete(delStart,delEnd);
516 
517         return buf.toString();
518     }
519 
520     /* ------------------------------------------------------------ */
521     /** Convert a path to a compact form.
522      * All instances of "//" and "///" etc. are factored out to single "/" 
523      * @param path 
524      * @return path
525      */
526     public static String compactPath(String path)
527     {
528         if (path==null || path.length()==0)
529             return path;
530 
531         int state=0;
532         int end=path.length();
533         int i=0;
534         
535         loop:
536         while (i<end)
537         {
538             char c=path.charAt(i);
539             switch(c)
540             {
541                 case '?':
542                     return path;
543                 case '/':
544                     state++;
545                     if (state==2)
546                         break loop;
547                     break;
548                 default:
549                     state=0;
550             }
551             i++;
552         }
553         
554         if (state<2)
555             return path;
556         
557         StringBuffer buf = new StringBuffer(path.length());
558         buf.append(path,0,i);
559         
560         loop2:
561         while (i<end)
562         {
563             char c=path.charAt(i);
564             switch(c)
565             {
566                 case '?':
567                     buf.append(path,i,end);
568                     break loop2;
569                 case '/':
570                     if (state++==0)
571                         buf.append(c);
572                     break;
573                 default:
574                     state=0;
575                     buf.append(c);
576             }
577             i++;
578         }
579         
580         return buf.toString();
581     }
582 
583     /* ------------------------------------------------------------ */
584     /** 
585      * @param uri URI
586      * @return True if the uri has a scheme
587      */
588     public static boolean hasScheme(String uri)
589     {
590         for (int i=0;i<uri.length();i++)
591         {
592             char c=uri.charAt(i);
593             if (c==':')
594                 return true;
595             if (!(c>='a'&&c<='z' ||
596                   c>='A'&&c<='Z' ||
597                   (i>0 &&(c>='0'&&c<='9' ||
598                           c=='.' ||
599                           c=='+' ||
600                           c=='-'))
601                   ))
602                 break;
603         }
604         return false;
605     }
606     
607 }
608 
609 
610