View Javadoc

1   //
2   //  ========================================================================
3   //  Copyright (c) 1995-2016 Mort Bay Consulting Pty. Ltd.
4   //  ------------------------------------------------------------------------
5   //  All rights reserved. This program and the accompanying materials
6   //  are made available under the terms of the Eclipse Public License v1.0
7   //  and Apache License v2.0 which accompanies this distribution.
8   //
9   //      The Eclipse Public License is available at
10  //      http://www.eclipse.org/legal/epl-v10.html
11  //
12  //      The Apache License v2.0 is available at
13  //      http://www.opensource.org/licenses/apache2.0.php
14  //
15  //  You may elect to redistribute this code under either of these licenses.
16  //  ========================================================================
17  //
18  
19  package org.eclipse.jetty.util;
20  
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  import java.util.Arrays;
24  import java.util.HashSet;
25  import java.util.Set;
26  
27  /* ------------------------------------------------------------ */
28  /** 
29   * <p>A Trie String lookup data structure using a fixed size array.</p>
30   * <p>This implementation is always case insensitive and is optimal for
31   * a small number of fixed strings with few special characters.  The
32   * Trie is stored in an array of lookup tables, each indexed by the 
33   * next character of the key.   Frequently used characters are directly
34   * indexed in each lookup table, whilst infrequently used characters
35   * must use a big character table.
36   * </p>
37   * <p>This Trie is very space efficient if the key characters are 
38   * from ' ', '+', '-', ':', ';', '.', 'A' to 'Z' or 'a' to 'z'. 
39   * Other ISO-8859-1 characters can be used by the key, but less space
40   * efficiently.
41   * </p>
42   * <p>This Trie is not Threadsafe and contains no mutual exclusion 
43   * or deliberate memory barriers.  It is intended for an ArrayTrie to be
44   * built by a single thread and then used concurrently by multiple threads
45   * and not mutated during that access.  If concurrent mutations of the
46   * Trie is required external locks need to be applied.
47   * </p>
48   * @param <V> the entry type
49   */
50  public class ArrayTrie<V> extends AbstractTrie<V>
51  {
52      /**
53       * The Size of a Trie row is how many characters can be looked
54       * up directly without going to a big index.  This is set at 
55       * 32 to cover case insensitive alphabet and a few other common
56       * characters. 
57       */
58      private static final int ROW_SIZE = 32;
59      
60      /**
61       * The index lookup table, this maps a character as a byte 
62       * (ISO-8859-1 or UTF8) to an index within a Trie row
63       */
64      private static final int[] __lookup = 
65      { // 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
66     /*0*/-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
67     /*1*/-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
68     /*2*/31, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, -1, 27, 30, -1,
69     /*3*/-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 28, 29, -1, -1, -1, -1,
70     /*4*/-1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
71     /*5*/15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
72     /*6*/-1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
73     /*7*/15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
74      };
75      
76      /**
77       * The Trie rows in a single array which allows a lookup of row,character
78       * to the next row in the Trie.  This is actually a 2 dimensional
79       * array that has been flattened to achieve locality of reference.
80       * The first ROW_SIZE entries are for row 0, then next ROW_SIZE 
81       * entries are for row 1 etc.   So in general instead of using
82       * _rows[row][index], we use _rows[row*ROW_SIZE+index] to look up
83       * the next row for a given character.
84       * 
85       * The array is of characters rather than integers to save space. 
86       */
87      private final char[] _rowIndex;
88      
89      /**
90       * The key (if any) for a Trie row. 
91       * A row may be a leaf, a node or both in the Trie tree.
92       */
93      private final String[] _key;
94      
95      /**
96       * The value (if any) for a Trie row. 
97       * A row may be a leaf, a node or both in the Trie tree.
98       */
99      private final V[] _value;
100     
101     /**
102      * A big index for each row.
103      * If a character outside of the lookup map is needed,
104      * then a big index will be created for the row, with
105      * 256 entries, one for each possible byte.
106      */
107     private char[][] _bigIndex;
108     
109     /**
110      * The number of rows allocated
111      */
112     private char _rows;
113 
114     public ArrayTrie()
115     {
116         this(128);
117     }
118     
119     /* ------------------------------------------------------------ */
120     /**
121      * @param capacity  The capacity of the trie, which at the worst case
122      * is the total number of characters of all keys stored in the Trie.
123      * The capacity needed is dependent of the shared prefixes of the keys.
124      * For example, a capacity of 6 nodes is required to store keys "foo" 
125      * and "bar", but a capacity of only 4 is required to
126      * store "bar" and "bat".
127      */
128     @SuppressWarnings("unchecked")
129     public ArrayTrie(int capacity)
130     {
131         super(true);
132         _value=(V[])new Object[capacity];
133         _rowIndex=new char[capacity*32];
134         _key=new String[capacity];
135     }
136 
137     /* ------------------------------------------------------------ */
138     @Override
139     public void clear()
140     {
141         _rows=0;
142         Arrays.fill(_value,null);
143         Arrays.fill(_rowIndex,(char)0);
144         Arrays.fill(_key,null);
145     }
146     
147     /* ------------------------------------------------------------ */
148     @Override
149     public boolean put(String s, V v)
150     {
151         int t=0;
152         int k;
153         int limit = s.length();
154         for(k=0; k < limit; k++)
155         {
156             char c=s.charAt(k);
157             
158             int index=__lookup[c&0x7f];
159             if (index>=0)
160             {
161                 int idx=t*ROW_SIZE+index;
162                 t=_rowIndex[idx];
163                 if (t==0)
164                 {
165                     if (++_rows>=_value.length)
166                         return false;
167                     t=_rowIndex[idx]=_rows;
168                 }
169             }
170             else if (c>127)
171                 throw new IllegalArgumentException("non ascii character");
172             else
173             {
174                 if (_bigIndex==null)
175                     _bigIndex=new char[_value.length][];
176                 if (t>=_bigIndex.length)
177                     return false;
178                 char[] big=_bigIndex[t];
179                 if (big==null)
180                     big=_bigIndex[t]=new char[128];
181                 t=big[c];
182                 if (t==0)
183                 {
184                     if (_rows==_value.length)
185                         return false;
186                     t=big[c]=++_rows;
187                 }
188             }
189         }
190         
191         if (t>=_key.length)
192         {
193             _rows=(char)_key.length;
194             return false;
195         }
196         
197         _key[t]=v==null?null:s;
198         _value[t] = v;
199         return true;
200     }
201 
202     /* ------------------------------------------------------------ */
203     @Override
204     public V get(String s, int offset, int len)
205     {
206         int t = 0;
207         for(int i=0; i < len; i++)
208         {
209             char c=s.charAt(offset+i);
210             int index=__lookup[c&0x7f];
211             if (index>=0)
212             {
213                 int idx=t*ROW_SIZE+index;
214                 t=_rowIndex[idx];
215                 if (t==0)
216                     return null;
217             }
218             else
219             {
220                 char[] big = _bigIndex==null?null:_bigIndex[t];
221                 if (big==null)
222                     return null;
223                 t=big[c];
224                 if (t==0)
225                     return null;
226             }
227         }
228         return _value[t];
229     }
230 
231     /* ------------------------------------------------------------ */
232     @Override
233     public V get(ByteBuffer b,int offset,int len)
234     {
235         int t = 0;
236         for(int i=0; i < len; i++)
237         {
238             byte c=b.get(offset+i);
239             int index=__lookup[c&0x7f];
240             if (index>=0)
241             {
242                 int idx=t*ROW_SIZE+index;
243                 t=_rowIndex[idx];
244                 if (t==0)
245                     return null;
246             }
247             else
248             {
249                 char[] big = _bigIndex==null?null:_bigIndex[t];
250                 if (big==null)
251                     return null;
252                 t=big[c];
253                 if (t==0)
254                     return null;
255             }
256         }
257         return (V)_value[t];
258     }
259 
260     /* ------------------------------------------------------------ */
261     @Override
262     public V getBest(byte[] b,int offset,int len)
263     {
264         return getBest(0,b,offset,len);
265     }
266 
267     /* ------------------------------------------------------------ */
268     @Override
269     public V getBest(ByteBuffer b,int offset,int len)
270     {
271         if (b.hasArray())
272             return getBest(0,b.array(),b.arrayOffset()+b.position()+offset,len);
273         return getBest(0,b,offset,len);
274     }
275 
276     /* ------------------------------------------------------------ */
277     @Override
278     public V getBest(String s, int offset, int len)
279     {
280         return getBest(0,s,offset,len);
281     }
282     
283     /* ------------------------------------------------------------ */
284     private V getBest(int t, String s, int offset, int len)
285     {
286         int pos=offset;
287         for(int i=0; i < len; i++)
288         {
289             char c=s.charAt(pos++);
290             int index=__lookup[c&0x7f];
291             if (index>=0)
292             {
293                 int idx=t*ROW_SIZE+index;
294                 int nt=_rowIndex[idx];
295                 if (nt==0)
296                     break;
297                 t=nt;
298             }
299             else
300             {
301                 char[] big = _bigIndex==null?null:_bigIndex[t];
302                 if (big==null)
303                     return null;
304                 int nt=big[c];
305                 if (nt==0)
306                     break;
307                 t=nt;
308             }
309             
310             // Is the next Trie is a match
311             if (_key[t]!=null)
312             {
313                 // Recurse so we can remember this possibility
314                 V best=getBest(t,s,offset+i+1,len-i-1);
315                 if (best!=null)
316                     return best;
317                 return (V)_value[t];
318             }
319         }
320         return (V)_value[t];
321     }
322 
323     /* ------------------------------------------------------------ */
324     private V getBest(int t,byte[] b,int offset,int len)
325     {
326         for(int i=0; i < len; i++)
327         {
328             byte c=b[offset+i];
329             int index=__lookup[c&0x7f];
330             if (index>=0)
331             {
332                 int idx=t*ROW_SIZE+index;
333                 int nt=_rowIndex[idx];
334                 if (nt==0)
335                     break;
336                 t=nt;
337             }
338             else
339             {
340                 char[] big = _bigIndex==null?null:_bigIndex[t];
341                 if (big==null)
342                     return null;
343                 int nt=big[c];
344                 if (nt==0)
345                     break;
346                 t=nt;
347             }
348             
349             // Is the next Trie is a match
350             if (_key[t]!=null)
351             {
352                 // Recurse so we can remember this possibility
353                 V best=getBest(t,b,offset+i+1,len-i-1);
354                 if (best!=null)
355                     return best;
356                 break;
357             }
358         }
359         return (V)_value[t];
360     }
361     
362     private V getBest(int t,ByteBuffer b,int offset,int len)
363     {
364         int pos=b.position()+offset;
365         for(int i=0; i < len; i++)
366         {
367             byte c=b.get(pos++);
368             int index=__lookup[c&0x7f];
369             if (index>=0)
370             {
371                 int idx=t*ROW_SIZE+index;
372                 int nt=_rowIndex[idx];
373                 if (nt==0)
374                     break;
375                 t=nt;
376             }
377             else
378             {
379                 char[] big = _bigIndex==null?null:_bigIndex[t];
380                 if (big==null)
381                     return null;
382                 int nt=big[c];
383                 if (nt==0)
384                     break;
385                 t=nt;
386             }
387             
388             // Is the next Trie is a match
389             if (_key[t]!=null)
390             {
391                 // Recurse so we can remember this possibility
392                 V best=getBest(t,b,offset+i+1,len-i-1);
393                 if (best!=null)
394                     return best;
395                 break;
396             }
397         }
398         return (V)_value[t];
399     }
400     
401     
402     
403 
404     @Override
405     public String toString()
406     {
407         StringBuilder buf = new StringBuilder();
408         toString(buf,0);
409         
410         if (buf.length()==0)
411             return "{}";
412         
413         buf.setCharAt(0,'{');
414         buf.append('}');
415         return buf.toString();
416     }
417 
418 
419     private void toString(Appendable out, int t)
420     {
421         if (_value[t]!=null)
422         {
423             try
424             {
425                 out.append(',');
426                 out.append(_key[t]);
427                 out.append('=');
428                 out.append(_value[t].toString());
429             }
430             catch (IOException e)
431             {
432                 throw new RuntimeException(e);
433             }
434         }
435 
436         for(int i=0; i < ROW_SIZE; i++)
437         {
438             int idx=t*ROW_SIZE+i;
439             if (_rowIndex[idx] != 0)
440                 toString(out,_rowIndex[idx]);
441         }
442 
443         char[] big = _bigIndex==null?null:_bigIndex[t];
444         if (big!=null)
445         {
446             for (int i:big)
447                 if (i!=0)
448                     toString(out,i);
449         }
450 
451     }
452 
453     @Override
454     public Set<String> keySet()
455     {
456         Set<String> keys = new HashSet<>();
457         keySet(keys,0);
458         return keys;
459     }
460     
461     private void keySet(Set<String> set, int t)
462     {
463         if (t<_value.length&&_value[t]!=null)
464             set.add(_key[t]);
465 
466         for(int i=0; i < ROW_SIZE; i++)
467         {
468             int idx=t*ROW_SIZE+i;
469             if (idx<_rowIndex.length && _rowIndex[idx] != 0)
470                 keySet(set,_rowIndex[idx]);
471         }
472         
473         char[] big = _bigIndex==null||t>=_bigIndex.length?null:_bigIndex[t];
474         if (big!=null)
475         {
476             for (int i:big)
477                 if (i!=0)
478                     keySet(set,i);
479         }
480     }
481     
482     @Override
483     public boolean isFull()
484     {
485         return _rows+1>=_key.length;
486     }
487 }