1   /*
2    * Copyright (C) 2008-2011, Google Inc.
3    * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
4    * and other copyright owners as documented in the project's IP log.
5    *
6    * This program and the accompanying materials are made available
7    * under the terms of the Eclipse Distribution License v1.0 which
8    * accompanies this distribution, is reproduced below, and is
9    * available at http://www.eclipse.org/org/documents/edl-v10.php
10   *
11   * All rights reserved.
12   *
13   * Redistribution and use in source and binary forms, with or
14   * without modification, are permitted provided that the following
15   * conditions are met:
16   *
17   * - Redistributions of source code must retain the above copyright
18   *   notice, this list of conditions and the following disclaimer.
19   *
20   * - Redistributions in binary form must reproduce the above
21   *   copyright notice, this list of conditions and the following
22   *   disclaimer in the documentation and/or other materials provided
23   *   with the distribution.
24   *
25   * - Neither the name of the Eclipse Foundation, Inc. nor the
26   *   names of its contributors may be used to endorse or promote
27   *   products derived from this software without specific prior
28   *   written permission.
29   *
30   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43   */

package org.eclipse.jgit.internal.storage.dfs;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReferenceArray;
import java.util.concurrent.locks.ReentrantLock;

import org.eclipse.jgit.internal.JGitText;

/**
 * Caches slices of a {@link DfsPackFile} in memory for faster read access.
 * <p>
 * The DfsBlockCache serves as a Java-based "buffer cache", loading segments of
 * a DfsPackFile into the JVM heap prior to use. As JGit often wants to do reads
 * of only tiny slices of a file, the DfsBlockCache tries to smooth out these
 * tiny reads into larger block-sized IO operations.
 * <p>
 * Whenever a cache miss occurs, loading is invoked by exactly one thread for
 * the given <code>(DfsPackKey,position)</code> key tuple. This is ensured by an
 * array of locks, with the tuple hashed to a lock instance.
 * <p>
 * It is too expensive during object access to be accurate with a least recently
 * used (LRU) algorithm. Strictly ordering every read is a lot of overhead that
 * typically doesn't yield a corresponding benefit to the application. This
 * cache implements a clock replacement algorithm, giving each block one chance
 * to have been accessed during a sweep of the cache to save itself from
 * eviction.
 * <p>
 * Entities created by the cache are held under hard references, preventing the
 * Java VM from clearing anything. Blocks are discarded by the replacement
 * algorithm when adding a new block would cause the cache to exceed its
 * configured maximum size.
 * <p>
 * The key tuple is passed through to methods as a pair of parameters rather
 * than as a single Object, thus reducing the transient memory allocations of
 * callers. It is more efficient to avoid the allocation, as we can't be 100%
 * sure that a JIT would be able to stack-allocate a key tuple.
 * <p>
 * The internal hash table does not expand at runtime; instead it is fixed in
 * size at cache creation time. The internal lock table used to gate load
 * invocations is also fixed in size.
 */
public final class DfsBlockCache {
	private static volatile DfsBlockCache cache;

	static {
		reconfigure(new DfsBlockCacheConfig());
	}

	/**
	 * Modify the configuration of the block cache.
	 * <p>
	 * The new configuration is applied immediately, and the existing cache is
	 * cleared.
	 *
	 * @param cfg
	 *            the new block cache configuration.
	 * @throws IllegalArgumentException
	 *             the cache configuration contains one or more invalid
	 *             settings, usually a limit that is set too low.
	 */
	public static void reconfigure(DfsBlockCacheConfig cfg) {
		DfsBlockCache nc = new DfsBlockCache(cfg);
		DfsBlockCache oc = cache;
		cache = nc;

		if (oc != null) {
			for (DfsPackFile pack : oc.getPackFiles())
				pack.key.cachedSize.set(0);
		}
	}
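
	// A minimal usage sketch (illustrative only; the sizes below are
	// assumptions for the example, not recommendations):
	//
	//   DfsBlockCacheConfig cfg = new DfsBlockCacheConfig();
	//   cfg.setBlockSize(64 * 1024);          // 64 KiB blocks, power of 2
	//   cfg.setBlockLimit(256 * 1024 * 1024); // 256 MiB heap budget
	//   DfsBlockCache.reconfigure(cfg);
	//   DfsBlockCache cache = DfsBlockCache.getInstance();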

	/** @return the currently active DfsBlockCache. */
	public static DfsBlockCache getInstance() {
		return cache;
	}

	/** Number of entries in {@link #table}. */
	private final int tableSize;

	/** Hash bucket directory; entries are chained below. */
	private final AtomicReferenceArray<HashEntry> table;

	/** Locks to prevent concurrent loads for same (PackFile,position). */
	private final ReentrantLock[] loadLocks;

	/** Maximum number of bytes the cache should hold. */
	private final long maxBytes;

	/** Pack files smaller than this size can be copied through the cache. */
	private final long maxStreamThroughCache;

	/**
	 * Suggested block size to read from pack files in.
	 * <p>
	 * If a pack file does not have a native block size, this size will be used.
	 * <p>
	 * If a pack file has a native size, a whole multiple of the native size
	 * will be used until it matches this size.
	 */
	private final int blockSize;

	/** Bits to shift to divide by {@link #blockSize}, which is a power of 2. */
	private final int blockSizeShift;

	/** Cache of pack files, indexed by description. */
	private final Map<DfsPackDescription, DfsPackFile> packCache;

	/** View of pack files in the pack cache. */
	private final Collection<DfsPackFile> packFiles;

	/** Number of times a block was found in the cache. */
	private final AtomicLong statHit;

	/** Number of times a block was not found, and had to be loaded. */
	private final AtomicLong statMiss;

	/** Number of blocks evicted due to cache being full. */
	private volatile long statEvict;

	/** Protects the clock and its related data. */
	private final ReentrantLock clockLock;

	/** Current position of the clock. */
	private Ref clockHand;

	/** Number of bytes currently loaded in the cache. */
	private volatile long liveBytes;

	private DfsBlockCache(final DfsBlockCacheConfig cfg) {
		tableSize = tableSize(cfg);
		if (tableSize < 1)
			throw new IllegalArgumentException(JGitText.get().tSizeMustBeGreaterOrEqual1);

		table = new AtomicReferenceArray<HashEntry>(tableSize);
		loadLocks = new ReentrantLock[32];
		for (int i = 0; i < loadLocks.length; i++)
			loadLocks[i] = new ReentrantLock(true /* fair */);

		maxBytes = cfg.getBlockLimit();
		maxStreamThroughCache = (long) (maxBytes * cfg.getStreamRatio());
		blockSize = cfg.getBlockSize();
		blockSizeShift = Integer.numberOfTrailingZeros(blockSize);

		clockLock = new ReentrantLock(true /* fair */);
		clockHand = new Ref<Object>(new DfsPackKey(), -1, 0, null);
		clockHand.next = clockHand;

		packCache = new ConcurrentHashMap<DfsPackDescription, DfsPackFile>(
				16, 0.75f, 1);
		packFiles = Collections.unmodifiableCollection(packCache.values());

		statHit = new AtomicLong();
		statMiss = new AtomicLong();
	}

	boolean shouldCopyThroughCache(long length) {
		return length <= maxStreamThroughCache;
	}

	/** @return total number of bytes in the cache. */
	public long getCurrentSize() {
		return liveBytes;
	}

	/** @return 0..100, defining how full the cache is. */
	public long getFillPercentage() {
		return getCurrentSize() * 100 / maxBytes;
	}

	/** @return number of requests for items in the cache. */
	public long getHitCount() {
		return statHit.get();
	}

	/** @return number of requests for items not in the cache. */
	public long getMissCount() {
		return statMiss.get();
	}

	/** @return total number of requests (hit + miss). */
	public long getTotalRequestCount() {
		return getHitCount() + getMissCount();
	}

	/** @return 0..100, the percentage of requests that were cache hits. */
	public long getHitRatio() {
		long hits = statHit.get();
		long miss = statMiss.get();
		long total = hits + miss;
		if (total == 0)
			return 0;
		return hits * 100 / total;
	}
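
	// Worked example of the integer arithmetic above: with 300 hits and
	// 100 misses, total = 400, so the ratio is 300 * 100 / 400 = 75.
	// Multiplying before dividing keeps the truncation to a single step.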

	/** @return number of evictions performed due to cache being full. */
	public long getEvictions() {
		return statEvict;
	}

	/**
	 * Get the pack files stored in this cache.
	 *
	 * @return a collection of pack files, some of which may not actually be
	 *         present; the caller should check the pack's cached size.
	 */
	public Collection<DfsPackFile> getPackFiles() {
		return packFiles;
	}

	DfsPackFile getOrCreate(DfsPackDescription dsc, DfsPackKey key) {
		// TODO This table grows without bound. It needs to clean up
		// entries that aren't in cache anymore, and aren't being used
		// by a live DfsObjDatabase reference.
		synchronized (packCache) {
			DfsPackFile pack = packCache.get(dsc);
			if (pack != null && pack.invalid()) {
				packCache.remove(dsc);
				pack = null;
			}
			if (pack == null) {
				if (key == null)
					key = new DfsPackKey();
				pack = new DfsPackFile(this, dsc, key);
				packCache.put(dsc, pack);
			}
			return pack;
		}
	}

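	// Folds the (pack, offset) tuple into one int: shifting the offset
	// right by blockSizeShift turns it into a block index (offset /
	// blockSize, exact because blockSize is a power of 2); adding the
	// pack's hash spreads blocks from different packs across the table.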
	private int hash(int packHash, long off) {
		return packHash + (int) (off >>> blockSizeShift);
	}

	int getBlockSize() {
		return blockSize;
	}

	private static int tableSize(final DfsBlockCacheConfig cfg) {
		final int wsz = cfg.getBlockSize();
		final long limit = cfg.getBlockLimit();
		if (wsz <= 0)
			throw new IllegalArgumentException(JGitText.get().invalidWindowSize);
		if (limit < wsz)
			throw new IllegalArgumentException(JGitText.get().windowSizeMustBeLesserThanLimit);
		return (int) Math.min(5 * (limit / wsz) / 2, Integer.MAX_VALUE);
	}
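
	// Worked example of the sizing formula: with the hypothetical 64 KiB
	// block size and 256 MiB limit from the sketch above, limit / wsz =
	// 4096 blocks, so the table gets 5 * 4096 / 2 = 10240 buckets, about
	// 2.5 buckets per resident block, keeping hash chains short.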

	/**
	 * Lookup a cached object, creating and loading it if it doesn't exist.
	 *
	 * @param pack
	 *            the pack that "contains" the cached object.
	 * @param position
	 *            offset within <code>pack</code> of the object.
	 * @param ctx
	 *            current thread's reader.
	 * @return the object reference.
	 * @throws IOException
	 *             the reference was not in the cache and could not be loaded.
	 */
	DfsBlock getOrLoad(DfsPackFile pack, long position, DfsReader ctx)
			throws IOException {
		final long requestedPosition = position;
		position = pack.alignToBlock(position);

		DfsPackKey key = pack.key;
		int slot = slot(key, position);
		HashEntry e1 = table.get(slot);
		DfsBlock v = scan(e1, key, position);
		if (v != null) {
			statHit.incrementAndGet();
			return v;
		}

		reserveSpace(blockSize);
		ReentrantLock regionLock = lockFor(key, position);
		regionLock.lock();
		try {
			HashEntry e2 = table.get(slot);
			if (e2 != e1) {
				v = scan(e2, key, position);
				if (v != null) {
					statHit.incrementAndGet();
					creditSpace(blockSize);
					return v;
				}
			}

			statMiss.incrementAndGet();
			boolean credit = true;
			try {
				v = pack.readOneBlock(position, ctx);
				credit = false;
			} finally {
				if (credit)
					creditSpace(blockSize);
			}
			if (position != v.start) {
				// The file discovered its blockSize and adjusted.
				position = v.start;
				slot = slot(key, position);
				e2 = table.get(slot);
			}

			key.cachedSize.addAndGet(v.size());
			Ref<DfsBlock> ref = new Ref<DfsBlock>(key, position, v.size(), v);
			ref.hot = true;
			for (;;) {
				HashEntry n = new HashEntry(clean(e2), ref);
				if (table.compareAndSet(slot, e2, n))
					break;
				e2 = table.get(slot);
			}
			addToClock(ref, blockSize - v.size());
		} finally {
			regionLock.unlock();
		}

		// If the block size changed from the default, it is possible the block
		// that was loaded is the wrong block for the requested position.
		if (v.contains(pack.key, requestedPosition))
			return v;
		return getOrLoad(pack, requestedPosition, ctx);
	}

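	// The clock sweep below walks the circular list of Refs. A hot Ref
	// was touched since the previous pass: its flag is cleared and it
	// survives one more round. A cold Ref is unlinked and its value
	// dropped. The sweep stops once prev == hand, i.e. when nothing
	// further can be reclaimed.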
	@SuppressWarnings("unchecked")
	private void reserveSpace(int reserve) {
		clockLock.lock();
		try {
			long live = liveBytes + reserve;
			if (maxBytes < live) {
				Ref prev = clockHand;
				Ref hand = clockHand.next;
				do {
					if (hand.hot) {
						// Value was recently touched. Clear
						// hot and give it another chance.
						hand.hot = false;
						prev = hand;
						hand = hand.next;
						continue;
					} else if (prev == hand)
						break;

					// No recent access since last scan, kill
					// value and remove from clock.
					Ref dead = hand;
					hand = hand.next;
					prev.next = hand;
					dead.next = null;
					dead.value = null;
					live -= dead.size;
					dead.pack.cachedSize.addAndGet(-dead.size);
					statEvict++;
				} while (maxBytes < live);
				clockHand = prev;
			}
			liveBytes = live;
		} finally {
			clockLock.unlock();
		}
	}

	private void creditSpace(int credit) {
		clockLock.lock();
		liveBytes -= credit;
		clockLock.unlock();
	}

	private void addToClock(Ref ref, int credit) {
		clockLock.lock();
		try {
			if (credit != 0)
				liveBytes -= credit;
			Ref ptr = clockHand;
			ref.next = ptr.next;
			ptr.next = ref;
			clockHand = ref;
		} finally {
			clockLock.unlock();
		}
	}

	void put(DfsBlock v) {
		put(v.pack, v.start, v.size(), v);
	}

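	// Same double-checked pattern as getOrLoad: scan the chain without a
	// lock, then re-scan under the per-region lock before inserting, so a
	// value raced in by another thread is reused rather than duplicated.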
	<T> Ref<T> put(DfsPackKey key, long pos, int size, T v) {
		int slot = slot(key, pos);
		HashEntry e1 = table.get(slot);
		Ref<T> ref = scanRef(e1, key, pos);
		if (ref != null)
			return ref;

		reserveSpace(size);
		ReentrantLock regionLock = lockFor(key, pos);
		regionLock.lock();
		try {
			HashEntry e2 = table.get(slot);
			if (e2 != e1) {
				ref = scanRef(e2, key, pos);
				if (ref != null) {
					creditSpace(size);
					return ref;
				}
			}

			key.cachedSize.addAndGet(size);
			ref = new Ref<T>(key, pos, size, v);
			ref.hot = true;
			for (;;) {
				HashEntry n = new HashEntry(clean(e2), ref);
				if (table.compareAndSet(slot, e2, n))
					break;
				e2 = table.get(slot);
			}
			addToClock(ref, 0);
		} finally {
			regionLock.unlock();
		}
		return ref;
	}

	boolean contains(DfsPackKey key, long position) {
		return scan(table.get(slot(key, position)), key, position) != null;
	}

	@SuppressWarnings("unchecked")
	<T> T get(DfsPackKey key, long position) {
		T val = (T) scan(table.get(slot(key, position)), key, position);
		if (val == null)
			statMiss.incrementAndGet();
		else
			statHit.incrementAndGet();
		return val;
	}

	private <T> T scan(HashEntry n, DfsPackKey pack, long position) {
		Ref<T> r = scanRef(n, pack, position);
		return r != null ? r.get() : null;
	}

	@SuppressWarnings("unchecked")
	private <T> Ref<T> scanRef(HashEntry n, DfsPackKey pack, long position) {
		for (; n != null; n = n.next) {
			Ref<T> r = n.ref;
			if (r.pack == pack && r.position == position)
				return r.get() != null ? r : null;
		}
		return null;
	}

	void remove(DfsPackFile pack) {
		synchronized (packCache) {
			packCache.remove(pack.getPackDescription());
		}
	}

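	// The unsigned shift drops the hash's sign bit, so the modulo below
	// always produces a valid non-negative index, even when hash()
	// overflows into a negative int.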
	private int slot(DfsPackKey pack, long position) {
		return (hash(pack.hash, position) >>> 1) % tableSize;
	}

	private ReentrantLock lockFor(DfsPackKey pack, long position) {
		return loadLocks[(hash(pack.hash, position) >>> 1) % loadLocks.length];
	}

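	// A Ref evicted by the clock sweep has had its next field nulled.
	// clean() rebuilds the chain with those dead entries skipped, sharing
	// any unchanged tail so live entries are not copied unnecessarily.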
	private static HashEntry clean(HashEntry top) {
		while (top != null && top.ref.next == null)
			top = top.next;
		if (top == null)
			return null;
		HashEntry n = clean(top.next);
		return n == top.next ? top : new HashEntry(n, top.ref);
	}

	private static final class HashEntry {
		/** Next entry in the hash table's chain list. */
		final HashEntry next;

		/** The referenced object. */
		final Ref ref;

		HashEntry(HashEntry n, Ref r) {
			next = n;
			ref = r;
		}
	}

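	// Ref doubles as the hash chain payload and as a node in the circular
	// clock list. get() sets the hot flag, which is what earns the entry
	// a second chance during the next eviction sweep.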
	static final class Ref<T> {
		final DfsPackKey pack;
		final long position;
		final int size;
		volatile T value;
		Ref next;
		volatile boolean hot;

		Ref(DfsPackKey pack, long position, int size, T v) {
			this.pack = pack;
			this.position = position;
			this.size = size;
			this.value = v;
		}

		T get() {
			T v = value;
			if (v != null)
				hot = true;
			return v;
		}

		boolean has() {
			return value != null;
		}
	}
}