View Javadoc
1   /*
2    * Copyright (C) 2008-2011, Google Inc.
3    * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
4    * and other copyright owners as documented in the project's IP log.
5    *
6    * This program and the accompanying materials are made available
7    * under the terms of the Eclipse Distribution License v1.0 which
8    * accompanies this distribution, is reproduced below, and is
9    * available at http://www.eclipse.org/org/documents/edl-v10.php
10   *
11   * All rights reserved.
12   *
13   * Redistribution and use in source and binary forms, with or
14   * without modification, are permitted provided that the following
15   * conditions are met:
16   *
17   * - Redistributions of source code must retain the above copyright
18   *   notice, this list of conditions and the following disclaimer.
19   *
20   * - Redistributions in binary form must reproduce the above
21   *   copyright notice, this list of conditions and the following
22   *   disclaimer in the documentation and/or other materials provided
23   *   with the distribution.
24   *
25   * - Neither the name of the Eclipse Foundation, Inc. nor the
26   *   names of its contributors may be used to endorse or promote
27   *   products derived from this software without specific prior
28   *   written permission.
29   *
30   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43   */
44  
45  package org.eclipse.jgit.internal.storage.dfs;
46  
47  import java.io.IOException;
48  import java.util.concurrent.atomic.AtomicLong;
49  import java.util.concurrent.atomic.AtomicReferenceArray;
50  import java.util.concurrent.locks.ReentrantLock;
51  
52  import org.eclipse.jgit.annotations.Nullable;
53  import org.eclipse.jgit.internal.JGitText;
54  
55  /**
56   * Caches slices of a {@link BlockBasedFile} in memory for faster read access.
57   * <p>
58   * The DfsBlockCache serves as a Java based "buffer cache", loading segments of
59   * a BlockBasedFile into the JVM heap prior to use. As JGit often wants to do
60   * reads of only tiny slices of a file, the DfsBlockCache tries to smooth out
61   * these tiny reads into larger block-sized IO operations.
62   * <p>
63   * Whenever a cache miss occurs, loading is invoked by exactly one thread for
64   * the given <code>(DfsPackKey,position)</code> key tuple. This is ensured by an
65   * array of locks, with the tuple hashed to a lock instance.
66   * <p>
67   * Its too expensive during object access to be accurate with a least recently
68   * used (LRU) algorithm. Strictly ordering every read is a lot of overhead that
69   * typically doesn't yield a corresponding benefit to the application. This
70   * cache implements a clock replacement algorithm, giving each block one chance
71   * to have been accessed during a sweep of the cache to save itself from
72   * eviction.
73   * <p>
74   * Entities created by the cache are held under hard references, preventing the
75   * Java VM from clearing anything. Blocks are discarded by the replacement
76   * algorithm when adding a new block would cause the cache to exceed its
77   * configured maximum size.
78   * <p>
79   * The key tuple is passed through to methods as a pair of parameters rather
80   * than as a single Object, thus reducing the transient memory allocations of
81   * callers. It is more efficient to avoid the allocation, as we can't be 100%
82   * sure that a JIT would be able to stack-allocate a key tuple.
83   * <p>
84   * The internal hash table does not expand at runtime, instead it is fixed in
85   * size at cache creation time. The internal lock table used to gate load
86   * invocations is also fixed in size.
87   */
88  public final class DfsBlockCache {
89  	private static volatile DfsBlockCache cache;
90  
91  	static {
92  		reconfigure(new DfsBlockCacheConfig());
93  	}
94  
95  	/**
96  	 * Modify the configuration of the window cache.
97  	 * <p>
98  	 * The new configuration is applied immediately, and the existing cache is
99  	 * cleared.
100 	 *
101 	 * @param cfg
102 	 *            the new window cache configuration.
103 	 * @throws IllegalArgumentException
104 	 *             the cache configuration contains one or more invalid
105 	 *             settings, usually too low of a limit.
106 	 */
107 	public static void reconfigure(DfsBlockCacheConfig cfg) {
108 		cache = new DfsBlockCache(cfg);
109 	}
110 
111 	/** @return the currently active DfsBlockCache. */
112 	public static DfsBlockCache getInstance() {
113 		return cache;
114 	}
115 
116 	/** Number of entries in {@link #table}. */
117 	private final int tableSize;
118 
119 	/** Hash bucket directory; entries are chained below. */
120 	private final AtomicReferenceArray<HashEntry> table;
121 
122 	/** Locks to prevent concurrent loads for same (PackFile,position). */
123 	private final ReentrantLock[] loadLocks;
124 
125 	/** Maximum number of bytes the cache should hold. */
126 	private final long maxBytes;
127 
128 	/** Pack files smaller than this size can be copied through the cache. */
129 	private final long maxStreamThroughCache;
130 
131 	/**
132 	 * Suggested block size to read from pack files in.
133 	 * <p>
134 	 * If a pack file does not have a native block size, this size will be used.
135 	 * <p>
136 	 * If a pack file has a native size, a whole multiple of the native size
137 	 * will be used until it matches this size.
138 	 * <p>
139 	 * The value for blockSize must be a power of 2.
140 	 */
141 	private final int blockSize;
142 
143 	/** As {@link #blockSize} is a power of 2, bits to shift for a / blockSize. */
144 	private final int blockSizeShift;
145 
146 	/** Number of times a block was found in the cache. */
147 	private final AtomicLong statHit;
148 
149 	/** Number of times a block was not found, and had to be loaded. */
150 	private final AtomicLong statMiss;
151 
152 	/** Number of blocks evicted due to cache being full. */
153 	private volatile long statEvict;
154 
155 	/** Protects the clock and its related data. */
156 	private final ReentrantLock clockLock;
157 
158 	/** Current position of the clock. */
159 	private Ref clockHand;
160 
161 	/** Number of bytes currently loaded in the cache. */
162 	private volatile long liveBytes;
163 
164 	@SuppressWarnings("unchecked")
165 	private DfsBlockCache(final DfsBlockCacheConfig cfg) {
166 		tableSize = tableSize(cfg);
167 		if (tableSize < 1)
168 			throw new IllegalArgumentException(JGitText.get().tSizeMustBeGreaterOrEqual1);
169 
170 		table = new AtomicReferenceArray<>(tableSize);
171 		loadLocks = new ReentrantLock[cfg.getConcurrencyLevel()];
172 		for (int i = 0; i < loadLocks.length; i++)
173 			loadLocks[i] = new ReentrantLock(true /* fair */);
174 
175 		maxBytes = cfg.getBlockLimit();
176 		maxStreamThroughCache = (long) (maxBytes * cfg.getStreamRatio());
177 		blockSize = cfg.getBlockSize();
178 		blockSizeShift = Integer.numberOfTrailingZeros(blockSize);
179 
180 		clockLock = new ReentrantLock(true /* fair */);
181 		String none = ""; //$NON-NLS-1$
182 		clockHand = new Ref<>(
183 				DfsStreamKey.of(new DfsRepositoryDescription(none), none),
184 				-1, 0, null);
185 		clockHand.next = clockHand;
186 
187 		statHit = new AtomicLong();
188 		statMiss = new AtomicLong();
189 	}
190 
191 	boolean shouldCopyThroughCache(long length) {
192 		return length <= maxStreamThroughCache;
193 	}
194 
195 	/** @return total number of bytes in the cache. */
196 	public long getCurrentSize() {
197 		return liveBytes;
198 	}
199 
200 	/** @return 0..100, defining how full the cache is. */
201 	public long getFillPercentage() {
202 		return getCurrentSize() * 100 / maxBytes;
203 	}
204 
205 	/** @return number of requests for items in the cache. */
206 	public long getHitCount() {
207 		return statHit.get();
208 	}
209 
210 	/** @return number of requests for items not in the cache. */
211 	public long getMissCount() {
212 		return statMiss.get();
213 	}
214 
215 	/** @return total number of requests (hit + miss). */
216 	public long getTotalRequestCount() {
217 		return getHitCount() + getMissCount();
218 	}
219 
220 	/** @return 0..100, defining number of cache hits. */
221 	public long getHitRatio() {
222 		long hits = statHit.get();
223 		long miss = statMiss.get();
224 		long total = hits + miss;
225 		if (total == 0)
226 			return 0;
227 		return hits * 100 / total;
228 	}
229 
230 	/** @return number of evictions performed due to cache being full. */
231 	public long getEvictions() {
232 		return statEvict;
233 	}
234 
235 	private int hash(int packHash, long off) {
236 		return packHash + (int) (off >>> blockSizeShift);
237 	}
238 
239 	int getBlockSize() {
240 		return blockSize;
241 	}
242 
243 	private static int tableSize(final DfsBlockCacheConfig cfg) {
244 		final int wsz = cfg.getBlockSize();
245 		final long limit = cfg.getBlockLimit();
246 		if (wsz <= 0)
247 			throw new IllegalArgumentException(JGitText.get().invalidWindowSize);
248 		if (limit < wsz)
249 			throw new IllegalArgumentException(JGitText.get().windowSizeMustBeLesserThanLimit);
250 		return (int) Math.min(5 * (limit / wsz) / 2, Integer.MAX_VALUE);
251 	}
252 
253 	/**
254 	 * Lookup a cached object, creating and loading it if it doesn't exist.
255 	 *
256 	 * @param file
257 	 *            the pack that "contains" the cached object.
258 	 * @param position
259 	 *            offset within <code>pack</code> of the object.
260 	 * @param ctx
261 	 *            current thread's reader.
262 	 * @param fileChannel
263 	 *            optional channel to read {@code pack}.
264 	 * @return the object reference.
265 	 * @throws IOException
266 	 *             the reference was not in the cache and could not be loaded.
267 	 */
268 	DfsBlock getOrLoad(BlockBasedFile file, long position, DfsReader ctx,
269 			@Nullable ReadableChannel fileChannel) throws IOException {
270 		final long requestedPosition = position;
271 		position = file.alignToBlock(position);
272 
273 		DfsStreamKey key = file.key;
274 		int slot = slot(key, position);
275 		HashEntry e1 = table.get(slot);
276 		DfsBlock v = scan(e1, key, position);
277 		if (v != null && v.contains(key, requestedPosition)) {
278 			ctx.stats.blockCacheHit++;
279 			statHit.incrementAndGet();
280 			return v;
281 		}
282 
283 		reserveSpace(blockSize);
284 		ReentrantLock regionLock = lockFor(key, position);
285 		regionLock.lock();
286 		try {
287 			HashEntry e2 = table.get(slot);
288 			if (e2 != e1) {
289 				v = scan(e2, key, position);
290 				if (v != null) {
291 					ctx.stats.blockCacheHit++;
292 					statHit.incrementAndGet();
293 					creditSpace(blockSize);
294 					return v;
295 				}
296 			}
297 
298 			statMiss.incrementAndGet();
299 			boolean credit = true;
300 			try {
301 				v = file.readOneBlock(requestedPosition, ctx, fileChannel);
302 				credit = false;
303 			} finally {
304 				if (credit)
305 					creditSpace(blockSize);
306 			}
307 			if (position != v.start) {
308 				// The file discovered its blockSize and adjusted.
309 				position = v.start;
310 				slot = slot(key, position);
311 				e2 = table.get(slot);
312 			}
313 
314 			Ref<DfsBlock> ref = new Ref<>(key, position, v.size(), v);
315 			ref.hot = true;
316 			for (;;) {
317 				HashEntry n = new HashEntry(clean(e2), ref);
318 				if (table.compareAndSet(slot, e2, n))
319 					break;
320 				e2 = table.get(slot);
321 			}
322 			addToClock(ref, blockSize - v.size());
323 		} finally {
324 			regionLock.unlock();
325 		}
326 
327 		// If the block size changed from the default, it is possible the block
328 		// that was loaded is the wrong block for the requested position.
329 		if (v.contains(file.key, requestedPosition))
330 			return v;
331 		return getOrLoad(file, requestedPosition, ctx, fileChannel);
332 	}
333 
334 	@SuppressWarnings("unchecked")
335 	private void reserveSpace(int reserve) {
336 		clockLock.lock();
337 		try {
338 			long live = liveBytes + reserve;
339 			if (maxBytes < live) {
340 				Ref prev = clockHand;
341 				Ref hand = clockHand.next;
342 				do {
343 					if (hand.hot) {
344 						// Value was recently touched. Clear
345 						// hot and give it another chance.
346 						hand.hot = false;
347 						prev = hand;
348 						hand = hand.next;
349 						continue;
350 					} else if (prev == hand)
351 						break;
352 
353 					// No recent access since last scan, kill
354 					// value and remove from clock.
355 					Ref dead = hand;
356 					hand = hand.next;
357 					prev.next = hand;
358 					dead.next = null;
359 					dead.value = null;
360 					live -= dead.size;
361 					statEvict++;
362 				} while (maxBytes < live);
363 				clockHand = prev;
364 			}
365 			liveBytes = live;
366 		} finally {
367 			clockLock.unlock();
368 		}
369 	}
370 
371 	private void creditSpace(int credit) {
372 		clockLock.lock();
373 		liveBytes -= credit;
374 		clockLock.unlock();
375 	}
376 
377 	@SuppressWarnings("unchecked")
378 	private void addToClock(Ref ref, int credit) {
379 		clockLock.lock();
380 		try {
381 			if (credit != 0)
382 				liveBytes -= credit;
383 			Ref ptr = clockHand;
384 			ref.next = ptr.next;
385 			ptr.next = ref;
386 			clockHand = ref;
387 		} finally {
388 			clockLock.unlock();
389 		}
390 	}
391 
392 	void put(DfsBlock v) {
393 		put(v.stream, v.start, v.size(), v);
394 	}
395 
396 	<T> Ref<T> putRef(DfsStreamKey key, long size, T v) {
397 		return put(key, 0, (int) Math.min(size, Integer.MAX_VALUE), v);
398 	}
399 
400 	<T> Ref<T> put(DfsStreamKey key, long pos, int size, T v) {
401 		int slot = slot(key, pos);
402 		HashEntry e1 = table.get(slot);
403 		Ref<T> ref = scanRef(e1, key, pos);
404 		if (ref != null)
405 			return ref;
406 
407 		reserveSpace(size);
408 		ReentrantLock regionLock = lockFor(key, pos);
409 		regionLock.lock();
410 		try {
411 			HashEntry e2 = table.get(slot);
412 			if (e2 != e1) {
413 				ref = scanRef(e2, key, pos);
414 				if (ref != null) {
415 					creditSpace(size);
416 					return ref;
417 				}
418 			}
419 
420 			ref = new Ref<>(key, pos, size, v);
421 			ref.hot = true;
422 			for (;;) {
423 				HashEntry n = new HashEntry(clean(e2), ref);
424 				if (table.compareAndSet(slot, e2, n))
425 					break;
426 				e2 = table.get(slot);
427 			}
428 			addToClock(ref, 0);
429 		} finally {
430 			regionLock.unlock();
431 		}
432 		return ref;
433 	}
434 
435 	boolean contains(DfsStreamKey key, long position) {
436 		return scan(table.get(slot(key, position)), key, position) != null;
437 	}
438 
439 	@SuppressWarnings("unchecked")
440 	<T> T get(DfsStreamKey key, long position) {
441 		T val = (T) scan(table.get(slot(key, position)), key, position);
442 		if (val == null)
443 			statMiss.incrementAndGet();
444 		else
445 			statHit.incrementAndGet();
446 		return val;
447 	}
448 
449 	private <T> T scan(HashEntry n, DfsStreamKey key, long position) {
450 		Ref<T> r = scanRef(n, key, position);
451 		return r != null ? r.get() : null;
452 	}
453 
454 	<T> Ref<T> getRef(DfsStreamKey key) {
455 		Ref<T> r = scanRef(table.get(slot(key, 0)), key, 0);
456 		if (r != null)
457 			statHit.incrementAndGet();
458 		else
459 			statMiss.incrementAndGet();
460 		return r;
461 	}
462 
463 	@SuppressWarnings("unchecked")
464 	private <T> Ref<T> scanRef(HashEntry n, DfsStreamKey key, long position) {
465 		for (; n != null; n = n.next) {
466 			Ref<T> r = n.ref;
467 			if (r.position == position && r.key.equals(key))
468 				return r.get() != null ? r : null;
469 		}
470 		return null;
471 	}
472 
473 	private int slot(DfsStreamKey key, long position) {
474 		return (hash(key.hash, position) >>> 1) % tableSize;
475 	}
476 
477 	private ReentrantLock lockFor(DfsStreamKey key, long position) {
478 		return loadLocks[(hash(key.hash, position) >>> 1) % loadLocks.length];
479 	}
480 
481 	private static HashEntry clean(HashEntry top) {
482 		while (top != null && top.ref.next == null)
483 			top = top.next;
484 		if (top == null)
485 			return null;
486 		HashEntry n = clean(top.next);
487 		return n == top.next ? top : new HashEntry(n, top.ref);
488 	}
489 
490 	private static final class HashEntry {
491 		/** Next entry in the hash table's chain list. */
492 		final HashEntry next;
493 
494 		/** The referenced object. */
495 		final Ref ref;
496 
497 		HashEntry(HashEntry n, Ref r) {
498 			next = n;
499 			ref = r;
500 		}
501 	}
502 
503 	static final class Ref<T> {
504 		final DfsStreamKey key;
505 		final long position;
506 		final int size;
507 		volatile T value;
508 		Ref next;
509 		volatile boolean hot;
510 
511 		Ref(DfsStreamKey key, long position, int size, T v) {
512 			this.key = key;
513 			this.position = position;
514 			this.size = size;
515 			this.value = v;
516 		}
517 
518 		T get() {
519 			T v = value;
520 			if (v != null)
521 				hot = true;
522 			return v;
523 		}
524 
525 		boolean has() {
526 			return value != null;
527 		}
528 	}
529 }