WindowCache.java

  1. /*
  2.  * Copyright (C) 2008-2009, Google Inc.
  3.  * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
  4.  * and other copyright owners as documented in the project's IP log.
  5.  *
  6.  * This program and the accompanying materials are made available
  7.  * under the terms of the Eclipse Distribution License v1.0 which
  8.  * accompanies this distribution, is reproduced below, and is
  9.  * available at http://www.eclipse.org/org/documents/edl-v10.php
  10.  *
  11.  * All rights reserved.
  12.  *
  13.  * Redistribution and use in source and binary forms, with or
  14.  * without modification, are permitted provided that the following
  15.  * conditions are met:
  16.  *
  17.  * - Redistributions of source code must retain the above copyright
  18.  *   notice, this list of conditions and the following disclaimer.
  19.  *
  20.  * - Redistributions in binary form must reproduce the above
  21.  *   copyright notice, this list of conditions and the following
  22.  *   disclaimer in the documentation and/or other materials provided
  23.  *   with the distribution.
  24.  *
  25.  * - Neither the name of the Eclipse Foundation, Inc. nor the
  26.  *   names of its contributors may be used to endorse or promote
  27.  *   products derived from this software without specific prior
  28.  *   written permission.
  29.  *
  30.  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  31.  * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  32.  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  33.  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  34.  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  35.  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  36.  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  37.  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  38.  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  39.  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  40.  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  41.  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  42.  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  43.  */

  44. package org.eclipse.jgit.internal.storage.file;

  45. import java.io.IOException;
  46. import java.lang.ref.ReferenceQueue;
  47. import java.lang.ref.SoftReference;
  48. import java.util.Random;
  49. import java.util.concurrent.atomic.AtomicInteger;
  50. import java.util.concurrent.atomic.AtomicLong;
  51. import java.util.concurrent.atomic.AtomicReferenceArray;
  52. import java.util.concurrent.locks.ReentrantLock;

  53. import org.eclipse.jgit.internal.JGitText;
  54. import org.eclipse.jgit.storage.file.WindowCacheConfig;

  55. /**
  56.  * Caches slices of a {@link org.eclipse.jgit.internal.storage.file.PackFile} in
  57.  * memory for faster read access.
  58.  * <p>
  59.  * The WindowCache serves as a Java based "buffer cache", loading segments of a
  60.  * PackFile into the JVM heap prior to use. As JGit often wants to do reads of
  61.  * only tiny slices of a file, the WindowCache tries to smooth out these tiny
  62.  * reads into larger block-sized IO operations.
  63.  * <p>
  64.  * Whenever a cache miss occurs, {@link #load(PackFile, long)} is invoked by
  65.  * exactly one thread for the given <code>(PackFile,position)</code> key tuple.
  66.  * This is ensured by an array of locks, with the tuple hashed to a lock
  67.  * instance.
  68.  * <p>
  69.  * During a miss, older entries are evicted from the cache so long as
  70.  * {@link #isFull()} returns true.
  71.  * <p>
  72.  * Its too expensive during object access to be 100% accurate with a least
  73.  * recently used (LRU) algorithm. Strictly ordering every read is a lot of
  74.  * overhead that typically doesn't yield a corresponding benefit to the
  75.  * application.
  76.  * <p>
  77.  * This cache implements a loose LRU policy by randomly picking a window
  78.  * comprised of roughly 10% of the cache, and evicting the oldest accessed entry
  79.  * within that window.
  80.  * <p>
  81.  * Entities created by the cache are held under SoftReferences, permitting the
  82.  * Java runtime's garbage collector to evict entries when heap memory gets low.
  83.  * Most JREs implement a loose least recently used algorithm for this eviction.
  84.  * <p>
  85.  * The internal hash table does not expand at runtime, instead it is fixed in
  86.  * size at cache creation time. The internal lock table used to gate load
  87.  * invocations is also fixed in size.
  88.  * <p>
  89.  * The key tuple is passed through to methods as a pair of parameters rather
  90.  * than as a single Object, thus reducing the transient memory allocations of
  91.  * callers. It is more efficient to avoid the allocation, as we can't be 100%
  92.  * sure that a JIT would be able to stack-allocate a key tuple.
  93.  * <p>
  94.  * This cache has an implementation rule such that:
  95.  * <ul>
  96.  * <li>{@link #load(PackFile, long)} is invoked by at most one thread at a time
  97.  * for a given <code>(PackFile,position)</code> tuple.</li>
  98.  * <li>For every <code>load()</code> invocation there is exactly one
  99.  * {@link #createRef(PackFile, long, ByteWindow)} invocation to wrap a
  100.  * SoftReference around the cached entity.</li>
  101.  * <li>For every Reference created by <code>createRef()</code> there will be
  102.  * exactly one call to {@link #clear(Ref)} to cleanup any resources associated
  103.  * with the (now expired) cached entity.</li>
  104.  * </ul>
  105.  * <p>
  106.  * Therefore, it is safe to perform resource accounting increments during the
  107.  * {@link #load(PackFile, long)} or
  108.  * {@link #createRef(PackFile, long, ByteWindow)} methods, and matching
  109.  * decrements during {@link #clear(Ref)}. Implementors may need to override
  110.  * {@link #createRef(PackFile, long, ByteWindow)} in order to embed additional
  111.  * accounting information into an implementation specific
  112.  * {@link org.eclipse.jgit.internal.storage.file.WindowCache.Ref} subclass, as
  113.  * the cached entity may have already been evicted by the JRE's garbage
  114.  * collector.
  115.  * <p>
  116.  * To maintain higher concurrency workloads, during eviction only one thread
  117.  * performs the eviction work, while other threads can continue to insert new
  118.  * objects in parallel. This means that the cache can be temporarily over limit,
  119.  * especially if the nominated eviction thread is being starved relative to the
  120.  * other threads.
  121.  */
  122. public class WindowCache {
  123.     private static final int bits(int newSize) {
  124.         if (newSize < 4096)
  125.             throw new IllegalArgumentException(JGitText.get().invalidWindowSize);
  126.         if (Integer.bitCount(newSize) != 1)
  127.             throw new IllegalArgumentException(JGitText.get().windowSizeMustBePowerOf2);
  128.         return Integer.numberOfTrailingZeros(newSize);
  129.     }

  130.     private static final Random rng = new Random();

  131.     private static volatile WindowCache cache;

  132.     private static volatile int streamFileThreshold;

  133.     static {
  134.         reconfigure(new WindowCacheConfig());
  135.     }

  136.     /**
  137.      * Modify the configuration of the window cache.
  138.      * <p>
  139.      * The new configuration is applied immediately. If the new limits are
  140.      * smaller than what is currently cached, older entries will be purged
  141.      * as soon as possible to allow the cache to meet the new limit.
  142.      *
  143.      * @deprecated use {@code cfg.install()} to avoid internal reference.
  144.      * @param cfg
  145.      *            the new window cache configuration.
  146.      * @throws java.lang.IllegalArgumentException
  147.      *             the cache configuration contains one or more invalid
  148.      *             settings, usually too low of a limit.
  149.      */
  150.     @Deprecated
  151.     public static void reconfigure(WindowCacheConfig cfg) {
  152.         final WindowCache nc = new WindowCache(cfg);
  153.         final WindowCache oc = cache;
  154.         if (oc != null)
  155.             oc.removeAll();
  156.         cache = nc;
  157.         streamFileThreshold = cfg.getStreamFileThreshold();
  158.         DeltaBaseCache.reconfigure(cfg);
  159.     }

  160.     static int getStreamFileThreshold() {
  161.         return streamFileThreshold;
  162.     }

  163.     /**
  164.      * @return the cached instance.
  165.      */
  166.     public static WindowCache getInstance() {
  167.         return cache;
  168.     }

  169.     static final ByteWindow get(PackFile pack, long offset)
  170.             throws IOException {
  171.         final WindowCache c = cache;
  172.         final ByteWindow r = c.getOrLoad(pack, c.toStart(offset));
  173.         if (c != cache) {
  174.             // The cache was reconfigured while we were using the old one
  175.             // to load this window. The window is still valid, but our
  176.             // cache may think its still live. Ensure the window is removed
  177.             // from the old cache so resources can be released.
  178.             //
  179.             c.removeAll();
  180.         }
  181.         return r;
  182.     }

  183.     static final void purge(PackFile pack) {
  184.         cache.removeAll(pack);
  185.     }

  186.     /** ReferenceQueue to cleanup released and garbage collected windows. */
  187.     private final ReferenceQueue<ByteWindow> queue;

  188.     /** Number of entries in {@link #table}. */
  189.     private final int tableSize;

  190.     /** Access clock for loose LRU. */
  191.     private final AtomicLong clock;

  192.     /** Hash bucket directory; entries are chained below. */
  193.     private final AtomicReferenceArray<Entry> table;

  194.     /** Locks to prevent concurrent loads for same (PackFile,position). */
  195.     private final Lock[] locks;

  196.     /** Lock to elect the eviction thread after a load occurs. */
  197.     private final ReentrantLock evictLock;

  198.     /** Number of {@link #table} buckets to scan for an eviction window. */
  199.     private final int evictBatch;

  200.     private final int maxFiles;

  201.     private final long maxBytes;

  202.     private final boolean mmap;

  203.     private final int windowSizeShift;

  204.     private final int windowSize;

  205.     private final AtomicInteger openFiles;

  206.     private final AtomicLong openBytes;

  207.     private WindowCache(WindowCacheConfig cfg) {
  208.         tableSize = tableSize(cfg);
  209.         final int lockCount = lockCount(cfg);
  210.         if (tableSize < 1)
  211.             throw new IllegalArgumentException(JGitText.get().tSizeMustBeGreaterOrEqual1);
  212.         if (lockCount < 1)
  213.             throw new IllegalArgumentException(JGitText.get().lockCountMustBeGreaterOrEqual1);

  214.         queue = new ReferenceQueue<>();
  215.         clock = new AtomicLong(1);
  216.         table = new AtomicReferenceArray<>(tableSize);
  217.         locks = new Lock[lockCount];
  218.         for (int i = 0; i < locks.length; i++)
  219.             locks[i] = new Lock();
  220.         evictLock = new ReentrantLock();

  221.         int eb = (int) (tableSize * .1);
  222.         if (64 < eb)
  223.             eb = 64;
  224.         else if (eb < 4)
  225.             eb = 4;
  226.         if (tableSize < eb)
  227.             eb = tableSize;
  228.         evictBatch = eb;

  229.         maxFiles = cfg.getPackedGitOpenFiles();
  230.         maxBytes = cfg.getPackedGitLimit();
  231.         mmap = cfg.isPackedGitMMAP();
  232.         windowSizeShift = bits(cfg.getPackedGitWindowSize());
  233.         windowSize = 1 << windowSizeShift;

  234.         openFiles = new AtomicInteger();
  235.         openBytes = new AtomicLong();

  236.         if (maxFiles < 1)
  237.             throw new IllegalArgumentException(JGitText.get().openFilesMustBeAtLeast1);
  238.         if (maxBytes < windowSize)
  239.             throw new IllegalArgumentException(JGitText.get().windowSizeMustBeLesserThanLimit);
  240.     }

  241.     /**
  242.      * @return the number of open files.
  243.      */
  244.     public int getOpenFiles() {
  245.         return openFiles.get();
  246.     }

  247.     /**
  248.      * @return the number of open bytes.
  249.      */
  250.     public long getOpenBytes() {
  251.         return openBytes.get();
  252.     }

  253.     private int hash(int packHash, long off) {
  254.         return packHash + (int) (off >>> windowSizeShift);
  255.     }

  256.     private ByteWindow load(PackFile pack, long offset)
  257.             throws IOException {
  258.         if (pack.beginWindowCache())
  259.             openFiles.incrementAndGet();
  260.         try {
  261.             if (mmap)
  262.                 return pack.mmap(offset, windowSize);
  263.             return pack.read(offset, windowSize);
  264.         } catch (IOException | RuntimeException | Error e) {
  265.             close(pack);
  266.             throw e;
  267.         }
  268.     }

  269.     private Ref createRef(PackFile p, long o, ByteWindow v) {
  270.         final Ref ref = new Ref(p, o, v, queue);
  271.         openBytes.addAndGet(ref.size);
  272.         return ref;
  273.     }

  274.     private void clear(Ref ref) {
  275.         openBytes.addAndGet(-ref.size);
  276.         close(ref.pack);
  277.     }

  278.     private void close(PackFile pack) {
  279.         if (pack.endWindowCache())
  280.             openFiles.decrementAndGet();
  281.     }

  282.     private boolean isFull() {
  283.         return maxFiles < openFiles.get() || maxBytes < openBytes.get();
  284.     }

  285.     private long toStart(long offset) {
  286.         return (offset >>> windowSizeShift) << windowSizeShift;
  287.     }

  288.     private static int tableSize(WindowCacheConfig cfg) {
  289.         final int wsz = cfg.getPackedGitWindowSize();
  290.         final long limit = cfg.getPackedGitLimit();
  291.         if (wsz <= 0)
  292.             throw new IllegalArgumentException(JGitText.get().invalidWindowSize);
  293.         if (limit < wsz)
  294.             throw new IllegalArgumentException(JGitText.get().windowSizeMustBeLesserThanLimit);
  295.         return (int) Math.min(5 * (limit / wsz) / 2, 2000000000);
  296.     }

  297.     private static int lockCount(WindowCacheConfig cfg) {
  298.         return Math.max(cfg.getPackedGitOpenFiles(), 32);
  299.     }

  300.     /**
  301.      * Lookup a cached object, creating and loading it if it doesn't exist.
  302.      *
  303.      * @param pack
  304.      *            the pack that "contains" the cached object.
  305.      * @param position
  306.      *            offset within <code>pack</code> of the object.
  307.      * @return the object reference.
  308.      * @throws IOException
  309.      *             the object reference was not in the cache and could not be
  310.      *             obtained by {@link #load(PackFile, long)}.
  311.      */
  312.     private ByteWindow getOrLoad(PackFile pack, long position)
  313.             throws IOException {
  314.         final int slot = slot(pack, position);
  315.         final Entry e1 = table.get(slot);
  316.         ByteWindow v = scan(e1, pack, position);
  317.         if (v != null)
  318.             return v;

  319.         synchronized (lock(pack, position)) {
  320.             Entry e2 = table.get(slot);
  321.             if (e2 != e1) {
  322.                 v = scan(e2, pack, position);
  323.                 if (v != null)
  324.                     return v;
  325.             }

  326.             v = load(pack, position);
  327.             final Ref ref = createRef(pack, position, v);
  328.             hit(ref);
  329.             for (;;) {
  330.                 final Entry n = new Entry(clean(e2), ref);
  331.                 if (table.compareAndSet(slot, e2, n))
  332.                     break;
  333.                 e2 = table.get(slot);
  334.             }
  335.         }

  336.         if (evictLock.tryLock()) {
  337.             try {
  338.                 gc();
  339.                 evict();
  340.             } finally {
  341.                 evictLock.unlock();
  342.             }
  343.         }

  344.         return v;
  345.     }

  346.     private ByteWindow scan(Entry n, PackFile pack, long position) {
  347.         for (; n != null; n = n.next) {
  348.             final Ref r = n.ref;
  349.             if (r.pack == pack && r.position == position) {
  350.                 final ByteWindow v = r.get();
  351.                 if (v != null) {
  352.                     hit(r);
  353.                     return v;
  354.                 }
  355.                 n.kill();
  356.                 break;
  357.             }
  358.         }
  359.         return null;
  360.     }

  361.     private void hit(Ref r) {
  362.         // We don't need to be 100% accurate here. Its sufficient that at least
  363.         // one thread performs the increment. Any other concurrent access at
  364.         // exactly the same time can simply use the same clock value.
  365.         //
  366.         // Consequently we attempt the set, but we don't try to recover should
  367.         // it fail. This is why we don't use getAndIncrement() here.
  368.         //
  369.         final long c = clock.get();
  370.         clock.compareAndSet(c, c + 1);
  371.         r.lastAccess = c;
  372.     }

  373.     private void evict() {
  374.         while (isFull()) {
  375.             int ptr = rng.nextInt(tableSize);
  376.             Entry old = null;
  377.             int slot = 0;
  378.             for (int b = evictBatch - 1; b >= 0; b--, ptr++) {
  379.                 if (tableSize <= ptr)
  380.                     ptr = 0;
  381.                 for (Entry e = table.get(ptr); e != null; e = e.next) {
  382.                     if (e.dead)
  383.                         continue;
  384.                     if (old == null || e.ref.lastAccess < old.ref.lastAccess) {
  385.                         old = e;
  386.                         slot = ptr;
  387.                     }
  388.                 }
  389.             }
  390.             if (old != null) {
  391.                 old.kill();
  392.                 gc();
  393.                 final Entry e1 = table.get(slot);
  394.                 table.compareAndSet(slot, e1, clean(e1));
  395.             }
  396.         }
  397.     }

  398.     /**
  399.      * Clear every entry from the cache.
  400.      * <p>
  401.      * This is a last-ditch effort to clear out the cache, such as before it
  402.      * gets replaced by another cache that is configured differently. This
  403.      * method tries to force every cached entry through {@link #clear(Ref)} to
  404.      * ensure that resources are correctly accounted for and cleaned up by the
  405.      * subclass. A concurrent reader loading entries while this method is
  406.      * running may cause resource accounting failures.
  407.      */
  408.     private void removeAll() {
  409.         for (int s = 0; s < tableSize; s++) {
  410.             Entry e1;
  411.             do {
  412.                 e1 = table.get(s);
  413.                 for (Entry e = e1; e != null; e = e.next)
  414.                     e.kill();
  415.             } while (!table.compareAndSet(s, e1, null));
  416.         }
  417.         gc();
  418.     }

  419.     /**
  420.      * Clear all entries related to a single file.
  421.      * <p>
  422.      * Typically this method is invoked during {@link PackFile#close()}, when we
  423.      * know the pack is never going to be useful to us again (for example, it no
  424.      * longer exists on disk). A concurrent reader loading an entry from this
  425.      * same pack may cause the pack to become stuck in the cache anyway.
  426.      *
  427.      * @param pack
  428.      *            the file to purge all entries of.
  429.      */
  430.     private void removeAll(PackFile pack) {
  431.         for (int s = 0; s < tableSize; s++) {
  432.             final Entry e1 = table.get(s);
  433.             boolean hasDead = false;
  434.             for (Entry e = e1; e != null; e = e.next) {
  435.                 if (e.ref.pack == pack) {
  436.                     e.kill();
  437.                     hasDead = true;
  438.                 } else if (e.dead)
  439.                     hasDead = true;
  440.             }
  441.             if (hasDead)
  442.                 table.compareAndSet(s, e1, clean(e1));
  443.         }
  444.         gc();
  445.     }

  446.     private void gc() {
  447.         Ref r;
  448.         while ((r = (Ref) queue.poll()) != null) {
  449.             clear(r);

  450.             final int s = slot(r.pack, r.position);
  451.             final Entry e1 = table.get(s);
  452.             for (Entry n = e1; n != null; n = n.next) {
  453.                 if (n.ref == r) {
  454.                     n.dead = true;
  455.                     table.compareAndSet(s, e1, clean(e1));
  456.                     break;
  457.                 }
  458.             }
  459.         }
  460.     }

  461.     private int slot(PackFile pack, long position) {
  462.         return (hash(pack.hash, position) >>> 1) % tableSize;
  463.     }

  464.     private Lock lock(PackFile pack, long position) {
  465.         return locks[(hash(pack.hash, position) >>> 1) % locks.length];
  466.     }

  467.     private static Entry clean(Entry top) {
  468.         while (top != null && top.dead) {
  469.             top.ref.enqueue();
  470.             top = top.next;
  471.         }
  472.         if (top == null)
  473.             return null;
  474.         final Entry n = clean(top.next);
  475.         return n == top.next ? top : new Entry(n, top.ref);
  476.     }

  477.     private static class Entry {
  478.         /** Next entry in the hash table's chain list. */
  479.         final Entry next;

  480.         /** The referenced object. */
  481.         final Ref ref;

  482.         /**
  483.          * Marked true when ref.get() returns null and the ref is dead.
  484.          * <p>
  485.          * A true here indicates that the ref is no longer accessible, and that
  486.          * we therefore need to eventually purge this Entry object out of the
  487.          * bucket's chain.
  488.          */
  489.         volatile boolean dead;

  490.         Entry(Entry n, Ref r) {
  491.             next = n;
  492.             ref = r;
  493.         }

  494.         final void kill() {
  495.             dead = true;
  496.             ref.enqueue();
  497.         }
  498.     }

  499.     /** A soft reference wrapped around a cached object. */
  500.     private static class Ref extends SoftReference<ByteWindow> {
  501.         final PackFile pack;

  502.         final long position;

  503.         final int size;

  504.         long lastAccess;

  505.         protected Ref(final PackFile pack, final long position,
  506.                 final ByteWindow v, final ReferenceQueue<ByteWindow> queue) {
  507.             super(v, queue);
  508.             this.pack = pack;
  509.             this.position = position;
  510.             this.size = v.size();
  511.         }
  512.     }

  513.     private static final class Lock {
  514.         // Used only for its implicit monitor.
  515.     }
  516. }