DeltaIndexScanner.java

  1. /*
  2.  * Copyright (C) 2010, Google Inc. and others
  3.  *
  4.  * This program and the accompanying materials are made available under the
  5.  * terms of the Eclipse Distribution License v. 1.0 which is available at
  6.  * https://www.eclipse.org/org/documents/edl-v10.php.
  7.  *
  8.  * SPDX-License-Identifier: BSD-3-Clause
  9.  */

  10. package org.eclipse.jgit.internal.storage.pack;

  11. /**
  12.  * Supports {@link DeltaIndex} by performing a partial scan of the content.
  13.  */
  14. class DeltaIndexScanner {
  15.     final int[] table;

  16.     // To save memory the buckets for hash chains are stored in correlated
  17.     // arrays. This permits us to get 3 values per entry, without paying
  18.     // the penalty for an object header on each entry.

  19.     final long[] entries;

  20.     final int[] next;

  21.     final int tableMask;

  22.     private int entryCnt;

  23.     DeltaIndexScanner(byte[] raw, int len) {
  24.         // Clip the length so it falls on a block boundary. We won't
  25.         // bother to scan the final partial block.
  26.         //
  27.         len -= (len % DeltaIndex.BLKSZ);

  28.         final int worstCaseBlockCnt = len / DeltaIndex.BLKSZ;
  29.         if (worstCaseBlockCnt < 1) {
  30.             table = new int[] {};
  31.             tableMask = 0;

  32.             entries = new long[] {};
  33.             next = new int[] {};

  34.         } else {
  35.             table = new int[tableSize(worstCaseBlockCnt)];
  36.             tableMask = table.length - 1;

  37.             // As we insert blocks we preincrement so that 0 is never a
  38.             // valid entry. Therefore we have to allocate one extra space.
  39.             //
  40.             entries = new long[1 + worstCaseBlockCnt];
  41.             next = new int[entries.length];

  42.             scan(raw, len);
  43.         }
  44.     }

  45.     private void scan(byte[] raw, int end) {
  46.         // We scan the input backwards, and always insert onto the
  47.         // front of the chain. This ensures that chains will have lower
  48.         // offsets at the front of the chain, allowing us to prefer the
  49.         // earlier match rather than the later match.
  50.         //
  51.         int lastHash = 0;
  52.         int ptr = end - DeltaIndex.BLKSZ;
  53.         do {
  54.             final int key = DeltaIndex.hashBlock(raw, ptr);
  55.             final int tIdx = key & tableMask;

  56.             final int head = table[tIdx];
  57.             if (head != 0 && lastHash == key) {
  58.                 // Two consecutive blocks have the same content hash,
  59.                 // prefer the earlier block because we want to use the
  60.                 // longest sequence we can during encoding.
  61.                 //
  62.                 entries[head] = (((long) key) << 32) | ptr;
  63.             } else {
  64.                 final int eIdx = ++entryCnt;
  65.                 entries[eIdx] = (((long) key) << 32) | ptr;
  66.                 next[eIdx] = head;
  67.                 table[tIdx] = eIdx;
  68.             }

  69.             lastHash = key;
  70.             ptr -= DeltaIndex.BLKSZ;
  71.         } while (0 <= ptr);
  72.     }

  73.     private static int tableSize(int worstCaseBlockCnt) {
  74.         int shift = 32 - Integer.numberOfLeadingZeros(worstCaseBlockCnt);
  75.         int sz = 1 << (shift - 1);
  76.         if (sz < worstCaseBlockCnt)
  77.             sz <<= 1;
  78.         return sz;
  79.     }
  80. }