/*
 * Copyright (C) 2011, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.internal.storage.dfs;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;

import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.ObjectDatabase;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.ObjectReader;

/**
 * Manages objects stored in
 * {@link org.eclipse.jgit.internal.storage.dfs.DfsPackFile} on a storage
 * system.
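 * <p>
 * Concrete subclasses bind pack listing and file I/O to a particular storage
 * backend. A minimal skeleton might look like the following (a hypothetical
 * sketch; {@code MyDfsObjDatabase} and its stubbed bodies are illustrative,
 * not part of JGit):
 *
 * <pre>{@code
 * class MyDfsObjDatabase extends DfsObjDatabase {
 *     MyDfsObjDatabase(DfsRepository repository, DfsReaderOptions options) {
 *         super(repository, options);
 *     }
 *
 *     protected DfsPackDescription newPack(PackSource source)
 *             throws IOException {
 *         throw new UnsupportedOperationException(); // name a new pack
 *     }
 *
 *     protected List<DfsPackDescription> listPacks() throws IOException {
 *         throw new UnsupportedOperationException(); // enumerate packs
 *     }
 *
 *     protected void commitPackImpl(Collection<DfsPackDescription> desc,
 *             Collection<DfsPackDescription> replaces) throws IOException {
 *         throw new UnsupportedOperationException(); // publish new packs
 *     }
 *
 *     protected void rollbackPack(Collection<DfsPackDescription> desc) {
 *         // Best effort cleanup; must not throw.
 *     }
 *
 *     protected ReadableChannel openFile(DfsPackDescription desc, PackExt ext)
 *             throws FileNotFoundException, IOException {
 *         throw new UnsupportedOperationException(); // read pack data
 *     }
 *
 *     protected DfsOutputStream writeFile(DfsPackDescription desc, PackExt ext)
 *             throws IOException {
 *         throw new UnsupportedOperationException(); // write pack data
 *     }
 * }
 * }</pre>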
 */
public abstract class DfsObjDatabase extends ObjectDatabase {
	private static final PackList NO_PACKS = new PackList(
			new DfsPackFile[0],
			new DfsReftable[0]) {
		@Override
		boolean dirty() {
			return true;
		}

		@Override
		void clearDirty() {
			// Always dirty.
		}

		@Override
		public void markDirty() {
			// Always dirty.
		}
	};

	/** Sources for a pack file. */
	public static enum PackSource {
		/** The pack is created by ObjectInserter due to local activity. */
		INSERT(0),

		/**
		 * The pack is created by PackParser due to a network event.
		 * <p>
		 * A received pack can be from either a push into the repository or a
		 * fetch into the repository; the direction doesn't matter. A received
		 * pack was built by the remote Git implementation and may not match
		 * the storage layout preferred by this version. Received packs are
		 * likely to be either compacted or garbage collected in the future.
		 */
		RECEIVE(0),

		/**
		 * The pack was created by compacting multiple packs together.
		 * <p>
		 * Packs created by compacting multiple packs together aren't nearly
		 * as efficient as a fully garbage collected repository, but may save
		 * disk space by reducing redundant copies of base objects.
		 *
		 * @see DfsPackCompactor
		 */
		COMPACT(1),

		/**
		 * Pack was created by Git garbage collection by this implementation.
		 * <p>
		 * This source is only used by the {@link DfsGarbageCollector} when it
		 * builds a pack file by traversing the object graph and copying all
		 * reachable objects into a new pack stream.
		 *
		 * @see DfsGarbageCollector
		 */
		GC(2),

		/** Created from non-heads by {@link DfsGarbageCollector}. */
		GC_REST(3),

		/**
		 * RefTreeGraph pack was created by Git garbage collection.
		 *
		 * @see DfsGarbageCollector
		 */
		GC_TXN(4),

		/**
		 * Pack was created by Git garbage collection.
		 * <p>
		 * This pack contains only unreachable garbage that was found during
		 * the last GC pass. It is retained in a new pack until it is safe to
		 * prune these objects from the repository.
		 */
		UNREACHABLE_GARBAGE(5);

		final int category;

		PackSource(int category) {
			this.category = category;
		}
	}

	private final AtomicReference<PackList> packList;

	private final DfsRepository repository;

	private DfsReaderOptions readerOptions;

	/**
	 * Initialize an object database for our repository.
	 *
	 * @param repository
	 *            repository owning this object database.
	 * @param options
	 *            how readers should access the object database.
	 */
	protected DfsObjDatabase(DfsRepository repository,
			DfsReaderOptions options) {
		this.repository = repository;
		this.packList = new AtomicReference<>(NO_PACKS);
		this.readerOptions = options;
	}

	/**
	 * Get configured reader options, such as read-ahead.
	 *
	 * @return configured reader options, such as read-ahead.
	 */
	public DfsReaderOptions getReaderOptions() {
		return readerOptions;
	}

	/** {@inheritDoc} */
	@Override
	public DfsReader newReader() {
		return new DfsReader(this);
	}

	/** {@inheritDoc} */
	@Override
	public ObjectInserter newInserter() {
		return new DfsInserter(this);
	}

	/**
	 * Scan and list all available pack files in the repository.
	 *
	 * @return list of available packs. The returned array is shared with the
	 *         implementation and must not be modified by the caller.
	 * @throws java.io.IOException
	 *             the pack list cannot be initialized.
	 */
	public DfsPackFile[] getPacks() throws IOException {
		return getPackList().packs;
	}

	/**
	 * Scan and list all available reftable files in the repository.
	 *
	 * @return list of available reftables. The returned array is shared with
	 *         the implementation and must not be modified by the caller.
	 * @throws java.io.IOException
	 *             the pack list cannot be initialized.
	 */
	public DfsReftable[] getReftables() throws IOException {
		return getPackList().reftables;
	}

	/**
	 * Scan and list all available pack files in the repository.
	 *
	 * @return list of available packs, with some additional metadata. The
	 *         returned array is shared with the implementation and must not
	 *         be modified by the caller.
	 * @throws java.io.IOException
	 *             the pack list cannot be initialized.
	 */
	public PackList getPackList() throws IOException {
		return scanPacks(NO_PACKS);
	}

	/**
	 * Get repository owning this object database.
	 *
	 * @return repository owning this object database.
	 */
	protected DfsRepository getRepository() {
		return repository;
	}

	/**
	 * List currently known pack files in the repository, without scanning.
	 *
	 * @return list of available packs. The returned array is shared with the
	 *         implementation and must not be modified by the caller.
	 */
	public DfsPackFile[] getCurrentPacks() {
		return getCurrentPackList().packs;
	}

	/**
	 * List currently known reftable files in the repository, without
	 * scanning.
	 *
	 * @return list of available reftables. The returned array is shared with
	 *         the implementation and must not be modified by the caller.
	 */
	public DfsReftable[] getCurrentReftables() {
		return getCurrentPackList().reftables;
	}

	/**
	 * List currently known pack files in the repository, without scanning.
	 *
	 * @return list of available packs, with some additional metadata. The
	 *         returned array is shared with the implementation and must not
	 *         be modified by the caller.
	 */
	public PackList getCurrentPackList() {
		return packList.get();
	}

	/**
	 * Does the requested object exist in this database?
	 * <p>
	 * This differs from ObjectDatabase's implementation in that we can
	 * selectively ignore unreachable (garbage) objects.
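	 * <p>
	 * For example, a caller that wants to treat garbage as absent might do
	 * the following (a hypothetical sketch; {@code db} and {@code id} stand
	 * in for a concrete database instance and an object id):
	 *
	 * <pre>{@code
	 * // Returns false for objects found only in unreachable garbage packs.
	 * boolean present = db.has(id, true);
	 * }</pre>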
	 *
	 * @param objectId
	 *            identity of the object to test for existence of.
	 * @param avoidUnreachableObjects
	 *            if true, ignore objects that are unreachable.
	 * @return true if the specified object is stored in this database.
	 * @throws java.io.IOException
	 *             the object store cannot be accessed.
	 */
	public boolean has(AnyObjectId objectId, boolean avoidUnreachableObjects)
			throws IOException {
		try (ObjectReader or = newReader()) {
			or.setAvoidUnreachableObjects(avoidUnreachableObjects);
			return or.has(objectId);
		}
	}

	/**
	 * Generate a new unique name for a pack file.
	 *
	 * @param source
	 *            where the pack stream is created.
	 * @return a unique name for the pack file. Must not collide with any
	 *         other pack file name in the same DFS.
	 * @throws java.io.IOException
	 *             a new unique pack description cannot be generated.
	 */
	protected abstract DfsPackDescription newPack(PackSource source)
			throws IOException;

	/**
	 * Generate a new unique name for a pack file.
	 * <p>
	 * The default implementation of this method is equivalent to
	 * {@code newPack(source).setEstimatedPackSize(estimatedPackSize)}.
	 * Clients can override this method to use the given
	 * {@code estimatedPackSize} value more efficiently when creating a new
	 * {@link org.eclipse.jgit.internal.storage.dfs.DfsPackDescription}
	 * object.
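	 * <p>
	 * For example, a caller inside an implementation might pass an estimate
	 * up front (a hypothetical sketch; the size is illustrative):
	 *
	 * <pre>{@code
	 * // Request a description for a pack expected to be about 32 MiB.
	 * DfsPackDescription desc = newPack(PackSource.INSERT, 32 * 1024 * 1024);
	 * }</pre>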
	 *
	 * @param source
	 *            where the pack stream is created.
	 * @param estimatedPackSize
	 *            the estimated size of the pack.
	 * @return a unique name for the pack file. Must not collide with any
	 *         other pack file name in the same DFS.
	 * @throws java.io.IOException
	 *             a new unique pack description cannot be generated.
	 */
	protected DfsPackDescription newPack(PackSource source,
			long estimatedPackSize) throws IOException {
		DfsPackDescription pack = newPack(source);
		pack.setEstimatedPackSize(estimatedPackSize);
		return pack;
	}

	/**
	 * Commit a pack and index pair that was written to the DFS.
	 * <p>
	 * Committing the pack/index pair makes them visible to readers. The JGit
	 * DFS code always writes the pack, then the index. This allows a simple
	 * commit process to do nothing if readers always look for both files to
	 * exist and the DFS performs atomic creation of the file (e.g. stream to
	 * a temporary file and rename to target on close).
	 * <p>
	 * During pack compaction or GC the new pack file may be replacing other
	 * older files. Implementations should remove those older files (if any)
	 * as part of the commit of the new file.
	 * <p>
	 * This method is a trivial wrapper around
	 * {@link #commitPackImpl(Collection, Collection)} that calls the
	 * implementation and fires events.
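	 * <p>
	 * The expected shape of an implementation is roughly the following (a
	 * hypothetical sketch; {@code storage} and its methods are illustrative,
	 * not part of JGit):
	 *
	 * <pre>{@code
	 * protected void commitPackImpl(Collection<DfsPackDescription> desc,
	 *         Collection<DfsPackDescription> replaces) throws IOException {
	 *     storage.publish(desc); // atomically make the new packs visible
	 *     if (replaces != null) {
	 *         storage.delete(replaces); // retire the packs they replace
	 *     }
	 * }
	 * }</pre>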
	 *
	 * @param desc
	 *            descriptions of the new packs.
	 * @param replaces
	 *            if not null, list of packs to remove.
	 * @throws java.io.IOException
	 *             the packs cannot be committed. On failure a rollback must
	 *             also be attempted by the caller.
	 */
	protected void commitPack(Collection<DfsPackDescription> desc,
			Collection<DfsPackDescription> replaces) throws IOException {
		commitPackImpl(desc, replaces);
		getRepository().fireEvent(new DfsPacksChangedEvent());
	}

	/**
	 * Implementation of pack commit.
	 *
	 * @see #commitPack(Collection, Collection)
	 * @param desc
	 *            descriptions of the new packs.
	 * @param replaces
	 *            if not null, list of packs to remove.
	 * @throws java.io.IOException
	 *             the packs cannot be committed.
	 */
	protected abstract void commitPackImpl(Collection<DfsPackDescription> desc,
			Collection<DfsPackDescription> replaces) throws IOException;

	/**
	 * Try to rollback a pack creation.
	 * <p>
	 * JGit DFS always writes the pack first, then the index. If the pack
	 * does not yet exist, then neither does the index. A safe DFS
	 * implementation would try to remove both files to ensure they are
	 * really gone.
	 * <p>
	 * A rollback does not support failures, as it only occurs when there is
	 * already a failure in progress. A DFS implementor may wish to log
	 * warnings/error messages when a rollback fails, but should not send new
	 * exceptions up the Java callstack.
	 *
	 * @param desc
	 *            packs to delete.
	 */
	protected abstract void rollbackPack(Collection<DfsPackDescription> desc);

	/**
	 * List the available pack files.
	 * <p>
	 * The returned list must support random access and must be mutable by
	 * the caller. It is sorted in place using the natural sorting of the
	 * returned DfsPackDescription objects.
	 *
	 * @return available packs. May be empty if there are no packs.
	 * @throws java.io.IOException
	 *             the packs cannot be listed and the object database is not
	 *             usable by the caller.
	 */
	protected abstract List<DfsPackDescription> listPacks() throws IOException;

	/**
	 * Open a pack, pack index, or other related file for reading.
	 *
	 * @param desc
	 *            description of pack related to the data that will be read.
	 *            This is an instance previously obtained from
	 *            {@link #listPacks()}, but not necessarily from the same
	 *            DfsObjDatabase instance.
	 * @param ext
	 *            file extension that will be read, e.g. "pack" or "idx".
	 * @return channel to read the file.
	 * @throws java.io.FileNotFoundException
	 *             the file does not exist.
	 * @throws java.io.IOException
	 *             the file cannot be opened.
	 */
	protected abstract ReadableChannel openFile(
			DfsPackDescription desc, PackExt ext)
			throws FileNotFoundException, IOException;

	/**
	 * Open a pack, pack index, or other related file for writing.
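	 * <p>
	 * For example, callers stream the file contents and rely on the commit
	 * protocol described in {@link #commitPack(Collection, Collection)} to
	 * make the result visible (a hypothetical sketch; {@code desc} and
	 * {@code packBytes} are illustrative):
	 *
	 * <pre>{@code
	 * try (DfsOutputStream out = writeFile(desc, PackExt.PACK)) {
	 *     out.write(packBytes); // stream pack contents to the DFS
	 * }
	 * }</pre>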
	 *
	 * @param desc
	 *            description of pack related to the data that will be
	 *            written. This is an instance previously obtained from
	 *            {@link #newPack(PackSource)}.
	 * @param ext
	 *            file extension that will be written, e.g. "pack" or "idx".
	 * @return channel to write the file.
	 * @throws java.io.IOException
	 *             the file cannot be opened.
	 */
	protected abstract DfsOutputStream writeFile(
			DfsPackDescription desc, PackExt ext) throws IOException;

	void addPack(DfsPackFile newPack) throws IOException {
		PackList o, n;
		do {
			o = packList.get();
			if (o == NO_PACKS) {
				// The repository may not have needed any existing objects to
				// complete the current task of creating a pack (e.g. push of
				// a pack with no external deltas). Because we don't scan for
				// newly added packs on missed object lookups, scan now to
				// make sure all older packs are available in the packList.
				o = scanPacks(o);

				// It's possible the scan identified the pack we were asked
				// to add, as the pack was already committed via commitPack().
				// If this is the case return without changing the list.
				for (DfsPackFile p : o.packs) {
					if (p.key.equals(newPack.key)) {
						return;
					}
				}
			}

			DfsPackFile[] packs = new DfsPackFile[1 + o.packs.length];
			packs[0] = newPack;
			System.arraycopy(o.packs, 0, packs, 1, o.packs.length);
			n = new PackListImpl(packs, o.reftables);
		} while (!packList.compareAndSet(o, n));
	}

	void addReftable(DfsPackDescription add, Set<DfsPackDescription> remove)
			throws IOException {
		PackList o, n;
		do {
			o = packList.get();
			if (o == NO_PACKS) {
				o = scanPacks(o);
				for (DfsReftable t : o.reftables) {
					if (t.getPackDescription().equals(add)) {
						return;
					}
				}
			}

			List<DfsReftable> tables = new ArrayList<>(1 + o.reftables.length);
			for (DfsReftable t : o.reftables) {
				if (!remove.contains(t.getPackDescription())) {
					tables.add(t);
				}
			}
			tables.add(new DfsReftable(add));
			n = new PackListImpl(o.packs, tables.toArray(new DfsReftable[0]));
		} while (!packList.compareAndSet(o, n));
	}

	PackList scanPacks(final PackList original) throws IOException {
		PackList o, n;
		synchronized (packList) {
			do {
				o = packList.get();
				if (o != original) {
					// Another thread did the scan for us, while we
					// were blocked on the monitor above.
					return o;
				}
				n = scanPacksImpl(o);
				if (n == o)
					return n;
			} while (!packList.compareAndSet(o, n));
		}
		getRepository().fireEvent(new DfsPacksChangedEvent());
		return n;
	}

	private PackList scanPacksImpl(PackList old) throws IOException {
		DfsBlockCache cache = DfsBlockCache.getInstance();
		Map<DfsPackDescription, DfsPackFile> packs = packMap(old);
		Map<DfsPackDescription, DfsReftable> reftables = reftableMap(old);

		List<DfsPackDescription> scanned = listPacks();
		Collections.sort(scanned);

		List<DfsPackFile> newPacks = new ArrayList<>(scanned.size());
		List<DfsReftable> newReftables = new ArrayList<>(scanned.size());
		boolean foundNew = false;
		for (DfsPackDescription dsc : scanned) {
			// Reuse already-open packs and reftables from the old list
			// where possible; only unseen descriptions get new instances.
			DfsPackFile oldPack = packs.remove(dsc);
			if (oldPack != null) {
				newPacks.add(oldPack);
			} else if (dsc.hasFileExt(PackExt.PACK)) {
				newPacks.add(new DfsPackFile(cache, dsc));
				foundNew = true;
			}

			DfsReftable oldReftable = reftables.remove(dsc);
			if (oldReftable != null) {
				newReftables.add(oldReftable);
			} else if (dsc.hasFileExt(PackExt.REFTABLE)) {
				newReftables.add(new DfsReftable(cache, dsc));
				foundNew = true;
			}
		}

		if (newPacks.isEmpty())
			return new PackListImpl(NO_PACKS.packs, NO_PACKS.reftables);
		if (!foundNew) {
			old.clearDirty();
			return old;
		}
		Collections.sort(newReftables, reftableComparator());
		return new PackListImpl(
				newPacks.toArray(new DfsPackFile[0]),
				newReftables.toArray(new DfsReftable[0]));
	}

	private static Map<DfsPackDescription, DfsPackFile> packMap(PackList old) {
		Map<DfsPackDescription, DfsPackFile> forReuse = new HashMap<>();
		for (DfsPackFile p : old.packs) {
			if (!p.invalid()) {
				forReuse.put(p.desc, p);
			}
		}
		return forReuse;
	}

	private static Map<DfsPackDescription, DfsReftable> reftableMap(PackList old) {
		Map<DfsPackDescription, DfsReftable> forReuse = new HashMap<>();
		for (DfsReftable p : old.reftables) {
			if (!p.invalid()) {
				forReuse.put(p.desc, p);
			}
		}
		return forReuse;
	}

	/**
	 * Get comparator to sort {@link DfsReftable} by priority.
	 *
	 * @return comparator to sort {@link DfsReftable} by priority.
	 */
	protected Comparator<DfsReftable> reftableComparator() {
		return (fa, fb) -> {
			DfsPackDescription a = fa.getPackDescription();
			DfsPackDescription b = fb.getPackDescription();

			// GC, COMPACT reftables first by higher category.
			int c = category(b) - category(a);
			if (c != 0) {
				return c;
			}

			// Lower maxUpdateIndex first.
			c = Long.signum(a.getMaxUpdateIndex() - b.getMaxUpdateIndex());
			if (c != 0) {
				return c;
			}

			// Older reftable first.
			return Long.signum(a.getLastModified() - b.getLastModified());
		};
	}

	static int category(DfsPackDescription d) {
		PackSource s = d.getPackSource();
		return s != null ? s.category : 0;
	}

	/**
	 * Clears the cached list of packs, forcing them to be scanned again.
	 */
	protected void clearCache() {
		packList.set(NO_PACKS);
	}

	/** {@inheritDoc} */
	@Override
	public void close() {
		packList.set(NO_PACKS);
	}

	/** Snapshot of packs scanned in a single pass. */
	public static abstract class PackList {
		/** All known packs, sorted. */
		public final DfsPackFile[] packs;

		/** All known reftables, sorted. */
		public final DfsReftable[] reftables;

		private long lastModified = -1;

		PackList(DfsPackFile[] packs, DfsReftable[] reftables) {
			this.packs = packs;
			this.reftables = reftables;
		}

		/** @return last modified time of all packs, in milliseconds. */
		public long getLastModified() {
			if (lastModified < 0) {
				long max = 0;
				for (DfsPackFile pack : packs) {
					max = Math.max(max,
							pack.getPackDescription().getLastModified());
				}
				lastModified = max;
			}
			return lastModified;
		}

		abstract boolean dirty();

		abstract void clearDirty();

		/**
		 * Mark pack list as dirty.
		 * <p>
		 * Used when the caller knows that new data might have been written
		 * to the repository that could invalidate open readers depending on
		 * this pack list, for example if refs are newly scanned.
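		 * <p>
		 * For example (a hypothetical caller; {@code db} stands in for a
		 * concrete database instance):
		 *
		 * <pre>{@code
		 * PackList list = db.getCurrentPackList();
		 * list.markDirty(); // readers holding this list know it may be stale
		 * }</pre>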
		 */
		public abstract void markDirty();
	}

	private static final class PackListImpl extends PackList {
		private volatile boolean dirty;

		PackListImpl(DfsPackFile[] packs, DfsReftable[] reftables) {
			super(packs, reftables);
		}

		@Override
		boolean dirty() {
			return dirty;
		}

		@Override
		void clearDirty() {
			dirty = false;
		}

		@Override
		public void markDirty() {
			dirty = true;
		}
	}
}