/*
 * Copyright (C) 2011, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.internal.storage.dfs;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;

import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.ObjectDatabase;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.ObjectReader;

/** Manages objects stored in {@link DfsPackFile} on a storage system. */
public abstract class DfsObjDatabase extends ObjectDatabase {
	private static final PackList NO_PACKS = new PackList(new DfsPackFile[0]) {
		@Override
		boolean dirty() {
			return true;
		}

		@Override
		void clearDirty() {
			// Always dirty.
		}

		@Override
		public void markDirty() {
			// Always dirty.
		}
	};

	/** Sources for a pack file. */
	public enum PackSource {
		/** The pack is created by ObjectInserter due to local activity. */
		INSERT(0),

		/**
		 * The pack is created by PackParser due to a network event.
		 * <p>
		 * A received pack can be from either a push into the repository or a
		 * fetch into the repository; the direction doesn't matter. A
		 * received pack was built by the remote Git implementation and may
		 * not match the storage layout preferred by this version. Received
		 * packs are likely to be either compacted or garbage collected in
		 * the future.
		 */
		RECEIVE(0),

		/**
		 * The pack was created by compacting multiple packs together.
		 * <p>
		 * Packs created by compacting multiple packs together aren't nearly
		 * as efficient as a fully garbage collected repository, but may save
		 * disk space by reducing redundant copies of base objects.
		 *
		 * @see DfsPackCompactor
		 */
		COMPACT(1),

		/**
		 * Pack was created by Git garbage collection in this implementation.
		 * <p>
		 * This source is only used by the {@link DfsGarbageCollector} when
		 * it builds a pack file by traversing the object graph and copying
		 * all reachable objects into a new pack stream.
		 *
		 * @see DfsGarbageCollector
		 */
		GC(2),

		/** Created from non-heads by {@link DfsGarbageCollector}. */
		GC_REST(3),

		/**
		 * RefTreeGraph pack was created by Git garbage collection.
		 *
		 * @see DfsGarbageCollector
		 */
		GC_TXN(4),

		/**
		 * Pack was created by Git garbage collection.
		 * <p>
		 * This pack contains only unreachable garbage that was found during
		 * the last GC pass. It is retained in a new pack until it is safe to
		 * prune these objects from the repository.
		 */
		UNREACHABLE_GARBAGE(5);

		final int category;

		PackSource(int category) {
			this.category = category;
		}
	}

	private final AtomicReference<PackList> packList;

	private final DfsRepository repository;

	private DfsReaderOptions readerOptions;

	/**
	 * Initialize an object database for our repository.
	 *
	 * @param repository
	 *            repository owning this object database.
	 * @param options
	 *            how readers should access the object database.
	 */
	protected DfsObjDatabase(DfsRepository repository,
			DfsReaderOptions options) {
		this.repository = repository;
		this.packList = new AtomicReference<PackList>(NO_PACKS);
		this.readerOptions = options;
	}

	/** @return configured reader options, such as read-ahead. */
	public DfsReaderOptions getReaderOptions() {
		return readerOptions;
	}

	@Override
	public ObjectReader newReader() {
		return new DfsReader(this);
	}

	@Override
	public ObjectInserter newInserter() {
		return new DfsInserter(this);
	}

	/**
	 * Scan and list all available pack files in the repository.
	 *
	 * @return list of available packs. The returned array is shared with the
	 *         implementation and must not be modified by the caller.
	 * @throws IOException
	 *             the pack list cannot be initialized.
	 */
	public DfsPackFile[] getPacks() throws IOException {
		return getPackList().packs;
	}

	/**
	 * Scan and list all available pack files in the repository.
	 *
	 * @return list of available packs, with some additional metadata. The
	 *         returned array is shared with the implementation and must not
	 *         be modified by the caller.
	 * @throws IOException
	 *             the pack list cannot be initialized.
	 */
	public PackList getPackList() throws IOException {
		return scanPacks(NO_PACKS);
	}

	/** @return repository owning this object database. */
	protected DfsRepository getRepository() {
		return repository;
	}

	/**
	 * List currently known pack files in the repository, without scanning.
	 *
	 * @return list of available packs. The returned array is shared with the
	 *         implementation and must not be modified by the caller.
	 */
	public DfsPackFile[] getCurrentPacks() {
		return getCurrentPackList().packs;
	}

	/**
	 * List currently known pack files in the repository, without scanning.
	 *
	 * @return list of available packs, with some additional metadata. The
	 *         returned array is shared with the implementation and must not
	 *         be modified by the caller.
	 */
	public PackList getCurrentPackList() {
		return packList.get();
	}

	/**
	 * Does the requested object exist in this database?
	 * <p>
	 * This differs from ObjectDatabase's implementation in that we can
	 * selectively ignore unreachable (garbage) objects.
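	 * <p>
	 * A minimal usage sketch; here {@code odb} stands for any concrete
	 * DfsObjDatabase and {@code commitId} is a placeholder for whatever
	 * object id the caller wants to test (both names are illustrative, not
	 * part of this API):
	 *
	 * <pre>
	 * {@code
	 * // true => objects found only in UNREACHABLE_GARBAGE packs are ignored
	 * boolean present = odb.has(commitId, true);
	 * }
	 * </pre>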
	 *
	 * @param objectId
	 *            identity of the object to test for existence of.
	 * @param avoidUnreachableObjects
	 *            if true, ignore objects that are unreachable.
	 * @return true if the specified object is stored in this database.
	 * @throws IOException
	 *             the object store cannot be accessed.
	 */
	public boolean has(AnyObjectId objectId, boolean avoidUnreachableObjects)
			throws IOException {
		try (ObjectReader or = newReader()) {
			or.setAvoidUnreachableObjects(avoidUnreachableObjects);
			return or.has(objectId);
		}
	}

	/**
	 * Generate a new unique name for a pack file.
	 *
	 * @param source
	 *            where the pack stream is created.
	 * @return a unique name for the pack file. Must not collide with any
	 *         other pack file name in the same DFS.
	 * @throws IOException
	 *             a new unique pack description cannot be generated.
	 */
	protected abstract DfsPackDescription newPack(PackSource source)
			throws IOException;

	/**
	 * Commit a pack and index pair that was written to the DFS.
	 * <p>
	 * Committing the pack/index pair makes them visible to readers. The JGit
	 * DFS code always writes the pack, then the index. This allows a simple
	 * commit process to do nothing if readers always look for both files to
	 * exist and the DFS performs atomic creation of the file (e.g. stream to
	 * a temporary file and rename to target on close).
	 * <p>
	 * During pack compaction or GC the new pack file may be replacing other
	 * older files. Implementations should remove those older files (if any)
	 * as part of the commit of the new file.
	 * <p>
	 * This method is a trivial wrapper around
	 * {@link #commitPackImpl(Collection, Collection)} that calls the
	 * implementation and fires events.
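	 * <p>
	 * Putting the pieces together, the write-then-commit sequence looks
	 * roughly like the following sketch (the elided stream contents stand in
	 * for real pack and index data):
	 *
	 * <pre>
	 * {@code
	 * DfsPackDescription desc = newPack(PackSource.INSERT);
	 * try (DfsOutputStream out = writeFile(desc, PackExt.PACK)) {
	 *     // ... stream pack data ...
	 * }
	 * try (DfsOutputStream out = writeFile(desc, PackExt.INDEX)) {
	 *     // ... stream index data ...
	 * }
	 * commitPack(Collections.singletonList(desc), null);
	 * }
	 * </pre>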
	 *
	 * @param desc
	 *            description of the new packs.
	 * @param replaces
	 *            if not null, list of packs to remove.
	 * @throws IOException
	 *             the packs cannot be committed. On failure a rollback must
	 *             also be attempted by the caller.
	 */
	protected void commitPack(Collection<DfsPackDescription> desc,
			Collection<DfsPackDescription> replaces) throws IOException {
		commitPackImpl(desc, replaces);
		getRepository().fireEvent(new DfsPacksChangedEvent());
	}

	/**
	 * Implementation of pack commit.
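	 * <p>
	 * A hypothetical backend might implement it along these lines; the
	 * {@code dfsClient} object and its {@code publish}/{@code delete}
	 * methods are invented for illustration and are not part of JGit:
	 *
	 * <pre>
	 * {@code
	 * protected void commitPackImpl(Collection<DfsPackDescription> desc,
	 *         Collection<DfsPackDescription> replaces) throws IOException {
	 *     for (DfsPackDescription d : desc)
	 *         dfsClient.publish(d); // make pack and index visible to readers
	 *     if (replaces != null)
	 *         for (DfsPackDescription r : replaces)
	 *             dfsClient.delete(r); // remove packs replaced by compaction/GC
	 * }
	 * }
	 * </pre>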
	 *
	 * @see #commitPack(Collection, Collection)
	 * @param desc
	 *            description of the new packs.
	 * @param replaces
	 *            if not null, list of packs to remove.
	 * @throws IOException
	 *             the packs cannot be committed.
	 */
	protected abstract void commitPackImpl(Collection<DfsPackDescription> desc,
			Collection<DfsPackDescription> replaces) throws IOException;

	/**
	 * Try to rollback a pack creation.
	 * <p>
	 * JGit DFS always writes the pack first, then the index. If the pack
	 * does not yet exist, then neither does the index. A safe DFS
	 * implementation would try to remove both files to ensure they are
	 * really gone.
	 * <p>
	 * A rollback cannot report failure, as it only occurs when there is
	 * already a failure in progress. A DFS implementor may wish to log
	 * warnings/error messages when a rollback fails, but should not send new
	 * exceptions up the Java callstack.
	 *
	 * @param desc
	 *            packs to delete.
	 */
	protected abstract void rollbackPack(Collection<DfsPackDescription> desc);

	/**
	 * List the available pack files.
	 * <p>
	 * The returned list must support random access and must be mutable by
	 * the caller. It is sorted in place using the natural sorting of the
	 * returned DfsPackDescription objects.
	 *
	 * @return available packs. May be empty if there are no packs.
	 * @throws IOException
	 *             the packs cannot be listed and the object database is not
	 *             functional to the caller.
	 */
	protected abstract List<DfsPackDescription> listPacks() throws IOException;

	/**
	 * Open a pack, pack index, or other related file for reading.
	 *
	 * @param desc
	 *            description of pack related to the data that will be read.
	 *            This is an instance previously obtained from
	 *            {@link #listPacks()}, but not necessarily from the same
	 *            DfsObjDatabase instance.
	 * @param ext
	 *            file extension that will be read, e.g. "pack" or "idx".
	 * @return channel to read the file.
	 * @throws FileNotFoundException
	 *             the file does not exist.
	 * @throws IOException
	 *             the file cannot be opened.
	 */
	protected abstract ReadableChannel openFile(
			DfsPackDescription desc, PackExt ext)
			throws FileNotFoundException, IOException;

	/**
	 * Open a pack, pack index, or other related file for writing.
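	 * <p>
	 * Implementations should make the file visible atomically, for example
	 * by streaming to a temporary name and renaming to the target on close,
	 * as described in {@link #commitPack(Collection, Collection)}. A sketch,
	 * again using an invented {@code dfsClient} API:
	 *
	 * <pre>
	 * {@code
	 * protected DfsOutputStream writeFile(DfsPackDescription desc,
	 *         PackExt ext) throws IOException {
	 *     // Write under a temporary name; rename to the final name when
	 *     // the stream is closed so readers never see a partial file.
	 *     return dfsClient.createAtomic(desc, ext);
	 * }
	 * }
	 * </pre>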
	 *
	 * @param desc
	 *            description of pack related to the data that will be
	 *            written. This is an instance previously obtained from
	 *            {@link #newPack(PackSource)}.
	 * @param ext
	 *            file extension that will be written, e.g. "pack" or "idx".
	 * @return channel to write the file.
	 * @throws IOException
	 *             the file cannot be opened.
	 */
	protected abstract DfsOutputStream writeFile(
			DfsPackDescription desc, PackExt ext) throws IOException;

	void addPack(DfsPackFile newPack) throws IOException {
		PackList o, n;
		do {
			o = packList.get();
			if (o == NO_PACKS) {
				// The repository may not have needed any existing objects to
				// complete the current task of creating a pack (e.g. push of
				// a pack with no external deltas). Because we don't scan for
				// newly added packs on missed object lookups, scan now to
				// make sure all older packs are available in the packList.
				o = scanPacks(o);

				// It's possible the scan identified the pack we were asked
				// to add, as the pack was already committed via commitPack().
				// If this is the case, return without changing the list.
				for (DfsPackFile p : o.packs) {
					if (p == newPack)
						return;
				}
			}

			DfsPackFile[] packs = new DfsPackFile[1 + o.packs.length];
			packs[0] = newPack;
			System.arraycopy(o.packs, 0, packs, 1, o.packs.length);
			n = new PackListImpl(packs);
		} while (!packList.compareAndSet(o, n));
	}

	PackList scanPacks(final PackList original) throws IOException {
		PackList o, n;
		synchronized (packList) {
			do {
				o = packList.get();
				if (o != original) {
					// Another thread did the scan for us, while we
					// were blocked on the monitor above.
					//
					return o;
				}
				n = scanPacksImpl(o);
				if (n == o)
					return n;
			} while (!packList.compareAndSet(o, n));
		}
		getRepository().fireEvent(new DfsPacksChangedEvent());
		return n;
	}

	private PackList scanPacksImpl(PackList old) throws IOException {
		DfsBlockCache cache = DfsBlockCache.getInstance();
		Map<DfsPackDescription, DfsPackFile> forReuse = reuseMap(old);
		List<DfsPackDescription> scanned = listPacks();
		Collections.sort(scanned);

		List<DfsPackFile> list = new ArrayList<DfsPackFile>(scanned.size());
		boolean foundNew = false;
		for (DfsPackDescription dsc : scanned) {
			DfsPackFile oldPack = forReuse.remove(dsc);
			if (oldPack != null) {
				list.add(oldPack);
			} else {
				list.add(cache.getOrCreate(dsc, null));
				foundNew = true;
			}
		}

		for (DfsPackFile p : forReuse.values())
			p.close();
		if (list.isEmpty())
			return new PackListImpl(NO_PACKS.packs);
		if (!foundNew) {
			old.clearDirty();
			return old;
		}
		return new PackListImpl(list.toArray(new DfsPackFile[list.size()]));
	}

	private static Map<DfsPackDescription, DfsPackFile> reuseMap(PackList old) {
		Map<DfsPackDescription, DfsPackFile> forReuse
				= new HashMap<DfsPackDescription, DfsPackFile>();
		for (DfsPackFile p : old.packs) {
			if (p.invalid()) {
				// The pack instance is corrupted, and cannot be safely used
				// again. Do not include it in our reuse map.
				//
				p.close();
				continue;
			}

			DfsPackFile prior = forReuse.put(p.getPackDescription(), p);
			if (prior != null) {
				// This should never occur. It should be impossible for us
				// to have two pack files with the same name, as all of them
				// came out of the same directory. If it does, we promised to
				// close any PackFiles we did not reuse, so close the second;
				// readers are likely to be actively using the first.
				//
				forReuse.put(prior.getPackDescription(), prior);
				p.close();
			}
		}
		return forReuse;
	}

	/** Clears the cached list of packs, forcing them to be scanned again. */
	protected void clearCache() {
		packList.set(NO_PACKS);
	}

	@Override
	public void close() {
		// PackList packs = packList.get();
		packList.set(NO_PACKS);

		// TODO Close packs if they aren't cached.
		// for (DfsPackFile p : packs.packs)
		//	p.close();
	}

	/** Snapshot of packs scanned in a single pass. */
	public static abstract class PackList {
		/** All known packs, sorted. */
		public final DfsPackFile[] packs;

		private long lastModified = -1;

		PackList(DfsPackFile[] packs) {
			this.packs = packs;
		}

		/** @return last modified time of all packs, in milliseconds. */
		public long getLastModified() {
			if (lastModified < 0) {
				long max = 0;
				for (DfsPackFile pack : packs) {
					max = Math.max(max,
							pack.getPackDescription().getLastModified());
				}
				lastModified = max;
			}
			return lastModified;
		}

		abstract boolean dirty();

		abstract void clearDirty();

		/**
		 * Mark pack list as dirty.
		 * <p>
		 * Used when the caller knows that new data might have been written
		 * to the repository that could invalidate open readers depending on
		 * this pack list, for example if refs are newly scanned.
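		 * <p>
		 * A typical call site is a one-liner (sketch; {@code odb} is any
		 * DfsObjDatabase instance):
		 *
		 * <pre>
		 * {@code
		 * odb.getCurrentPackList().markDirty();
		 * }
		 * </pre>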
		 */
		public abstract void markDirty();
	}

	private static final class PackListImpl extends PackList {
		private volatile boolean dirty;

		PackListImpl(DfsPackFile[] packs) {
			super(packs);
		}

		@Override
		boolean dirty() {
			return dirty;
		}

		@Override
		void clearDirty() {
			dirty = false;
		}

		@Override
		public void markDirty() {
			dirty = true;
		}
	}
}