1 /* 2 * Copyright (C) 2011, Google Inc. 3 * and other copyright owners as documented in the project's IP log. 4 * 5 * This program and the accompanying materials are made available 6 * under the terms of the Eclipse Distribution License v1.0 which 7 * accompanies this distribution, is reproduced below, and is 8 * available at http://www.eclipse.org/org/documents/edl-v10.php 9 * 10 * All rights reserved. 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials provided 22 * with the distribution. 23 * 24 * - Neither the name of the Eclipse Foundation, Inc. nor the 25 * names of its contributors may be used to endorse or promote 26 * products derived from this software without specific prior 27 * written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 30 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 34 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 38 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 41 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 */ 43 44 package org.eclipse.jgit.internal.storage.dfs; 45 46 import java.io.FileNotFoundException; 47 import java.io.IOException; 48 import java.util.ArrayList; 49 import java.util.Collection; 50 import java.util.Collections; 51 import java.util.HashMap; 52 import java.util.List; 53 import java.util.Map; 54 import java.util.concurrent.atomic.AtomicReference; 55 56 import org.eclipse.jgit.internal.storage.pack.PackExt; 57 import org.eclipse.jgit.lib.AnyObjectId; 58 import org.eclipse.jgit.lib.ObjectDatabase; 59 import org.eclipse.jgit.lib.ObjectInserter; 60 import org.eclipse.jgit.lib.ObjectReader; 61 62 /** Manages objects stored in {@link DfsPackFile} on a storage system. */ 63 public abstract class DfsObjDatabase extends ObjectDatabase { 64 private static final PackList NO_PACKS = new PackList(new DfsPackFile[0]); 65 66 /** Sources for a pack file. */ 67 public static enum PackSource { 68 /** The pack is created by ObjectInserter due to local activity. */ 69 INSERT(0), 70 71 /** 72 * The pack is created by PackParser due to a network event. 73 * <p> 74 * A received pack can be from either a push into the repository, or a 75 * fetch into the repository, the direction doesn't matter. A received 76 * pack was built by the remote Git implementation and may not match the 77 * storage layout preferred by this version. Received packs are likely 78 * to be either compacted or garbage collected in the future. 79 */ 80 RECEIVE(0), 81 82 /** 83 * Pack was created by Git garbage collection by this implementation. 84 * <p> 85 * This source is only used by the {@link DfsGarbageCollector} when it 86 * builds a pack file by traversing the object graph and copying all 87 * reachable objects into a new pack stream. 88 * 89 * @see DfsGarbageCollector 90 */ 91 GC(1), 92 93 /** 94 * RefTreeGraph pack was created by Git garbage collection. 95 * 96 * @see DfsGarbageCollector 97 */ 98 GC_TXN(1), 99 100 /** 101 * The pack was created by compacting multiple packs together. 102 * <p> 103 * Packs created by compacting multiple packs together aren't nearly as 104 * efficient as a fully garbage collected repository, but may save disk 105 * space by reducing redundant copies of base objects. 106 * 107 * @see DfsPackCompactor 108 */ 109 COMPACT(1), 110 111 /** 112 * Pack was created by Git garbage collection. 113 * <p> 114 * This pack contains only unreachable garbage that was found during the 115 * last GC pass. It is retained in a new pack until it is safe to prune 116 * these objects from the repository. 117 */ 118 UNREACHABLE_GARBAGE(2); 119 120 final int category; 121 122 PackSource(int category) { 123 this.category = category; 124 } 125 } 126 127 private final AtomicReference<PackList> packList; 128 129 private final DfsRepository repository; 130 131 private DfsReaderOptions readerOptions; 132 133 /** 134 * Initialize an object database for our repository. 135 * 136 * @param repository 137 * repository owning this object database. 138 * 139 * @param options 140 * how readers should access the object database. 141 */ 142 protected DfsObjDatabase(DfsRepository repository, 143 DfsReaderOptions options) { 144 this.repository = repository; 145 this.packList = new AtomicReference<PackList>(NO_PACKS); 146 this.readerOptions = options; 147 } 148 149 /** @return configured reader options, such as read-ahead. */ 150 public DfsReaderOptions getReaderOptions() { 151 return readerOptions; 152 } 153 154 @Override 155 public ObjectReader newReader() { 156 return new DfsReader(this); 157 } 158 159 @Override 160 public ObjectInserter newInserter() { 161 return new DfsInserter(this); 162 } 163 164 /** 165 * Scan and list all available pack files in the repository. 166 * 167 * @return list of available packs. The returned array is shared with the 168 * implementation and must not be modified by the caller. 169 * @throws IOException 170 * the pack list cannot be initialized. 171 */ 172 public DfsPackFile[] getPacks() throws IOException { 173 return scanPacks(NO_PACKS).packs; 174 } 175 176 /** @return repository owning this object database. */ 177 protected DfsRepository getRepository() { 178 return repository; 179 } 180 181 /** 182 * List currently known pack files in the repository, without scanning. 183 * 184 * @return list of available packs. The returned array is shared with the 185 * implementation and must not be modified by the caller. 186 */ 187 public DfsPackFile[] getCurrentPacks() { 188 return packList.get().packs; 189 } 190 191 /** 192 * Does the requested object exist in this database? 193 * <p> 194 * This differs from ObjectDatabase's implementation in that we can selectively 195 * ignore unreachable (garbage) objects. 196 * 197 * @param objectId 198 * identity of the object to test for existence of. 199 * @param avoidUnreachableObjects 200 * if true, ignore objects that are unreachable. 201 * @return true if the specified object is stored in this database. 202 * @throws IOException 203 * the object store cannot be accessed. 204 */ 205 public boolean has(AnyObjectId objectId, boolean avoidUnreachableObjects) 206 throws IOException { 207 try (ObjectReader or = newReader()) { 208 or.setAvoidUnreachableObjects(avoidUnreachableObjects); 209 return or.has(objectId); 210 } 211 } 212 213 /** 214 * Generate a new unique name for a pack file. 215 * 216 * @param source 217 * where the pack stream is created. 218 * @return a unique name for the pack file. Must not collide with any other 219 * pack file name in the same DFS. 220 * @throws IOException 221 * a new unique pack description cannot be generated. 222 */ 223 protected abstract DfsPackDescription newPack(PackSource source) 224 throws IOException; 225 226 /** 227 * Commit a pack and index pair that was written to the DFS. 228 * <p> 229 * Committing the pack/index pair makes them visible to readers. The JGit 230 * DFS code always writes the pack, then the index. This allows a simple 231 * commit process to do nothing if readers always look for both files to 232 * exist and the DFS performs atomic creation of the file (e.g. stream to a 233 * temporary file and rename to target on close). 234 * <p> 235 * During pack compaction or GC the new pack file may be replacing other 236 * older files. Implementations should remove those older files (if any) as 237 * part of the commit of the new file. 238 * <p> 239 * This method is a trivial wrapper around 240 * {@link #commitPackImpl(Collection, Collection)} that calls the 241 * implementation and fires events. 242 * 243 * @param desc 244 * description of the new packs. 245 * @param replaces 246 * if not null, list of packs to remove. 247 * @throws IOException 248 * the packs cannot be committed. On failure a rollback must 249 * also be attempted by the caller. 250 */ 251 protected void commitPack(Collection<DfsPackDescription> desc, 252 Collection<DfsPackDescription> replaces) throws IOException { 253 commitPackImpl(desc, replaces); 254 getRepository().fireEvent(new DfsPacksChangedEvent()); 255 } 256 257 /** 258 * Implementation of pack commit. 259 * 260 * @see #commitPack(Collection, Collection) 261 * 262 * @param desc 263 * description of the new packs. 264 * @param replaces 265 * if not null, list of packs to remove. 266 * @throws IOException 267 * the packs cannot be committed. 268 */ 269 protected abstract void commitPackImpl(Collection<DfsPackDescription> desc, 270 Collection<DfsPackDescription> replaces) throws IOException; 271 272 /** 273 * Try to rollback a pack creation. 274 * <p> 275 * JGit DFS always writes the pack first, then the index. If the pack does 276 * not yet exist, then neither does the index. A safe DFS implementation 277 * would try to remove both files to ensure they are really gone. 278 * <p> 279 * A rollback does not support failures, as it only occurs when there is 280 * already a failure in progress. A DFS implementor may wish to log 281 * warnings/error messages when a rollback fails, but should not send new 282 * exceptions up the Java callstack. 283 * 284 * @param desc 285 * pack to delete. 286 */ 287 protected abstract void rollbackPack(Collection<DfsPackDescription> desc); 288 289 /** 290 * List the available pack files. 291 * <p> 292 * The returned list must support random access and must be mutable by the 293 * caller. It is sorted in place using the natural sorting of the returned 294 * DfsPackDescription objects. 295 * 296 * @return available packs. May be empty if there are no packs. 297 * @throws IOException 298 * the packs cannot be listed and the object database is not 299 * functional to the caller. 300 */ 301 protected abstract List<DfsPackDescription> listPacks() throws IOException; 302 303 /** 304 * Open a pack, pack index, or other related file for reading. 305 * 306 * @param desc 307 * description of pack related to the data that will be read. 308 * This is an instance previously obtained from 309 * {@link #listPacks()}, but not necessarily from the same 310 * DfsObjDatabase instance. 311 * @param ext 312 * file extension that will be read i.e "pack" or "idx". 313 * @return channel to read the file. 314 * @throws FileNotFoundException 315 * the file does not exist. 316 * @throws IOException 317 * the file cannot be opened. 318 */ 319 protected abstract ReadableChannel openFile( 320 DfsPackDescription desc, PackExt ext) 321 throws FileNotFoundException, IOException; 322 323 /** 324 * Open a pack, pack index, or other related file for writing. 325 * 326 * @param desc 327 * description of pack related to the data that will be written. 328 * This is an instance previously obtained from 329 * {@link #newPack(PackSource)}. 330 * @param ext 331 * file extension that will be written i.e "pack" or "idx". 332 * @return channel to write the file. 333 * @throws IOException 334 * the file cannot be opened. 335 */ 336 protected abstract DfsOutputStream writeFile( 337 DfsPackDescription desc, PackExt ext) throws IOException; 338 339 void addPack(DfsPackFile newPack) throws IOException { 340 PackList o, n; 341 do { 342 o = packList.get(); 343 if (o == NO_PACKS) { 344 // The repository may not have needed any existing objects to 345 // complete the current task of creating a pack (e.g. push of a 346 // pack with no external deltas). Because we don't scan for 347 // newly added packs on missed object lookups, scan now to 348 // make sure all older packs are available in the packList. 349 o = scanPacks(o); 350 351 // Its possible the scan identified the pack we were asked to 352 // add, as the pack was already committed via commitPack(). 353 // If this is the case return without changing the list. 354 for (DfsPackFile p : o.packs) { 355 if (p == newPack) 356 return; 357 } 358 } 359 360 DfsPackFile[] packs = new DfsPackFile[1 + o.packs.length]; 361 packs[0] = newPack; 362 System.arraycopy(o.packs, 0, packs, 1, o.packs.length); 363 n = new PackList(packs); 364 } while (!packList.compareAndSet(o, n)); 365 } 366 367 private PackList scanPacks(final PackList original) throws IOException { 368 PackList o, n; 369 synchronized (packList) { 370 do { 371 o = packList.get(); 372 if (o != original) { 373 // Another thread did the scan for us, while we 374 // were blocked on the monitor above. 375 // 376 return o; 377 } 378 n = scanPacksImpl(o); 379 if (n == o) 380 return n; 381 } while (!packList.compareAndSet(o, n)); 382 } 383 getRepository().fireEvent(new DfsPacksChangedEvent()); 384 return n; 385 } 386 387 private PackList scanPacksImpl(PackList old) throws IOException { 388 DfsBlockCache cache = DfsBlockCache.getInstance(); 389 Map<DfsPackDescription, DfsPackFile> forReuse = reuseMap(old); 390 List<DfsPackDescription> scanned = listPacks(); 391 Collections.sort(scanned); 392 393 List<DfsPackFile> list = new ArrayList<DfsPackFile>(scanned.size()); 394 boolean foundNew = false; 395 for (DfsPackDescription dsc : scanned) { 396 DfsPackFile oldPack = forReuse.remove(dsc); 397 if (oldPack != null) { 398 list.add(oldPack); 399 } else { 400 list.add(cache.getOrCreate(dsc, null)); 401 foundNew = true; 402 } 403 } 404 405 for (DfsPackFile p : forReuse.values()) 406 p.close(); 407 if (list.isEmpty()) 408 return new PackList(NO_PACKS.packs); 409 if (!foundNew) 410 return old; 411 return new PackList(list.toArray(new DfsPackFile[list.size()])); 412 } 413 414 private static Map<DfsPackDescription, DfsPackFile> reuseMap(PackList old) { 415 Map<DfsPackDescription, DfsPackFile> forReuse 416 = new HashMap<DfsPackDescription, DfsPackFile>(); 417 for (DfsPackFile p : old.packs) { 418 if (p.invalid()) { 419 // The pack instance is corrupted, and cannot be safely used 420 // again. Do not include it in our reuse map. 421 // 422 p.close(); 423 continue; 424 } 425 426 DfsPackFile prior = forReuse.put(p.getPackDescription(), p); 427 if (prior != null) { 428 // This should never occur. It should be impossible for us 429 // to have two pack files with the same name, as all of them 430 // came out of the same directory. If it does, we promised to 431 // close any PackFiles we did not reuse, so close the second, 432 // readers are likely to be actively using the first. 433 // 434 forReuse.put(prior.getPackDescription(), prior); 435 p.close(); 436 } 437 } 438 return forReuse; 439 } 440 441 /** Clears the cached list of packs, forcing them to be scanned again. */ 442 protected void clearCache() { 443 packList.set(NO_PACKS); 444 } 445 446 @Override 447 public void close() { 448 // PackList packs = packList.get(); 449 packList.set(NO_PACKS); 450 451 // TODO Close packs if they aren't cached. 452 // for (DfsPackFile p : packs.packs) 453 // p.close(); 454 } 455 456 private static final class PackList { 457 /** All known packs, sorted. */ 458 final DfsPackFile[] packs; 459 460 PackList(final DfsPackFile[] packs) { 461 this.packs = packs; 462 } 463 } 464 }