/*
 * Copyright (C) 2011, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.internal.storage.dfs;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;

import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.ObjectDatabase;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.ObjectReader;

/** Manages objects stored in {@link DfsPackFile} on a storage system. */
public abstract class DfsObjDatabase extends ObjectDatabase {
	private static final PackList NO_PACKS = new PackList(new DfsPackFile[0]) {
		@Override
		boolean dirty() {
			return true;
		}

		@Override
		void clearDirty() {
			// Always dirty.
		}

		@Override
		public void markDirty() {
			// Always dirty.
		}
	};

	/** Sources for a pack file. */
	public enum PackSource {
		/** The pack is created by ObjectInserter due to local activity. */
		INSERT(0),

		/**
		 * The pack is created by PackParser due to a network event.
		 * <p>
		 * A received pack can be from either a push into the repository or a
		 * fetch into the repository; the direction doesn't matter. A received
		 * pack was built by the remote Git implementation and may not match the
		 * storage layout preferred by this version. Received packs are likely
		 * to be either compacted or garbage collected in the future.
		 */
		RECEIVE(0),

		/**
		 * The pack was created by compacting multiple packs together.
		 * <p>
		 * Packs created by compacting multiple packs together aren't nearly as
		 * efficient as a fully garbage collected repository, but may save disk
		 * space by reducing redundant copies of base objects.
		 *
		 * @see DfsPackCompactor
		 */
		COMPACT(1),

		/**
		 * The pack was created by Git garbage collection in this implementation.
		 * <p>
		 * This source is only used by the {@link DfsGarbageCollector} when it
		 * builds a pack file by traversing the object graph and copying all
		 * reachable objects into a new pack stream.
		 *
		 * @see DfsGarbageCollector
		 */
		GC(2),

		/** Created from non-heads by {@link DfsGarbageCollector}. */
		GC_REST(3),

		/**
		 * RefTreeGraph pack was created by Git garbage collection.
		 *
		 * @see DfsGarbageCollector
		 */
		GC_TXN(4),

		/**
		 * Pack was created by Git garbage collection.
		 * <p>
		 * This pack contains only unreachable garbage that was found during the
		 * last GC pass. It is retained in a new pack until it is safe to prune
		 * these objects from the repository.
		 */
		UNREACHABLE_GARBAGE(5);

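		/**
		 * Grouping key for sort order; sources sharing a category (such as
		 * INSERT and RECEIVE) are treated alike when pack descriptions are
		 * compared.
		 */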
		final int category;

		PackSource(int category) {
			this.category = category;
		}
	}

	private final AtomicReference<PackList> packList;

	private final DfsRepository repository;

	private DfsReaderOptions readerOptions;

	/**
	 * Initialize an object database for our repository.
	 *
	 * @param repository
	 *            repository owning this object database.
	 *
	 * @param options
	 *            how readers should access the object database.
	 */
	protected DfsObjDatabase(DfsRepository repository,
			DfsReaderOptions options) {
		this.repository = repository;
		this.packList = new AtomicReference<PackList>(NO_PACKS);
		this.readerOptions = options;
	}

	/** @return configured reader options, such as read-ahead. */
	public DfsReaderOptions getReaderOptions() {
		return readerOptions;
	}

	@Override
	public ObjectReader newReader() {
		return new DfsReader(this);
	}

	@Override
	public ObjectInserter newInserter() {
		return new DfsInserter(this);
	}

	/**
	 * Scan and list all available pack files in the repository.
	 *
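	 * <p>
	 * A read-only iteration sketch ({@code db} is a hypothetical reference
	 * to this database):
	 *
	 * <pre>
	 * for (DfsPackFile p : db.getPacks())
	 * 	System.out.println(p.getPackDescription());
	 * </pre>
	 *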
	 * @return list of available packs. The returned array is shared with the
	 *         implementation and must not be modified by the caller.
	 * @throws IOException
	 *             the pack list cannot be initialized.
	 */
	public DfsPackFile[] getPacks() throws IOException {
		return getPackList().packs;
	}

	/**
	 * Scan and list all available pack files in the repository.
	 *
	 * @return list of available packs, with some additional metadata. The
	 *         returned array is shared with the implementation and must not be
	 *         modified by the caller.
	 * @throws IOException
	 *             the pack list cannot be initialized.
	 */
	public PackList getPackList() throws IOException {
		return scanPacks(NO_PACKS);
	}

	/** @return repository owning this object database. */
	protected DfsRepository getRepository() {
		return repository;
	}

	/**
	 * List currently known pack files in the repository, without scanning.
	 *
	 * @return list of available packs. The returned array is shared with the
	 *         implementation and must not be modified by the caller.
	 */
	public DfsPackFile[] getCurrentPacks() {
		return getCurrentPackList().packs;
	}

	/**
	 * List currently known pack files in the repository, without scanning.
	 *
	 * @return list of available packs, with some additional metadata. The
	 *         returned array is shared with the implementation and must not be
	 *         modified by the caller.
	 */
	public PackList getCurrentPackList() {
		return packList.get();
	}

	/**
	 * Does the requested object exist in this database?
	 * <p>
	 * This differs from ObjectDatabase's implementation in that we can selectively
	 * ignore unreachable (garbage) objects.
	 *
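	 * <p>
	 * A minimal usage sketch ({@code db} and {@code id} are hypothetical):
	 *
	 * <pre>
	 * if (db.has(id, true)) {
	 * 	// id exists, ignoring objects that are only unreachable garbage
	 * }
	 * </pre>
	 *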
	 * @param objectId
	 *            identity of the object to test for existence of.
	 * @param avoidUnreachableObjects
	 *            if true, ignore objects that are unreachable.
	 * @return true if the specified object is stored in this database.
	 * @throws IOException
	 *             the object store cannot be accessed.
	 */
	public boolean has(AnyObjectId objectId, boolean avoidUnreachableObjects)
			throws IOException {
		try (ObjectReader or = newReader()) {
			or.setAvoidUnreachableObjects(avoidUnreachableObjects);
			return or.has(objectId);
		}
	}

	/**
	 * Generate a new unique name for a pack file.
	 *
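	 * <p>
	 * A minimal implementation sketch in the spirit of JGit's in-memory
	 * backend; the exact {@code DfsPackDescription} constructor may differ
	 * across versions, and {@code counter} is an assumed
	 * {@code AtomicInteger} field:
	 *
	 * <pre>
	 * protected DfsPackDescription newPack(PackSource source) {
	 * 	return new DfsPackDescription(getRepository().getDescription(),
	 * 			"pack-" + counter.incrementAndGet() + "-" + source.name());
	 * }
	 * </pre>
	 *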
	 * @param source
	 *            where the pack stream is created.
	 * @return a unique name for the pack file. Must not collide with any other
	 *         pack file name in the same DFS.
	 * @throws IOException
	 *             a new unique pack description cannot be generated.
	 */
	protected abstract DfsPackDescription newPack(PackSource source)
			throws IOException;

	/**
	 * Commit a pack and index pair that was written to the DFS.
	 * <p>
	 * Committing the pack/index pair makes them visible to readers. The JGit
	 * DFS code always writes the pack, then the index. This allows a simple
	 * commit process to do nothing if readers always look for both files to
	 * exist and the DFS performs atomic creation of the file (e.g. stream to a
	 * temporary file and rename to target on close).
	 * <p>
	 * During pack compaction or GC the new pack file may be replacing other
	 * older files. Implementations should remove those older files (if any) as
	 * part of the commit of the new file.
	 * <p>
	 * This method is a trivial wrapper around
	 * {@link #commitPackImpl(Collection, Collection)} that calls the
	 * implementation and fires events.
	 *
	 * @param desc
	 *            description of the new packs.
	 * @param replaces
	 *            if not null, list of packs to remove.
	 * @throws IOException
	 *             the packs cannot be committed. On failure a rollback must
	 *             also be attempted by the caller.
	 */
	protected void commitPack(Collection<DfsPackDescription> desc,
			Collection<DfsPackDescription> replaces) throws IOException {
		commitPackImpl(desc, replaces);
		getRepository().fireEvent(new DfsPacksChangedEvent());
	}

	/**
	 * Implementation of pack commit.
	 *
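	 * <p>
	 * A minimal sketch, assuming the backing store offers hypothetical
	 * atomic {@code rename} and {@code delete} primitives:
	 *
	 * <pre>
	 * for (DfsPackDescription d : desc)
	 * 	rename(tmpNameOf(d), finalNameOf(d)); // publish pack, then index
	 * if (replaces != null)
	 * 	for (DfsPackDescription d : replaces)
	 * 		delete(d); // drop packs superseded by compaction or GC
	 * </pre>
	 *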
	 * @see #commitPack(Collection, Collection)
	 *
	 * @param desc
	 *            description of the new packs.
	 * @param replaces
	 *            if not null, list of packs to remove.
	 * @throws IOException
	 *             the packs cannot be committed.
	 */
	protected abstract void commitPackImpl(Collection<DfsPackDescription> desc,
			Collection<DfsPackDescription> replaces) throws IOException;

	/**
	 * Try to roll back a pack creation.
	 * <p>
	 * JGit DFS always writes the pack first, then the index. If the pack does
	 * not yet exist, then neither does the index. A safe DFS implementation
	 * would try to remove both files to ensure they are really gone.
	 * <p>
	 * A rollback does not support failures, as it only occurs when there is
	 * already a failure in progress. A DFS implementor may wish to log
	 * warnings/error messages when a rollback fails, but should not send new
	 * exceptions up the Java callstack.
	 *
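	 * <p>
	 * A best-effort sketch ({@code deleteQuietly} is a hypothetical helper
	 * that logs failures instead of throwing):
	 *
	 * <pre>
	 * for (DfsPackDescription d : desc)
	 * 	deleteQuietly(d); // remove pack and index; never throw
	 * </pre>
	 *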
	 * @param desc
	 *            pack to delete.
	 */
	protected abstract void rollbackPack(Collection<DfsPackDescription> desc);

	/**
	 * List the available pack files.
	 * <p>
	 * The returned list must support random access and must be mutable by the
	 * caller. It is sorted in place using the natural sorting of the returned
	 * DfsPackDescription objects.
	 *
	 * @return available packs. May be empty if there are no packs.
	 * @throws IOException
	 *             the packs cannot be listed and the object database is not
	 *             usable by the caller.
	 */
	protected abstract List<DfsPackDescription> listPacks() throws IOException;

	/**
	 * Open a pack, pack index, or other related file for reading.
	 *
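	 * <p>
	 * A typical read sketch ({@code desc} previously obtained from
	 * {@link #listPacks()}):
	 *
	 * <pre>
	 * try (ReadableChannel ch = openFile(desc, PackExt.INDEX)) {
	 * 	// read and parse the pack index from ch
	 * }
	 * </pre>
	 *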
	 * @param desc
	 *            description of pack related to the data that will be read.
	 *            This is an instance previously obtained from
	 *            {@link #listPacks()}, but not necessarily from the same
	 *            DfsObjDatabase instance.
	 * @param ext
	 *            file extension that will be read, e.g. "pack" or "idx".
	 * @return channel to read the file.
	 * @throws FileNotFoundException
	 *             the file does not exist.
	 * @throws IOException
	 *             the file cannot be opened.
	 */
	protected abstract ReadableChannel openFile(
			DfsPackDescription desc, PackExt ext)
			throws FileNotFoundException, IOException;

	/**
	 * Open a pack, pack index, or other related file for writing.
	 *
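	 * <p>
	 * A typical write sketch ({@code desc} previously obtained from
	 * {@link #newPack(PackSource)}; {@code buf} is a hypothetical byte
	 * array):
	 *
	 * <pre>
	 * try (DfsOutputStream out = writeFile(desc, PackExt.PACK)) {
	 * 	out.write(buf);
	 * }
	 * </pre>
	 *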
	 * @param desc
	 *            description of pack related to the data that will be written.
	 *            This is an instance previously obtained from
	 *            {@link #newPack(PackSource)}.
	 * @param ext
	 *            file extension that will be written, e.g. "pack" or "idx".
	 * @return stream to write the file.
	 * @throws IOException
	 *             the file cannot be opened.
	 */
	protected abstract DfsOutputStream writeFile(
			DfsPackDescription desc, PackExt ext) throws IOException;

	void addPack(DfsPackFile newPack) throws IOException {
		PackList o, n;
		do {
			o = packList.get();
			if (o == NO_PACKS) {
				// The repository may not have needed any existing objects to
				// complete the current task of creating a pack (e.g. push of a
				// pack with no external deltas). Because we don't scan for
				// newly added packs on missed object lookups, scan now to
				// make sure all older packs are available in the packList.
				o = scanPacks(o);

				// It's possible the scan identified the pack we were asked to
				// add, as the pack was already committed via commitPack().
				// If this is the case, return without changing the list.
				for (DfsPackFile p : o.packs) {
					if (p == newPack)
						return;
				}
			}

			DfsPackFile[] packs = new DfsPackFile[1 + o.packs.length];
			packs[0] = newPack;
			System.arraycopy(o.packs, 0, packs, 1, o.packs.length);
			n = new PackListImpl(packs);
		} while (!packList.compareAndSet(o, n));
	}

	PackList scanPacks(final PackList original) throws IOException {
		PackList o, n;
		synchronized (packList) {
			do {
				o = packList.get();
				if (o != original) {
					// Another thread did the scan for us, while we
					// were blocked on the monitor above.
					//
					return o;
				}
				n = scanPacksImpl(o);
				if (n == o)
					return n;
			} while (!packList.compareAndSet(o, n));
		}
		getRepository().fireEvent(new DfsPacksChangedEvent());
		return n;
	}

	private PackList scanPacksImpl(PackList old) throws IOException {
		DfsBlockCache cache = DfsBlockCache.getInstance();
		Map<DfsPackDescription, DfsPackFile> forReuse = reuseMap(old);
		List<DfsPackDescription> scanned = listPacks();
		Collections.sort(scanned);

		List<DfsPackFile> list = new ArrayList<DfsPackFile>(scanned.size());
		boolean foundNew = false;
		for (DfsPackDescription dsc : scanned) {
			DfsPackFile oldPack = forReuse.remove(dsc);
			if (oldPack != null) {
				list.add(oldPack);
			} else {
				list.add(cache.getOrCreate(dsc, null));
				foundNew = true;
			}
		}

		for (DfsPackFile p : forReuse.values())
			p.close();
		if (list.isEmpty())
			return new PackListImpl(NO_PACKS.packs);
		if (!foundNew) {
			old.clearDirty();
			return old;
		}
		return new PackListImpl(list.toArray(new DfsPackFile[list.size()]));
	}

	private static Map<DfsPackDescription, DfsPackFile> reuseMap(PackList old) {
		Map<DfsPackDescription, DfsPackFile> forReuse
			= new HashMap<DfsPackDescription, DfsPackFile>();
		for (DfsPackFile p : old.packs) {
			if (p.invalid()) {
				// The pack instance is corrupted, and cannot be safely used
				// again. Do not include it in our reuse map.
				//
				p.close();
				continue;
			}

			DfsPackFile prior = forReuse.put(p.getPackDescription(), p);
			if (prior != null) {
				// This should never occur. It should be impossible for us
				// to have two pack files with the same name, as all of them
				// came out of the same directory. If it does, we promised to
				// close any PackFiles we did not reuse, so close the second;
				// readers are likely to be actively using the first.
				//
				forReuse.put(prior.getPackDescription(), prior);
				p.close();
			}
		}
		return forReuse;
	}

	/** Clears the cached list of packs, forcing them to be scanned again. */
	protected void clearCache() {
		packList.set(NO_PACKS);
	}

	@Override
	public void close() {
		// PackList packs = packList.get();
		packList.set(NO_PACKS);

		// TODO Close packs if they aren't cached.
		// for (DfsPackFile p : packs.packs)
		// p.close();
	}

	/** Snapshot of packs scanned in a single pass. */
	public static abstract class PackList {
		/** All known packs, sorted. */
		public final DfsPackFile[] packs;

		private long lastModified = -1;

		PackList(DfsPackFile[] packs) {
			this.packs = packs;
		}

		/** @return last modified time of all packs, in milliseconds. */
		public long getLastModified() {
			if (lastModified < 0) {
				long max = 0;
				for (DfsPackFile pack : packs) {
					max = Math.max(max, pack.getPackDescription().getLastModified());
				}
				lastModified = max;
			}
			return lastModified;
		}

		abstract boolean dirty();
		abstract void clearDirty();

		/**
		 * Mark pack list as dirty.
		 * <p>
		 * Used when the caller knows that new data might have been written to the
		 * repository that could invalidate open readers depending on this pack list,
		 * for example if refs are newly scanned.
		 */
		public abstract void markDirty();
	}

	private static final class PackListImpl extends PackList {
		private volatile boolean dirty;

		PackListImpl(DfsPackFile[] packs) {
			super(packs);
		}

		@Override
		boolean dirty() {
			return dirty;
		}

		@Override
		void clearDirty() {
			dirty = false;
		}

		@Override
		public void markDirty() {
			dirty = true;
		}
	}
}