/*
 * Copyright (C) 2011, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.internal.storage.dfs;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;

import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.ObjectDatabase;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.ObjectReader;

/** Manages objects stored in {@link DfsPackFile} on a storage system. */
public abstract class DfsObjDatabase extends ObjectDatabase {
	private static final PackList NO_PACKS = new PackList(new DfsPackFile[0]);

	/** Sources for a pack file. */
	public enum PackSource {
		/** The pack is created by ObjectInserter due to local activity. */
		INSERT(0),

		/**
		 * The pack is created by PackParser due to a network event.
		 * <p>
		 * A received pack can come from either a push into the repository or a
		 * fetch into the repository; the direction does not matter. A received
		 * pack was built by the remote Git implementation and may not match the
		 * storage layout preferred by this version. Received packs are likely
		 * to be either compacted or garbage collected in the future.
		 */
		RECEIVE(0),

		/**
		 * The pack was created by Git garbage collection in this implementation.
		 * <p>
		 * This source is only used by the {@link DfsGarbageCollector} when it
		 * builds a pack file by traversing the object graph and copying all
		 * reachable objects into a new pack stream.
		 *
		 * @see DfsGarbageCollector
		 */
		GC(1),

		/**
		 * The RefTreeGraph pack was created by Git garbage collection.
		 *
		 * @see DfsGarbageCollector
		 */
		GC_TXN(1),

		/**
		 * The pack was created by compacting multiple packs together.
		 * <p>
		 * Packs created by compacting multiple packs together aren't nearly as
		 * efficient as a fully garbage collected repository, but may save disk
		 * space by reducing redundant copies of base objects.
		 *
		 * @see DfsPackCompactor
		 */
		COMPACT(1),

		/**
		 * The pack was created by Git garbage collection.
		 * <p>
		 * This pack contains only unreachable garbage that was found during the
		 * last GC pass. It is retained in a new pack until it is safe to prune
		 * these objects from the repository.
		 */
		UNREACHABLE_GARBAGE(2);

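		/**
		 * Ordering category used when sorting the pack list. Packs from
		 * sources that share a category cluster together, with
		 * {@link #UNREACHABLE_GARBAGE} (the highest category) ordered last.
		 */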
		final int category;

		PackSource(int category) {
			this.category = category;
		}
	}

	private final AtomicReference<PackList> packList;

	private final DfsRepository repository;

	private DfsReaderOptions readerOptions;

	/**
	 * Initialize an object database for our repository.
	 *
	 * @param repository
	 *            repository owning this object database.
	 * @param options
	 *            how readers should access the object database.
	 */
	protected DfsObjDatabase(DfsRepository repository,
			DfsReaderOptions options) {
		this.repository = repository;
		this.packList = new AtomicReference<>(NO_PACKS);
		this.readerOptions = options;
	}

	/** @return configured reader options, such as read-ahead. */
	public DfsReaderOptions getReaderOptions() {
		return readerOptions;
	}

	@Override
	public ObjectReader newReader() {
		return new DfsReader(this);
	}

	@Override
	public ObjectInserter newInserter() {
		return new DfsInserter(this);
	}

	/**
	 * Scan and list all available pack files in the repository.
	 *
	 * @return list of available packs. The returned array is shared with the
	 *         implementation and must not be modified by the caller.
	 * @throws IOException
	 *             the pack list cannot be initialized.
	 */
	public DfsPackFile[] getPacks() throws IOException {
		return scanPacks(NO_PACKS).packs;
	}

	/** @return repository owning this object database. */
	protected DfsRepository getRepository() {
		return repository;
	}

	/**
	 * List currently known pack files in the repository, without scanning.
	 *
	 * @return list of available packs. The returned array is shared with the
	 *         implementation and must not be modified by the caller.
	 */
	public DfsPackFile[] getCurrentPacks() {
		return packList.get().packs;
	}

	/**
	 * Does the requested object exist in this database?
	 * <p>
	 * This differs from ObjectDatabase's implementation in that we can
	 * selectively ignore unreachable (garbage) objects.
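	 * <p>
	 * A minimal usage sketch; {@code db} stands for any concrete subclass
	 * of this database, and {@code someObjectId} is a placeholder:
	 *
	 * <pre>{@code
	 * if (db.has(someObjectId, true)) {
	 * 	// Object exists and was found outside of unreachable garbage.
	 * }
	 * }</pre>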
	 *
	 * @param objectId
	 *            identity of the object to test for existence of.
	 * @param avoidUnreachableObjects
	 *            if true, ignore objects that are unreachable.
	 * @return true if the specified object is stored in this database.
	 * @throws IOException
	 *             the object store cannot be accessed.
	 */
	public boolean has(AnyObjectId objectId, boolean avoidUnreachableObjects)
			throws IOException {
		try (ObjectReader or = newReader()) {
			or.setAvoidUnreachableObjects(avoidUnreachableObjects);
			return or.has(objectId);
		}
	}

	/**
	 * Generate a new unique name for a pack file.
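	 * <p>
	 * One workable scheme (an illustrative sketch, not mandated by this
	 * API) derives the name from a timestamp and a random suffix:
	 *
	 * <pre>{@code
	 * @Override
	 * protected DfsPackDescription newPack(PackSource source) {
	 * 	String name = "pack-" + System.currentTimeMillis() + "-"
	 * 			+ Long.toHexString(ThreadLocalRandom.current().nextLong());
	 * 	// assumes the (DfsRepositoryDescription, String) constructor
	 * 	return new DfsPackDescription(getRepository().getDescription(), name);
	 * }
	 * }</pre>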
	 *
	 * @param source
	 *            where the pack stream is created.
	 * @return a unique name for the pack file. Must not collide with any other
	 *         pack file name in the same DFS.
	 * @throws IOException
	 *             a new unique pack description cannot be generated.
	 */
	protected abstract DfsPackDescription newPack(PackSource source)
			throws IOException;

	/**
	 * Commit a pack and index pair that was written to the DFS.
	 * <p>
	 * Committing the pack/index pair makes them visible to readers. The JGit
	 * DFS code always writes the pack, then the index. This allows a simple
	 * commit process to do nothing if readers always look for both files to
	 * exist and the DFS performs atomic creation of the file (e.g. stream to a
	 * temporary file and rename to target on close).
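	 * <p>
	 * As an illustration only: a DFS whose writers stream to a temporary
	 * file and atomically rename it on close needs no extra publish step,
	 * so its commit only deletes replaced packs ({@code deleteAllFilesFor}
	 * is a hypothetical helper):
	 *
	 * <pre>{@code
	 * @Override
	 * protected void commitPackImpl(Collection<DfsPackDescription> desc,
	 * 		Collection<DfsPackDescription> replaces) throws IOException {
	 * 	// New files became visible when renamed into place on close.
	 * 	if (replaces != null) {
	 * 		for (DfsPackDescription d : replaces)
	 * 			deleteAllFilesFor(d); // hypothetical cleanup helper
	 * 	}
	 * }
	 * }</pre>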
	 * <p>
	 * During pack compaction or GC the new pack file may be replacing other
	 * older files. Implementations should remove those older files (if any) as
	 * part of the commit of the new file.
	 * <p>
	 * This method is a trivial wrapper around
	 * {@link #commitPackImpl(Collection, Collection)} that calls the
	 * implementation and fires events.
	 *
	 * @param desc
	 *            description of the new packs.
	 * @param replaces
	 *            if not null, list of packs to remove.
	 * @throws IOException
	 *             the packs cannot be committed. On failure a rollback must
	 *             also be attempted by the caller.
	 */
	protected void commitPack(Collection<DfsPackDescription> desc,
			Collection<DfsPackDescription> replaces) throws IOException {
		commitPackImpl(desc, replaces);
		getRepository().fireEvent(new DfsPacksChangedEvent());
	}

	/**
	 * Implementation of pack commit.
	 *
	 * @param desc
	 *            description of the new packs.
	 * @param replaces
	 *            if not null, list of packs to remove.
	 * @throws IOException
	 *             the packs cannot be committed.
	 * @see #commitPack(Collection, Collection)
	 */
	protected abstract void commitPackImpl(Collection<DfsPackDescription> desc,
			Collection<DfsPackDescription> replaces) throws IOException;

	/**
	 * Try to rollback a pack creation.
	 * <p>
	 * JGit DFS always writes the pack first, then the index. If the pack does
	 * not yet exist, then neither does the index. A safe DFS implementation
	 * would try to remove both files to ensure they are really gone.
	 * <p>
	 * A rollback does not support failures, as it only occurs when there is
	 * already a failure in progress. A DFS implementor may wish to log
	 * warnings/error messages when a rollback fails, but should not send new
	 * exceptions up the Java callstack.
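	 * <p>
	 * A best-effort sketch ({@code deleteQuietly} is a hypothetical helper
	 * that logs failures instead of throwing):
	 *
	 * <pre>{@code
	 * @Override
	 * protected void rollbackPack(Collection<DfsPackDescription> desc) {
	 * 	for (DfsPackDescription d : desc) {
	 * 		for (PackExt ext : PackExt.values())
	 * 			deleteQuietly(d, ext); // never throws
	 * 	}
	 * }
	 * }</pre>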
	 *
	 * @param desc
	 *            pack to delete.
	 */
	protected abstract void rollbackPack(Collection<DfsPackDescription> desc);

	/**
	 * List the available pack files.
	 * <p>
	 * The returned list must support random access and must be mutable by the
	 * caller. It is sorted in place using the natural sorting of the returned
	 * DfsPackDescription objects.
	 *
	 * @return available packs. May be empty if there are no packs.
	 * @throws IOException
	 *             the packs cannot be listed and the object database is not
	 *             functional to the caller.
	 */
	protected abstract List<DfsPackDescription> listPacks() throws IOException;

	/**
	 * Open a pack, pack index, or other related file for reading.
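	 * <p>
	 * A minimal consumption sketch (the 8 KiB buffer size is an arbitrary
	 * choice for illustration):
	 *
	 * <pre>{@code
	 * try (ReadableChannel ch = openFile(desc, PackExt.INDEX)) {
	 * 	ByteBuffer buf = ByteBuffer.allocate(8192);
	 * 	while (ch.read(buf) != -1) {
	 * 		buf.flip();
	 * 		// consume buffered bytes here
	 * 		buf.clear();
	 * 	}
	 * }
	 * }</pre>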
	 *
	 * @param desc
	 *            description of pack related to the data that will be read.
	 *            This is an instance previously obtained from
	 *            {@link #listPacks()}, but not necessarily from the same
	 *            DfsObjDatabase instance.
	 * @param ext
	 *            file extension that will be read, e.g. "pack" or "idx".
	 * @return channel to read the file.
	 * @throws FileNotFoundException
	 *             the file does not exist.
	 * @throws IOException
	 *             the file cannot be opened.
	 */
	protected abstract ReadableChannel openFile(
			DfsPackDescription desc, PackExt ext)
			throws FileNotFoundException, IOException;

	/**
	 * Open a pack, pack index, or other related file for writing.
	 *
	 * @param desc
	 *            description of pack related to the data that will be written.
	 *            This is an instance previously obtained from
	 *            {@link #newPack(PackSource)}.
	 * @param ext
	 *            file extension that will be written, e.g. "pack" or "idx".
	 * @return channel to write the file.
	 * @throws IOException
	 *             the file cannot be opened.
	 */
	protected abstract DfsOutputStream writeFile(
			DfsPackDescription desc, PackExt ext) throws IOException;

	void addPack(DfsPackFile newPack) throws IOException {
		PackList o, n;
		do {
			o = packList.get();
			if (o == NO_PACKS) {
				// The repository may not have needed any existing objects to
				// complete the current task of creating a pack (e.g. push of a
				// pack with no external deltas). Because we don't scan for
				// newly added packs on missed object lookups, scan now to
				// make sure all older packs are available in the packList.
				o = scanPacks(o);

				// It's possible the scan identified the pack we were asked to
				// add, as the pack was already committed via commitPack().
				// If this is the case, return without changing the list.
				for (DfsPackFile p : o.packs) {
					if (p == newPack)
						return;
				}
			}

			DfsPackFile[] packs = new DfsPackFile[1 + o.packs.length];
			packs[0] = newPack;
			System.arraycopy(o.packs, 0, packs, 1, o.packs.length);
			n = new PackList(packs);
		} while (!packList.compareAndSet(o, n));
	}

	private PackList scanPacks(final PackList original) throws IOException {
		PackList o, n;
		synchronized (packList) {
			do {
				o = packList.get();
				if (o != original) {
					// Another thread did the scan for us, while we
					// were blocked on the monitor above.
					return o;
				}
				n = scanPacksImpl(o);
				if (n == o)
					return n;
			} while (!packList.compareAndSet(o, n));
		}
		getRepository().fireEvent(new DfsPacksChangedEvent());
		return n;
	}

	private PackList scanPacksImpl(PackList old) throws IOException {
		DfsBlockCache cache = DfsBlockCache.getInstance();
		Map<DfsPackDescription, DfsPackFile> forReuse = reuseMap(old);
		List<DfsPackDescription> scanned = listPacks();
		Collections.sort(scanned);

		List<DfsPackFile> list = new ArrayList<>(scanned.size());
		boolean foundNew = false;
		for (DfsPackDescription dsc : scanned) {
			DfsPackFile oldPack = forReuse.remove(dsc);
			if (oldPack != null) {
				list.add(oldPack);
			} else {
				list.add(cache.getOrCreate(dsc, null));
				foundNew = true;
			}
		}

		for (DfsPackFile p : forReuse.values())
			p.close();
		if (list.isEmpty())
			return new PackList(NO_PACKS.packs);
		if (!foundNew)
			return old;
		return new PackList(list.toArray(new DfsPackFile[list.size()]));
	}

	private static Map<DfsPackDescription, DfsPackFile> reuseMap(PackList old) {
		Map<DfsPackDescription, DfsPackFile> forReuse = new HashMap<>();
		for (DfsPackFile p : old.packs) {
			if (p.invalid()) {
				// The pack instance is corrupted, and cannot be safely used
				// again. Do not include it in our reuse map.
				p.close();
				continue;
			}

			DfsPackFile prior = forReuse.put(p.getPackDescription(), p);
			if (prior != null) {
				// This should never occur. It should be impossible for us
				// to have two pack files with the same name, as all of them
				// came out of the same directory. If it does, we promised to
				// close any PackFiles we did not reuse, so close the second;
				// readers are likely to be actively using the first.
				forReuse.put(prior.getPackDescription(), prior);
				p.close();
			}
		}
		return forReuse;
	}

	/** Clears the cached list of packs, forcing them to be scanned again. */
	protected void clearCache() {
		packList.set(NO_PACKS);
	}

	@Override
	public void close() {
		// PackList packs = packList.get();
		packList.set(NO_PACKS);

		// TODO Close packs if they aren't cached.
		// for (DfsPackFile p : packs.packs)
		// p.close();
	}

	private static final class PackList {
		/** All known packs, sorted. */
		final DfsPackFile[] packs;

		PackList(final DfsPackFile[] packs) {
			this.packs = packs;
		}
	}
}