/*
 * Copyright (C) 2011, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following
 * disclaimer in the documentation and/or other materials provided
 * with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 * names of its contributors may be used to endorse or promote
 * products derived from this software without specific prior
 * written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.internal.storage.dfs;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;

import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.ObjectDatabase;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.ObjectReader;

/** Manages objects stored in {@link DfsPackFile} on a storage system. */
public abstract class DfsObjDatabase extends ObjectDatabase {
	private static final PackList NO_PACKS = new PackList(new DfsPackFile[0]);

	/** Sources for a pack file. */
	public enum PackSource {
		/** The pack is created by ObjectInserter due to local activity. */
		INSERT(0),

		/**
		 * The pack is created by PackParser due to a network event.
		 * <p>
		 * A received pack can come from either a push into the repository or
		 * a fetch into the repository; the direction doesn't matter. A
		 * received pack was built by the remote Git implementation and may
		 * not match the storage layout preferred by this version. Received
		 * packs are likely to be either compacted or garbage collected in
		 * the future.
		 */
		RECEIVE(0),

		/**
		 * The pack was created by Git garbage collection by this
		 * implementation.
		 * <p>
		 * This source is only used by the {@link DfsGarbageCollector} when it
		 * builds a pack file by traversing the object graph and copying all
		 * reachable objects into a new pack stream.
		 *
		 * @see DfsGarbageCollector
		 */
		GC(1),

		/**
		 * The RefTreeGraph pack was created by Git garbage collection.
		 *
		 * @see DfsGarbageCollector
		 */
		GC_TXN(1),

		/**
		 * The pack was created by compacting multiple packs together.
		 * <p>
		 * Packs created by compacting multiple packs together aren't nearly as
		 * efficient as a fully garbage collected repository, but may save disk
		 * space by reducing redundant copies of base objects.
		 *
		 * @see DfsPackCompactor
		 */
		COMPACT(1),

		/**
		 * The pack was created by Git garbage collection.
		 * <p>
		 * This pack contains only unreachable garbage that was found during
		 * the last GC pass. It is retained in a new pack until it is safe to
		 * prune these objects from the repository.
		 */
		UNREACHABLE_GARBAGE(2);

		/** Sort bucket; packs with a higher category sort later in the list. */
		final int category;

		PackSource(int category) {
			this.category = category;
		}
	}

	private final AtomicReference<PackList> packList;

	private final DfsRepository repository;

	private DfsReaderOptions readerOptions;

	/**
	 * Initialize an object database for our repository.
	 *
	 * @param repository
	 *            repository owning this object database.
	 * @param options
	 *            how readers should access the object database.
	 */
	protected DfsObjDatabase(DfsRepository repository,
			DfsReaderOptions options) {
		this.repository = repository;
		this.packList = new AtomicReference<PackList>(NO_PACKS);
		this.readerOptions = options;
	}

	/** @return configured reader options, such as read-ahead. */
	public DfsReaderOptions getReaderOptions() {
		return readerOptions;
	}

	@Override
	public ObjectReader newReader() {
		return new DfsReader(this);
	}

	@Override
	public ObjectInserter newInserter() {
		return new DfsInserter(this);
	}

	/**
	 * Scan and list all available pack files in the repository.
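	 * <p>
	 * A minimal usage sketch; {@code db} is assumed to be a reference to
	 * this database:
	 *
	 * <pre>
	 * for (DfsPackFile pack : db.getPacks())
	 * 	System.out.println(pack.getPackDescription());
	 * </pre>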
	 *
	 * @return list of available packs. The returned array is shared with the
	 *         implementation and must not be modified by the caller.
	 * @throws IOException
	 *             the pack list cannot be initialized.
	 */
	public DfsPackFile[] getPacks() throws IOException {
		return scanPacks(NO_PACKS).packs;
	}

	/** @return repository owning this object database. */
	protected DfsRepository getRepository() {
		return repository;
	}

	/**
	 * List currently known pack files in the repository, without scanning.
	 *
	 * @return list of available packs. The returned array is shared with the
	 *         implementation and must not be modified by the caller.
	 */
	public DfsPackFile[] getCurrentPacks() {
		return packList.get().packs;
	}

	/**
	 * Does the requested object exist in this database?
	 * <p>
	 * This differs from ObjectDatabase's implementation in that we can
	 * selectively ignore unreachable (garbage) objects.
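	 * <p>
	 * A minimal usage sketch; {@code db} and {@code objectId} are assumed to
	 * come from the surrounding code:
	 *
	 * <pre>
	 * // true: treat objects found only in UNREACHABLE_GARBAGE packs as absent
	 * boolean present = db.has(objectId, true);
	 * </pre>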
	 *
	 * @param objectId
	 *            identity of the object to test for existence of.
	 * @param avoidUnreachableObjects
	 *            if true, ignore objects that are unreachable.
	 * @return true if the specified object is stored in this database.
	 * @throws IOException
	 *             the object store cannot be accessed.
	 */
	public boolean has(AnyObjectId objectId, boolean avoidUnreachableObjects)
			throws IOException {
		try (ObjectReader or = newReader()) {
			or.setAvoidUnreachableObjects(avoidUnreachableObjects);
			return or.has(objectId);
		}
	}

	/**
	 * Generate a new unique name for a pack file.
	 *
	 * @param source
	 *            where the pack stream is created.
	 * @return a unique name for the pack file. Must not collide with any other
	 *         pack file name in the same DFS.
	 * @throws IOException
	 *             a new unique pack description cannot be generated.
	 */
	protected abstract DfsPackDescription newPack(PackSource source)
			throws IOException;

	/**
	 * Commit a pack and index pair that was written to the DFS.
	 * <p>
	 * Committing the pack/index pair makes them visible to readers. The JGit
	 * DFS code always writes the pack, then the index. This allows a simple
	 * commit process to do nothing if readers always look for both files to
	 * exist and the DFS performs atomic creation of the file (e.g. stream to
	 * a temporary file and rename to target on close).
	 * <p>
	 * During pack compaction or GC the new pack file may be replacing other
	 * older files. Implementations should remove those older files (if any)
	 * as part of the commit of the new file.
	 * <p>
	 * This method is a trivial wrapper around
	 * {@link #commitPackImpl(Collection, Collection)} that calls the
	 * implementation and fires events.
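	 * <p>
	 * A typical write-then-commit sequence, sketched under the contract
	 * above (error handling reduced to a single rollback):
	 *
	 * <pre>
	 * DfsPackDescription desc = newPack(PackSource.INSERT);
	 * try {
	 * 	try (DfsOutputStream out = writeFile(desc, PackExt.PACK)) {
	 * 		// ... stream pack data ...
	 * 	}
	 * 	try (DfsOutputStream out = writeFile(desc, PackExt.INDEX)) {
	 * 		// ... stream index data ...
	 * 	}
	 * 	commitPack(Collections.singletonList(desc), null);
	 * } catch (IOException err) {
	 * 	rollbackPack(Collections.singletonList(desc));
	 * 	throw err;
	 * }
	 * </pre>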
	 *
	 * @param desc
	 *            description of the new packs.
	 * @param replaces
	 *            if not null, list of packs to remove.
	 * @throws IOException
	 *             the packs cannot be committed. On failure a rollback must
	 *             also be attempted by the caller.
	 */
	protected void commitPack(Collection<DfsPackDescription> desc,
			Collection<DfsPackDescription> replaces) throws IOException {
		commitPackImpl(desc, replaces);
		getRepository().fireEvent(new DfsPacksChangedEvent());
	}

	/**
	 * Implementation of pack commit.
	 *
	 * @see #commitPack(Collection, Collection)
	 *
	 * @param desc
	 *            description of the new packs.
	 * @param replaces
	 *            if not null, list of packs to remove.
	 * @throws IOException
	 *             the packs cannot be committed.
	 */
	protected abstract void commitPackImpl(Collection<DfsPackDescription> desc,
			Collection<DfsPackDescription> replaces) throws IOException;

	/**
	 * Try to rollback a pack creation.
	 * <p>
	 * JGit DFS always writes the pack first, then the index. If the pack does
	 * not yet exist, then neither does the index. A safe DFS implementation
	 * would try to remove both files to ensure they are really gone.
	 * <p>
	 * A rollback does not support failures, as it only occurs when there is
	 * already a failure in progress. A DFS implementor may wish to log
	 * warnings/error messages when a rollback fails, but should not send new
	 * exceptions up the Java callstack.
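	 * <p>
	 * An implementation sketch that swallows failures; {@code deleteQuietly}
	 * is a hypothetical backend helper that logs but never throws:
	 *
	 * <pre>
	 * protected void rollbackPack(Collection&lt;DfsPackDescription&gt; desc) {
	 * 	for (DfsPackDescription d : desc) {
	 * 		for (PackExt ext : PackExt.values())
	 * 			deleteQuietly(d, ext);
	 * 	}
	 * }
	 * </pre>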
	 *
	 * @param desc
	 *            pack to delete.
	 */
	protected abstract void rollbackPack(Collection<DfsPackDescription> desc);

	/**
	 * List the available pack files.
	 * <p>
	 * The returned list must support random access and must be mutable by the
	 * caller. It is sorted in place using the natural sorting of the returned
	 * DfsPackDescription objects.
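	 * <p>
	 * A minimal in-memory implementation might simply copy its current
	 * state; {@code knownPacks} is a hypothetical backing collection:
	 *
	 * <pre>
	 * protected List&lt;DfsPackDescription&gt; listPacks() {
	 * 	return new ArrayList&lt;DfsPackDescription&gt;(knownPacks);
	 * }
	 * </pre>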
	 *
	 * @return available packs. May be empty if there are no packs.
	 * @throws IOException
	 *             the packs cannot be listed and the object database is not
	 *             functional to the caller.
	 */
	protected abstract List<DfsPackDescription> listPacks() throws IOException;

	/**
	 * Open a pack, pack index, or other related file for reading.
	 *
	 * @param desc
	 *            description of pack related to the data that will be read.
	 *            This is an instance previously obtained from
	 *            {@link #listPacks()}, but not necessarily from the same
	 *            DfsObjDatabase instance.
	 * @param ext
	 *            file extension that will be read, e.g. "pack" or "idx".
	 * @return channel to read the file.
	 * @throws FileNotFoundException
	 *             the file does not exist.
	 * @throws IOException
	 *             the file cannot be opened.
	 */
	protected abstract ReadableChannel openFile(
			DfsPackDescription desc, PackExt ext)
			throws FileNotFoundException, IOException;

	/**
	 * Open a pack, pack index, or other related file for writing.
	 *
	 * @param desc
	 *            description of pack related to the data that will be
	 *            written. This is an instance previously obtained from
	 *            {@link #newPack(PackSource)}.
	 * @param ext
	 *            file extension that will be written, e.g. "pack" or "idx".
	 * @return channel to write the file.
	 * @throws IOException
	 *             the file cannot be opened.
	 */
	protected abstract DfsOutputStream writeFile(
			DfsPackDescription desc, PackExt ext) throws IOException;

	void addPack(DfsPackFile newPack) throws IOException {
		PackList o, n;
		do {
			o = packList.get();
			if (o == NO_PACKS) {
				// The repository may not have needed any existing objects to
				// complete the current task of creating a pack (e.g. push of
				// a pack with no external deltas). Because we don't scan for
				// newly added packs on missed object lookups, scan now to
				// make sure all older packs are available in the packList.
				o = scanPacks(o);

				// It's possible the scan identified the pack we were asked
				// to add, as the pack was already committed via commitPack().
				// If this is the case return without changing the list.
				for (DfsPackFile p : o.packs) {
					if (p == newPack)
						return;
				}
			}

			DfsPackFile[] packs = new DfsPackFile[1 + o.packs.length];
			packs[0] = newPack;
			System.arraycopy(o.packs, 0, packs, 1, o.packs.length);
			n = new PackList(packs);
		} while (!packList.compareAndSet(o, n));
	}

	private PackList scanPacks(final PackList original) throws IOException {
		PackList o, n;
		synchronized (packList) {
			do {
				o = packList.get();
				if (o != original) {
					// Another thread did the scan for us, while we
					// were blocked on the monitor above.
					//
					return o;
				}
				n = scanPacksImpl(o);
				if (n == o)
					return n;
			} while (!packList.compareAndSet(o, n));
		}
		getRepository().fireEvent(new DfsPacksChangedEvent());
		return n;
	}

	private PackList scanPacksImpl(PackList old) throws IOException {
		DfsBlockCache cache = DfsBlockCache.getInstance();
		Map<DfsPackDescription, DfsPackFile> forReuse = reuseMap(old);
		List<DfsPackDescription> scanned = listPacks();
		Collections.sort(scanned);

		List<DfsPackFile> list = new ArrayList<DfsPackFile>(scanned.size());
		boolean foundNew = false;
		for (DfsPackDescription dsc : scanned) {
			DfsPackFile oldPack = forReuse.remove(dsc);
			if (oldPack != null) {
				list.add(oldPack);
			} else {
				list.add(cache.getOrCreate(dsc, null));
				foundNew = true;
			}
		}

		for (DfsPackFile p : forReuse.values())
			p.close();
		if (list.isEmpty())
			// Return a fresh empty list, not the NO_PACKS sentinel, so
			// callers can tell a completed scan apart from the initial
			// unscanned state.
			return new PackList(NO_PACKS.packs);
		if (!foundNew)
			return old;
		return new PackList(list.toArray(new DfsPackFile[list.size()]));
	}

	private static Map<DfsPackDescription, DfsPackFile> reuseMap(PackList old) {
		Map<DfsPackDescription, DfsPackFile> forReuse
				= new HashMap<DfsPackDescription, DfsPackFile>();
		for (DfsPackFile p : old.packs) {
			if (p.invalid()) {
				// The pack instance is corrupted, and cannot be safely used
				// again. Do not include it in our reuse map.
				//
				p.close();
				continue;
			}

			DfsPackFile prior = forReuse.put(p.getPackDescription(), p);
			if (prior != null) {
				// This should never occur. It should be impossible for us
				// to have two pack files with the same name, as all of them
				// came out of the same directory. If it does, we promised to
				// close any PackFiles we did not reuse, so close the second;
				// readers are likely to be actively using the first.
				//
				forReuse.put(prior.getPackDescription(), prior);
				p.close();
			}
		}
		return forReuse;
	}

	/** Clears the cached list of packs, forcing them to be scanned again. */
	protected void clearCache() {
		packList.set(NO_PACKS);
	}

	@Override
	public void close() {
		// PackList packs = packList.get();
		packList.set(NO_PACKS);

		// TODO Close packs if they aren't cached.
		// for (DfsPackFile p : packs.packs)
		// p.close();
	}

	private static final class PackList {
		/** All known packs, sorted. */
		final DfsPackFile[] packs;

		PackList(final DfsPackFile[] packs) {
			this.packs = packs;
		}
	}
}