/*
 * Copyright (C) 2011, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following
 * disclaimer in the documentation and/or other materials provided
 * with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 * names of its contributors may be used to endorse or promote
 * products derived from this software without specific prior
 * written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.internal.storage.dfs;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;

import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.ObjectDatabase;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.ObjectReader;

/**
 * Manages objects stored in
 * {@link org.eclipse.jgit.internal.storage.dfs.DfsPackFile} on a storage
 * system.
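 * <p>
 * A typical read path obtains the shared pack list and iterates it without
 * modifying it (a usage sketch; {@code repository} stands for some
 * {@link DfsRepository} instance):
 *
 * <pre>
 * {@code
 * DfsObjDatabase db = repository.getObjectDatabase();
 * for (DfsPackFile pack : db.getPacks()) {
 *     // read only; the returned array is shared with the implementation
 * }
 * }
 * </pre>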
 */
public abstract class DfsObjDatabase extends ObjectDatabase {
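    // Sentinel for "not scanned yet": it always reports dirty, so the first
    // access falls through to scanPacks(NO_PACKS) and performs a real scan.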
    private static final PackList NO_PACKS = new PackList(
            new DfsPackFile[0],
            new DfsReftable[0]) {
        @Override
        boolean dirty() {
            return true;
        }

        @Override
        void clearDirty() {
            // Always dirty.
        }

        @Override
        public void markDirty() {
            // Always dirty.
        }
    };

    /** Sources for a pack file. */
    public enum PackSource {
        /** The pack is created by ObjectInserter due to local activity. */
        INSERT(0),

        /**
         * The pack is created by PackParser due to a network event.
         * <p>
         * A received pack can be from either a push into the repository or a
         * fetch into the repository; the direction doesn't matter. A received
         * pack was built by the remote Git implementation and may not match
         * the storage layout preferred by this version. Received packs are
         * likely to be either compacted or garbage collected in the future.
         */
        RECEIVE(0),

        /**
         * The pack was created by compacting multiple packs together.
         * <p>
         * Packs created by compacting multiple packs together aren't nearly
         * as efficient as a fully garbage collected repository, but may save
         * disk space by reducing redundant copies of base objects.
         *
         * @see DfsPackCompactor
         */
        COMPACT(1),

        /**
         * Pack was created by Git garbage collection by this implementation.
         * <p>
         * This source is only used by the {@link DfsGarbageCollector} when
         * it builds a pack file by traversing the object graph and copying
         * all reachable objects into a new pack stream.
         *
         * @see DfsGarbageCollector
         */
        GC(2),

        /** Created from non-heads by {@link DfsGarbageCollector}. */
        GC_REST(3),

        /**
         * RefTreeGraph pack was created by Git garbage collection.
         *
         * @see DfsGarbageCollector
         */
        GC_TXN(4),

        /**
         * Pack was created by Git garbage collection.
         * <p>
         * This pack contains only unreachable garbage that was found during
         * the last GC pass. It is retained in a new pack until it is safe to
         * prune these objects from the repository.
         */
        UNREACHABLE_GARBAGE(5);

        final int category;

        PackSource(int category) {
            this.category = category;
        }
    }

    private final AtomicReference<PackList> packList;

    private final DfsRepository repository;

    private DfsReaderOptions readerOptions;

    /**
     * Initialize an object database for our repository.
     *
     * @param repository
     *            repository owning this object database.
     * @param options
     *            how readers should access the object database.
     */
    protected DfsObjDatabase(DfsRepository repository,
            DfsReaderOptions options) {
        this.repository = repository;
        this.packList = new AtomicReference<>(NO_PACKS);
        this.readerOptions = options;
    }

    /**
     * Get configured reader options, such as read-ahead.
     *
     * @return configured reader options, such as read-ahead.
     */
    public DfsReaderOptions getReaderOptions() {
        return readerOptions;
    }

    /** {@inheritDoc} */
    @Override
    public DfsReader newReader() {
        return new DfsReader(this);
    }

    /** {@inheritDoc} */
    @Override
    public ObjectInserter newInserter() {
        return new DfsInserter(this);
    }

    /**
     * Scan and list all available pack files in the repository.
     *
     * @return list of available packs. The returned array is shared with the
     *         implementation and must not be modified by the caller.
     * @throws java.io.IOException
     *             the pack list cannot be initialized.
     */
    public DfsPackFile[] getPacks() throws IOException {
        return getPackList().packs;
    }

    /**
     * Scan and list all available reftable files in the repository.
     *
     * @return list of available reftables. The returned array is shared with
     *         the implementation and must not be modified by the caller.
     * @throws java.io.IOException
     *             the pack list cannot be initialized.
     */
    public DfsReftable[] getReftables() throws IOException {
        return getPackList().reftables;
    }

    /**
     * Scan and list all available pack files in the repository.
     *
     * @return list of available packs, with some additional metadata. The
     *         returned array is shared with the implementation and must not
     *         be modified by the caller.
     * @throws java.io.IOException
     *             the pack list cannot be initialized.
     */
    public PackList getPackList() throws IOException {
        return scanPacks(NO_PACKS);
    }

    /**
     * Get repository owning this object database.
     *
     * @return repository owning this object database.
     */
    protected DfsRepository getRepository() {
        return repository;
    }

    /**
     * List currently known pack files in the repository, without scanning.
     *
     * @return list of available packs. The returned array is shared with the
     *         implementation and must not be modified by the caller.
     */
    public DfsPackFile[] getCurrentPacks() {
        return getCurrentPackList().packs;
    }

    /**
     * List currently known reftable files in the repository, without
     * scanning.
     *
     * @return list of available reftables. The returned array is shared with
     *         the implementation and must not be modified by the caller.
     */
    public DfsReftable[] getCurrentReftables() {
        return getCurrentPackList().reftables;
    }

    /**
     * List currently known pack files in the repository, without scanning.
     *
     * @return list of available packs, with some additional metadata. The
     *         returned array is shared with the implementation and must not
     *         be modified by the caller.
     */
    public PackList getCurrentPackList() {
        return packList.get();
    }

    /**
     * Does the requested object exist in this database?
     * <p>
     * This differs from ObjectDatabase's implementation in that we can
     * selectively ignore unreachable (garbage) objects.
     *
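     * <p>
     * A minimal usage sketch ({@code db} and {@code id} are illustrative):
     *
     * <pre>
     * {@code
     * if (db.has(id, true)) {
     *     // id exists and is not in an UNREACHABLE_GARBAGE pack
     * }
     * }
     * </pre>
     *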
     * @param objectId
     *            identity of the object to test for existence of.
     * @param avoidUnreachableObjects
     *            if true, ignore objects that are unreachable.
     * @return true if the specified object is stored in this database.
     * @throws java.io.IOException
     *             the object store cannot be accessed.
     */
    public boolean has(AnyObjectId objectId, boolean avoidUnreachableObjects)
            throws IOException {
        try (ObjectReader or = newReader()) {
            or.setAvoidUnreachableObjects(avoidUnreachableObjects);
            return or.has(objectId);
        }
    }

    /**
     * Generate a new unique name for a pack file.
     *
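     * <p>
     * Implementations commonly derive the name from a timestamp plus a
     * random component so that concurrent writers cannot collide; a sketch
     * (the {@code randomSuffix()} helper is hypothetical):
     *
     * <pre>
     * {@code
     * String name = "pack-" + System.currentTimeMillis() + "-" + randomSuffix();
     * }
     * </pre>
     *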
     * @param source
     *            where the pack stream is created.
     * @return a unique name for the pack file. Must not collide with any
     *         other pack file name in the same DFS.
     * @throws java.io.IOException
     *             a new unique pack description cannot be generated.
     */
    protected abstract DfsPackDescription newPack(PackSource source)
            throws IOException;

    /**
     * Generate a new unique name for a pack file.
     * <p>
     * The default implementation of this method is equivalent to
     * {@code newPack(source).setEstimatedPackSize(estimatedPackSize)}, but
     * clients can override this method to use the given
     * {@code estimatedPackSize} value more efficiently in the process of
     * creating a new
     * {@link org.eclipse.jgit.internal.storage.dfs.DfsPackDescription}
     * object.
     *
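     * <p>
     * An override might look like the following (a sketch;
     * {@code MyPackDescription} is a hypothetical subclass that reserves
     * storage from the estimate up front):
     *
     * <pre>
     * {@code
     * protected DfsPackDescription newPack(PackSource source,
     *         long estimatedPackSize) throws IOException {
     *     return new MyPackDescription(source, estimatedPackSize);
     * }
     * }
     * </pre>
     *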
     * @param source
     *            where the pack stream is created.
     * @param estimatedPackSize
     *            the estimated size of the pack.
     * @return a unique name for the pack file. Must not collide with any
     *         other pack file name in the same DFS.
     * @throws java.io.IOException
     *             a new unique pack description cannot be generated.
     */
    protected DfsPackDescription newPack(PackSource source,
            long estimatedPackSize) throws IOException {
        DfsPackDescription pack = newPack(source);
        pack.setEstimatedPackSize(estimatedPackSize);
        return pack;
    }

    /**
     * Commit a pack and index pair that was written to the DFS.
     * <p>
     * Committing the pack/index pair makes them visible to readers. The JGit
     * DFS code always writes the pack, then the index. This allows a simple
     * commit process to do nothing if readers always look for both files to
     * exist and the DFS performs atomic creation of the file (e.g. stream to
     * a temporary file and rename to target on close).
     * <p>
     * During pack compaction or GC the new pack file may be replacing other
     * older files. Implementations should remove those older files (if any)
     * as part of the commit of the new file.
     * <p>
     * This method is a trivial wrapper around
     * {@link #commitPackImpl(Collection, Collection)} that calls the
     * implementation and fires events.
     *
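     * <p>
     * A file-system backed implementation might commit as follows (a sketch,
     * assuming hypothetical helpers {@code renameIntoPlace} and
     * {@code delete}):
     *
     * <pre>
     * {@code
     * protected void commitPackImpl(Collection<DfsPackDescription> desc,
     *         Collection<DfsPackDescription> replaces) throws IOException {
     *     for (DfsPackDescription d : desc) {
     *         renameIntoPlace(d); // temp name -> final name, atomically
     *     }
     *     if (replaces != null) {
     *         for (DfsPackDescription d : replaces) {
     *             delete(d); // retire packs superseded by the new ones
     *         }
     *     }
     * }
     * }
     * </pre>
     *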
     * @param desc
     *            description of the new packs.
     * @param replaces
     *            if not null, list of packs to remove.
     * @throws java.io.IOException
     *             the packs cannot be committed. On failure a rollback must
     *             also be attempted by the caller.
     */
    protected void commitPack(Collection<DfsPackDescription> desc,
            Collection<DfsPackDescription> replaces) throws IOException {
        commitPackImpl(desc, replaces);
        getRepository().fireEvent(new DfsPacksChangedEvent());
    }

    /**
     * Implementation of pack commit.
     *
     * @see #commitPack(Collection, Collection)
     * @param desc
     *            description of the new packs.
     * @param replaces
     *            if not null, list of packs to remove.
     * @throws java.io.IOException
     *             the packs cannot be committed.
     */
    protected abstract void commitPackImpl(Collection<DfsPackDescription> desc,
            Collection<DfsPackDescription> replaces) throws IOException;

    /**
     * Try to rollback a pack creation.
     * <p>
     * JGit DFS always writes the pack first, then the index. If the pack
     * does not yet exist, then neither does the index. A safe DFS
     * implementation would try to remove both files to ensure they are
     * really gone.
     * <p>
     * A rollback does not support failures, as it only occurs when there is
     * already a failure in progress. A DFS implementor may wish to log
     * warnings/error messages when a rollback fails, but should not send new
     * exceptions up the Java callstack.
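     * <p>
     * For example (a sketch; {@code deleteQuietly} is a hypothetical helper
     * that logs failures instead of throwing):
     *
     * <pre>
     * {@code
     * protected void rollbackPack(Collection<DfsPackDescription> desc) {
     *     for (DfsPackDescription d : desc) {
     *         for (PackExt ext : PackExt.values()) {
     *             deleteQuietly(d, ext); // best effort; never throw
     *         }
     *     }
     * }
     * }
     * </pre>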
     *
     * @param desc
     *            pack to delete.
     */
    protected abstract void rollbackPack(Collection<DfsPackDescription> desc);

    /**
     * List the available pack files.
     * <p>
     * The returned list must support random access and must be mutable by
     * the caller. It is sorted in place using the natural sorting of the
     * returned DfsPackDescription objects.
     *
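     * <p>
     * An in-memory implementation might simply copy its current set (a
     * sketch; {@code packSet} is a hypothetical field):
     *
     * <pre>
     * {@code
     * protected List<DfsPackDescription> listPacks() {
     *     return new ArrayList<>(packSet);
     * }
     * }
     * </pre>
     *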
     * @return available packs. May be empty if there are no packs.
     * @throws java.io.IOException
     *             the packs cannot be listed and the object database is not
     *             functional to the caller.
     */
    protected abstract List<DfsPackDescription> listPacks() throws IOException;

    /**
     * Open a pack, pack index, or other related file for reading.
     *
     * @param desc
     *            description of pack related to the data that will be read.
     *            This is an instance previously obtained from
     *            {@link #listPacks()}, but not necessarily from the same
     *            DfsObjDatabase instance.
     * @param ext
     *            file extension that will be read, e.g. "pack" or "idx".
     * @return channel to read the file.
     * @throws java.io.FileNotFoundException
     *             the file does not exist.
     * @throws java.io.IOException
     *             the file cannot be opened.
     */
    protected abstract ReadableChannel openFile(
            DfsPackDescription desc, PackExt ext)
            throws FileNotFoundException, IOException;

    /**
     * Open a pack, pack index, or other related file for writing.
     *
     * @param desc
     *            description of pack related to the data that will be
     *            written. This is an instance previously obtained from
     *            {@link #newPack(PackSource)}.
     * @param ext
     *            file extension that will be written, e.g. "pack" or "idx".
     * @return channel to write the file.
     * @throws java.io.IOException
     *             the file cannot be opened.
     */
    protected abstract DfsOutputStream writeFile(
            DfsPackDescription desc, PackExt ext) throws IOException;

    void addPack(DfsPackFile newPack) throws IOException {
        PackList o, n;
        do {
            o = packList.get();
            if (o == NO_PACKS) {
                // The repository may not have needed any existing objects to
                // complete the current task of creating a pack (e.g. push of
                // a pack with no external deltas). Because we don't scan for
                // newly added packs on missed object lookups, scan now to
                // make sure all older packs are available in the packList.
                o = scanPacks(o);

                // It's possible the scan identified the pack we were asked
                // to add, as the pack was already committed via commitPack().
                // If this is the case return without changing the list.
                for (DfsPackFile p : o.packs) {
                    if (p.key.equals(newPack.key)) {
                        return;
                    }
                }
            }

            DfsPackFile[] packs = new DfsPackFile[1 + o.packs.length];
            packs[0] = newPack;
            System.arraycopy(o.packs, 0, packs, 1, o.packs.length);
            n = new PackListImpl(packs, o.reftables);
        } while (!packList.compareAndSet(o, n));
    }

    void addReftable(DfsPackDescription add, Set<DfsPackDescription> remove)
            throws IOException {
        PackList o, n;
        do {
            o = packList.get();
            if (o == NO_PACKS) {
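                // Not scanned yet; scan now so reftables (and packs) that
                // already exist are present in the list, mirroring the
                // rescan performed by addPack(DfsPackFile).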
                o = scanPacks(o);
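
                // The scan may have already picked up the reftable we were
                // asked to add, if it was committed via commitPack(). In
                // that case return without changing the list.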
                for (DfsReftable t : o.reftables) {
                    if (t.getPackDescription().equals(add)) {
                        return;
                    }
                }
            }

            List<DfsReftable> tables = new ArrayList<>(1 + o.reftables.length);
            for (DfsReftable t : o.reftables) {
                if (!remove.contains(t.getPackDescription())) {
                    tables.add(t);
                }
            }
            tables.add(new DfsReftable(add));
            n = new PackListImpl(o.packs, tables.toArray(new DfsReftable[0]));
        } while (!packList.compareAndSet(o, n));
    }

    PackList scanPacks(final PackList original) throws IOException {
        PackList o, n;
        synchronized (packList) {
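            // The monitor serializes scanners; the compareAndSet below still
            // guards against concurrent addPack()/addReftable() updates,
            // which modify packList without taking this monitor.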
            do {
                o = packList.get();
                if (o != original) {
                    // Another thread did the scan for us, while we
                    // were blocked on the monitor above.
                    return o;
                }
                n = scanPacksImpl(o);
                if (n == o)
                    return n;
            } while (!packList.compareAndSet(o, n));
        }
        getRepository().fireEvent(new DfsPacksChangedEvent());
        return n;
    }

    private PackList scanPacksImpl(PackList old) throws IOException {
        DfsBlockCache cache = DfsBlockCache.getInstance();
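        // Index the old list so existing pack and reftable handles (and any
        // data they have cached) can be reused across the rescan.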
        Map<DfsPackDescription, DfsPackFile> packs = packMap(old);
        Map<DfsPackDescription, DfsReftable> reftables = reftableMap(old);

        List<DfsPackDescription> scanned = listPacks();
        Collections.sort(scanned);

        List<DfsPackFile> newPacks = new ArrayList<>(scanned.size());
        List<DfsReftable> newReftables = new ArrayList<>(scanned.size());
        boolean foundNew = false;
        for (DfsPackDescription dsc : scanned) {
            DfsPackFile oldPack = packs.remove(dsc);
            if (oldPack != null) {
                newPacks.add(oldPack);
            } else if (dsc.hasFileExt(PackExt.PACK)) {
                newPacks.add(new DfsPackFile(cache, dsc));
                foundNew = true;
            }

            DfsReftable oldReftable = reftables.remove(dsc);
            if (oldReftable != null) {
                newReftables.add(oldReftable);
            } else if (dsc.hasFileExt(PackExt.REFTABLE)) {
                newReftables.add(new DfsReftable(cache, dsc));
                foundNew = true;
            }
        }

        // Return the empty list only when nothing at all was found; a
        // reftable-only repository must not drop its reftables here.
        if (newPacks.isEmpty() && newReftables.isEmpty())
            return new PackListImpl(NO_PACKS.packs, NO_PACKS.reftables);
        if (!foundNew) {
            old.clearDirty();
            return old;
        }
        Collections.sort(newReftables, reftableComparator());
        return new PackListImpl(
                newPacks.toArray(new DfsPackFile[0]),
                newReftables.toArray(new DfsReftable[0]));
    }

    private static Map<DfsPackDescription, DfsPackFile> packMap(PackList old) {
        Map<DfsPackDescription, DfsPackFile> forReuse = new HashMap<>();
        for (DfsPackFile p : old.packs) {
            if (!p.invalid()) {
                forReuse.put(p.desc, p);
            }
        }
        return forReuse;
    }

    private static Map<DfsPackDescription, DfsReftable> reftableMap(
            PackList old) {
        Map<DfsPackDescription, DfsReftable> forReuse = new HashMap<>();
        for (DfsReftable p : old.reftables) {
            if (!p.invalid()) {
                forReuse.put(p.desc, p);
            }
        }
        return forReuse;
    }

    /**
     * Get comparator to sort {@link DfsReftable} by priority.
     *
     * @return comparator to sort {@link DfsReftable} by priority.
     */
    protected Comparator<DfsReftable> reftableComparator() {
        return (fa, fb) -> {
            DfsPackDescription a = fa.getPackDescription();
            DfsPackDescription b = fb.getPackDescription();

            // GC, COMPACT reftables first by higher category.
            int c = category(b) - category(a);
            if (c != 0) {
                return c;
            }

            // Lower maxUpdateIndex first.
            c = Long.signum(a.getMaxUpdateIndex() - b.getMaxUpdateIndex());
            if (c != 0) {
                return c;
            }

            // Older reftable first.
            return Long.signum(a.getLastModified() - b.getLastModified());
        };
    }

    static int category(DfsPackDescription d) {
        PackSource s = d.getPackSource();
        return s != null ? s.category : 0;
    }

    /**
     * Clears the cached list of packs, forcing them to be scanned again.
     */
    protected void clearCache() {
        packList.set(NO_PACKS);
    }

    /** {@inheritDoc} */
    @Override
    public void close() {
        packList.set(NO_PACKS);
    }

    /** Snapshot of packs scanned in a single pass. */
    public abstract static class PackList {
        /** All known packs, sorted. */
        public final DfsPackFile[] packs;

        /** All known reftables, sorted. */
        public final DfsReftable[] reftables;

        private long lastModified = -1;

        PackList(DfsPackFile[] packs, DfsReftable[] reftables) {
            this.packs = packs;
            this.reftables = reftables;
        }

        /** @return last modified time of all packs, in milliseconds. */
        public long getLastModified() {
            if (lastModified < 0) {
                long max = 0;
                for (DfsPackFile pack : packs) {
                    max = Math.max(max,
                            pack.getPackDescription().getLastModified());
                }
                lastModified = max;
            }
            return lastModified;
        }

        abstract boolean dirty();

        abstract void clearDirty();

        /**
         * Mark pack list as dirty.
         * <p>
         * Used when the caller knows that new data might have been written
         * to the repository that could invalidate open readers depending on
         * this pack list, for example if refs are newly scanned.
         */
        public abstract void markDirty();
    }

    private static final class PackListImpl extends PackList {
        private volatile boolean dirty;

        PackListImpl(DfsPackFile[] packs, DfsReftable[] reftables) {
            super(packs, reftables);
        }

        @Override
        boolean dirty() {
            return dirty;
        }

        @Override
        void clearDirty() {
            dirty = false;
        }

        @Override
        public void markDirty() {
            dirty = true;
        }
    }
}