1 /*
2 * Copyright (C) 2011, Google Inc. and others
3 *
4 * This program and the accompanying materials are made available under the
5 * terms of the Eclipse Distribution License v. 1.0 which is available at
6 * https://www.eclipse.org/org/documents/edl-v10.php.
7 *
8 * SPDX-License-Identifier: BSD-3-Clause
9 */
10
11 package org.eclipse.jgit.internal.storage.dfs;
12
13 import static java.util.stream.Collectors.joining;
14
15 import java.io.FileNotFoundException;
16 import java.io.IOException;
17 import java.util.ArrayList;
18 import java.util.Arrays;
19 import java.util.Collection;
20 import java.util.Collections;
21 import java.util.Comparator;
22 import java.util.HashMap;
23 import java.util.HashSet;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.Set;
27 import java.util.concurrent.atomic.AtomicReference;
28
29 import org.eclipse.jgit.internal.storage.pack.PackExt;
30 import org.eclipse.jgit.lib.AnyObjectId;
31 import org.eclipse.jgit.lib.ObjectDatabase;
32 import org.eclipse.jgit.lib.ObjectInserter;
33 import org.eclipse.jgit.lib.ObjectReader;
34
35 /**
36 * Manages objects stored in
37 * {@link org.eclipse.jgit.internal.storage.dfs.DfsPackFile} on a storage
38 * system.
39 */
40 public abstract class DfsObjDatabase extends ObjectDatabase {
	/**
	 * Sentinel pack list installed before the first scan (and restored by
	 * {@link #clearCache()} and {@link #close()}).
	 * <p>
	 * It always reports itself dirty, so any reader consulting it is forced
	 * into a real scan via {@code scanPacks(NO_PACKS)}.
	 */
	private static final PackList NO_PACKS = new PackList(
			new DfsPackFile[0],
			new DfsReftable[0]) {
		@Override
		boolean dirty() {
			return true;
		}

		@Override
		void clearDirty() {
			// Always dirty.
		}

		@Override
		public void markDirty() {
			// Always dirty.
		}
	};
59
60 /**
61 * Sources for a pack file.
62 * <p>
63 * <strong>Note:</strong> When sorting packs by source, do not use the default
64 * comparator based on {@link Enum#compareTo}. Prefer {@link
65 * #DEFAULT_COMPARATOR} or your own {@link ComparatorBuilder}.
66 */
67 public enum PackSource {
68 /** The pack is created by ObjectInserter due to local activity. */
69 INSERT,
70
71 /**
72 * The pack is created by PackParser due to a network event.
73 * <p>
74 * A received pack can be from either a push into the repository, or a
75 * fetch into the repository, the direction doesn't matter. A received
76 * pack was built by the remote Git implementation and may not match the
77 * storage layout preferred by this version. Received packs are likely
78 * to be either compacted or garbage collected in the future.
79 */
80 RECEIVE,
81
82 /**
83 * The pack was created by compacting multiple packs together.
84 * <p>
85 * Packs created by compacting multiple packs together aren't nearly as
86 * efficient as a fully garbage collected repository, but may save disk
87 * space by reducing redundant copies of base objects.
88 *
89 * @see DfsPackCompactor
90 */
91 COMPACT,
92
93 /**
94 * Pack was created by Git garbage collection by this implementation.
95 * <p>
96 * This source is only used by the {@link DfsGarbageCollector} when it
97 * builds a pack file by traversing the object graph and copying all
98 * reachable objects into a new pack stream.
99 *
100 * @see DfsGarbageCollector
101 */
102 GC,
103
104 /** Created from non-heads by {@link DfsGarbageCollector}. */
105 GC_REST,
106
107 /**
108 * Pack was created by Git garbage collection.
109 * <p>
110 * This pack contains only unreachable garbage that was found during the
111 * last GC pass. It is retained in a new pack until it is safe to prune
112 * these objects from the repository.
113 */
114 UNREACHABLE_GARBAGE;
115
116 /**
117 * Default comparator for sources.
118 * <p>
119 * Sorts generally newer, smaller types such as {@code INSERT} and {@code
120 * RECEIVE} earlier; older, larger types such as {@code GC} later; and
121 * {@code UNREACHABLE_GARBAGE} at the end.
122 */
123 public static final Comparator<PackSource> DEFAULT_COMPARATOR =
124 new ComparatorBuilder()
125 .add(INSERT, RECEIVE)
126 .add(COMPACT)
127 .add(GC)
128 .add(GC_REST)
129 .add(UNREACHABLE_GARBAGE)
130 .build();
131
132 /**
133 * Builder for describing {@link PackSource} ordering where some values are
134 * explicitly considered equal to others.
135 */
136 public static class ComparatorBuilder {
137 private final Map<PackSource, Integer> ranks = new HashMap<>();
138 private int counter;
139
140 /**
141 * Add a collection of sources that should sort as equal.
142 * <p>
143 * Sources in the input will sort after sources listed in previous calls
144 * to this method.
145 *
146 * @param sources
147 * sources in this equivalence class.
148 * @return this.
149 */
150 public ComparatorBuilder add(PackSource... sources) {
151 for (PackSource s : sources) {
152 ranks.put(s, Integer.valueOf(counter));
153 }
154 counter++;
155 return this;
156 }
157
158 /**
159 * Build the comparator.
160 *
161 * @return new comparator instance.
162 * @throws IllegalArgumentException
163 * not all {@link PackSource} instances were explicitly assigned
164 * an equivalence class.
165 */
166 public Comparator<PackSource> build() {
167 return new PackSourceComparator(ranks);
168 }
169 }
170
171 private static class PackSourceComparator implements Comparator<PackSource> {
172 private final Map<PackSource, Integer> ranks;
173
174 private PackSourceComparator(Map<PackSource, Integer> ranks) {
175 if (!ranks.keySet().equals(
176 new HashSet<>(Arrays.asList(PackSource.values())))) {
177 throw new IllegalArgumentException();
178 }
179 this.ranks = new HashMap<>(ranks);
180 }
181
182 @Override
183 public int compare(PackSource a, PackSource b) {
184 return ranks.get(a).compareTo(ranks.get(b));
185 }
186
187 @Override
188 public String toString() {
189 return Arrays.stream(PackSource.values())
190 .map(s -> s + "=" + ranks.get(s)) //$NON-NLS-1$
191 .collect(joining(", ", getClass().getSimpleName() + "{", "}")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
192 }
193 }
194 }
195
	/** Current snapshot of packs and reftables; swapped atomically on scan. */
	private final AtomicReference<PackList> packList;

	/** Repository owning this object database. */
	private final DfsRepository repository;

	/** Options controlling how readers access this database. */
	private DfsReaderOptions readerOptions;

	/** Order in which packs are searched during object lookup. */
	private Comparator<DfsPackDescription> packComparator;
203
204 /**
205 * Initialize an object database for our repository.
206 *
207 * @param repository
208 * repository owning this object database.
209 * @param options
210 * how readers should access the object database.
211 */
212 protected DfsObjDatabase(DfsRepository repository,
213 DfsReaderOptions options) {
214 this.repository = repository;
215 this.packList = new AtomicReference<>(NO_PACKS);
216 this.readerOptions = options;
217 this.packComparator = DfsPackDescription.objectLookupComparator();
218 }
219
	/**
	 * Get configured reader options, such as read-ahead.
	 *
	 * @return configured reader options, such as read-ahead.
	 */
	public DfsReaderOptions getReaderOptions() {
		return readerOptions;
	}
228
	/**
	 * Set the comparator used when searching for objects across packs.
	 * <p>
	 * An optimal comparator will find more objects without having to load large
	 * idx files from storage only to find that they don't contain the object.
	 * See {@link DfsPackDescription#objectLookupComparator()} for the default
	 * heuristics. Takes effect on the next scan; existing snapshots keep their
	 * previous ordering.
	 *
	 * @param packComparator
	 *            comparator.
	 */
	public void setPackComparator(Comparator<DfsPackDescription> packComparator) {
		this.packComparator = packComparator;
	}
243
	/**
	 * {@inheritDoc}
	 * <p>
	 * Returns a {@link DfsReader} bound to this database.
	 */
	@Override
	public DfsReader newReader() {
		return new DfsReader(this);
	}
249
	/**
	 * {@inheritDoc}
	 * <p>
	 * Returns a {@link DfsInserter} writing into this database.
	 */
	@Override
	public ObjectInserter newInserter() {
		return new DfsInserter(this);
	}
255
256 /**
257 * Scan and list all available pack files in the repository.
258 *
259 * @return list of available packs. The returned array is shared with the
260 * implementation and must not be modified by the caller.
261 * @throws java.io.IOException
262 * the pack list cannot be initialized.
263 */
264 public DfsPackFile[] getPacks() throws IOException {
265 return getPackList().packs;
266 }
267
268 /**
269 * Scan and list all available reftable files in the repository.
270 *
271 * @return list of available reftables. The returned array is shared with
272 * the implementation and must not be modified by the caller.
273 * @throws java.io.IOException
274 * the pack list cannot be initialized.
275 */
276 public DfsReftable[] getReftables() throws IOException {
277 return getPackList().reftables;
278 }
279
	/**
	 * Scan and list all available pack files in the repository.
	 *
	 * @return list of available packs, with some additional metadata. The
	 *         returned array is shared with the implementation and must not be
	 *         modified by the caller.
	 * @throws java.io.IOException
	 *             the pack list cannot be initialized.
	 */
	public PackList getPackList() throws IOException {
		// Passing NO_PACKS forces a scan unless another thread already
		// replaced the sentinel with a real list.
		return scanPacks(NO_PACKS);
	}
292
	/**
	 * Get repository owning this object database.
	 *
	 * @return repository owning this object database.
	 */
	protected DfsRepository getRepository() {
		return repository;
	}
301
302 /**
303 * List currently known pack files in the repository, without scanning.
304 *
305 * @return list of available packs. The returned array is shared with the
306 * implementation and must not be modified by the caller.
307 */
308 public DfsPackFile[] getCurrentPacks() {
309 return getCurrentPackList().packs;
310 }
311
312 /**
313 * List currently known reftable files in the repository, without scanning.
314 *
315 * @return list of available reftables. The returned array is shared with
316 * the implementation and must not be modified by the caller.
317 */
318 public DfsReftable[] getCurrentReftables() {
319 return getCurrentPackList().reftables;
320 }
321
	/**
	 * List currently known pack files in the repository, without scanning.
	 *
	 * @return list of available packs, with some additional metadata. The
	 *         returned array is shared with the implementation and must not be
	 *         modified by the caller.
	 */
	public PackList getCurrentPackList() {
		// May return the NO_PACKS sentinel if no scan has happened yet.
		return packList.get();
	}
332
333 /**
334 * Does the requested object exist in this database?
335 * <p>
336 * This differs from ObjectDatabase's implementation in that we can selectively
337 * ignore unreachable (garbage) objects.
338 *
339 * @param objectId
340 * identity of the object to test for existence of.
341 * @param avoidUnreachableObjects
342 * if true, ignore objects that are unreachable.
343 * @return true if the specified object is stored in this database.
344 * @throws java.io.IOException
345 * the object store cannot be accessed.
346 */
347 public boolean has(AnyObjectId objectId, boolean avoidUnreachableObjects)
348 throws IOException {
349 try (ObjectReader or = newReader()) {
350 or.setAvoidUnreachableObjects(avoidUnreachableObjects);
351 return or.has(objectId);
352 }
353 }
354
	/**
	 * Generate a new unique name for a pack file.
	 * <p>
	 * The returned description is not yet visible to readers; it becomes
	 * visible only after {@link #commitPack(Collection, Collection)}.
	 *
	 * @param source
	 *            where the pack stream is created.
	 * @return a unique name for the pack file. Must not collide with any other
	 *         pack file name in the same DFS.
	 * @throws java.io.IOException
	 *             a new unique pack description cannot be generated.
	 */
	protected abstract DfsPackDescription newPack(PackSource source)
			throws IOException;
367
	/**
	 * Generate a new unique name for a pack file.
	 *
	 * <p>
	 * Default implementation of this method would be equivalent to
	 * {@code newPack(source).setEstimatedPackSize(estimatedPackSize)}. But the
	 * clients can override this method to use the given
	 * {@code estimatedPackSize} value more efficiently in the process of
	 * creating a new
	 * {@link org.eclipse.jgit.internal.storage.dfs.DfsPackDescription} object.
	 *
	 * @param source
	 *            where the pack stream is created.
	 * @param estimatedPackSize
	 *            the estimated size of the pack.
	 * @return a unique name for the pack file. Must not collide with any other
	 *         pack file name in the same DFS.
	 * @throws java.io.IOException
	 *             a new unique pack description cannot be generated.
	 */
	protected DfsPackDescription newPack(PackSource source,
			long estimatedPackSize) throws IOException {
		DfsPackDescription pack = newPack(source);
		pack.setEstimatedPackSize(estimatedPackSize);
		return pack;
	}
394
	/**
	 * Commit a pack and index pair that was written to the DFS.
	 * <p>
	 * Committing the pack/index pair makes them visible to readers. The JGit
	 * DFS code always writes the pack, then the index. This allows a simple
	 * commit process to do nothing if readers always look for both files to
	 * exist and the DFS performs atomic creation of the file (e.g. stream to a
	 * temporary file and rename to target on close).
	 * <p>
	 * During pack compaction or GC the new pack file may be replacing other
	 * older files. Implementations should remove those older files (if any) as
	 * part of the commit of the new file.
	 * <p>
	 * This method is a trivial wrapper around
	 * {@link #commitPackImpl(Collection, Collection)} that calls the
	 * implementation and fires events.
	 *
	 * @param desc
	 *            description of the new packs.
	 * @param replaces
	 *            if not null, list of packs to remove.
	 * @throws java.io.IOException
	 *             the packs cannot be committed. On failure a rollback must
	 *             also be attempted by the caller.
	 */
	protected void commitPack(Collection<DfsPackDescription> desc,
			Collection<DfsPackDescription> replaces) throws IOException {
		commitPackImpl(desc, replaces);
		// Fire only after the storage-level commit succeeded; an IOException
		// above prevents listeners from seeing a half-committed state.
		getRepository().fireEvent(new DfsPacksChangedEvent());
	}
425
	/**
	 * Implementation of pack commit.
	 * <p>
	 * Implementations need not fire change events; that is handled by
	 * {@link #commitPack(Collection, Collection)}.
	 *
	 * @see #commitPack(Collection, Collection)
	 * @param desc
	 *            description of the new packs.
	 * @param replaces
	 *            if not null, list of packs to remove.
	 * @throws java.io.IOException
	 *             the packs cannot be committed.
	 */
	protected abstract void commitPackImpl(Collection<DfsPackDescription> desc,
			Collection<DfsPackDescription> replaces) throws IOException;
439
	/**
	 * Try to rollback a pack creation.
	 * <p>
	 * JGit DFS always writes the pack first, then the index. If the pack does
	 * not yet exist, then neither does the index. A safe DFS implementation
	 * would try to remove both files to ensure they are really gone.
	 * <p>
	 * A rollback does not support failures, as it only occurs when there is
	 * already a failure in progress. A DFS implementor may wish to log
	 * warnings/error messages when a rollback fails, but should not send new
	 * exceptions up the Java callstack.
	 *
	 * @param desc
	 *            pack to delete.
	 */
	protected abstract void rollbackPack(Collection<DfsPackDescription> desc);
456
	/**
	 * List the available pack files.
	 * <p>
	 * The returned list must support random access and must be mutable by the
	 * caller. It is sorted in place using the natural sorting of the returned
	 * DfsPackDescription objects.
	 *
	 * @return available packs. May be empty if there are no packs.
	 * @throws java.io.IOException
	 *             the packs cannot be listed and the object database is not
	 *             functional to the caller.
	 */
	protected abstract List<DfsPackDescription> listPacks() throws IOException;
470
	/**
	 * Open a pack, pack index, or other related file for reading.
	 *
	 * @param desc
	 *            description of pack related to the data that will be read.
	 *            This is an instance previously obtained from
	 *            {@link #listPacks()}, but not necessarily from the same
	 *            DfsObjDatabase instance.
	 * @param ext
	 *            file extension that will be read i.e "pack" or "idx".
	 * @return channel to read the file.
	 * @throws java.io.FileNotFoundException
	 *             the file does not exist.
	 * @throws java.io.IOException
	 *             the file cannot be opened.
	 */
	protected abstract ReadableChannel openFile(
			DfsPackDescription desc, PackExt ext)
			throws FileNotFoundException, IOException;
490
	/**
	 * Open a pack, pack index, or other related file for writing.
	 *
	 * @param desc
	 *            description of pack related to the data that will be written.
	 *            This is an instance previously obtained from
	 *            {@link #newPack(PackSource)}.
	 * @param ext
	 *            file extension that will be written i.e "pack" or "idx".
	 * @return channel to write the file.
	 * @throws java.io.IOException
	 *             the file cannot be opened.
	 */
	protected abstract DfsOutputStream writeFile(
			DfsPackDescription desc, PackExt ext) throws IOException;
506
	/**
	 * Make a newly written pack visible in the cached pack list.
	 * <p>
	 * Uses an optimistic compare-and-set retry loop so it can race safely with
	 * concurrent scans and other additions.
	 *
	 * @param newPack
	 *            pack to prepend to the list.
	 * @throws IOException
	 *             a required scan of existing packs failed.
	 */
	void addPack(DfsPackFile newPack) throws IOException {
		PackList o, n;
		do {
			o = packList.get();
			if (o == NO_PACKS) {
				// The repository may not have needed any existing objects to
				// complete the current task of creating a pack (e.g. push of a
				// pack with no external deltas). Because we don't scan for
				// newly added packs on missed object lookups, scan now to
				// make sure all older packs are available in the packList.
				o = scanPacks(o);

				// It's possible the scan identified the pack we were asked to
				// add, as the pack was already committed via commitPack().
				// If this is the case return without changing the list.
				for (DfsPackFile p : o.packs) {
					if (p.key.equals(newPack.key)) {
						return;
					}
				}
			}

			// Prepend so the newest pack is searched first.
			DfsPackFile[] packs = new DfsPackFile[1 + o.packs.length];
			packs[0] = newPack;
			System.arraycopy(o.packs, 0, packs, 1, o.packs.length);
			n = new PackListImpl(packs, o.reftables);
		} while (!packList.compareAndSet(o, n));
	}
535
	/**
	 * Make a newly written reftable visible, dropping replaced tables.
	 * <p>
	 * Uses the same compare-and-set retry pattern as {@code addPack}.
	 *
	 * @param add
	 *            description of the reftable to append.
	 * @param remove
	 *            descriptions of reftables superseded by {@code add}.
	 * @throws IOException
	 *             a required scan of existing packs failed.
	 */
	void addReftable(DfsPackDescription add, Set<DfsPackDescription> remove)
			throws IOException {
		PackList o, n;
		do {
			o = packList.get();
			if (o == NO_PACKS) {
				// First use: scan so older tables are present, and bail out
				// if the scan already picked up the table being added.
				o = scanPacks(o);
				for (DfsReftable t : o.reftables) {
					if (t.getPackDescription().equals(add)) {
						return;
					}
				}
			}

			// Keep surviving tables, filtering out the replaced ones, then
			// append the new table at the end.
			List<DfsReftable> tables = new ArrayList<>(1 + o.reftables.length);
			for (DfsReftable t : o.reftables) {
				if (!remove.contains(t.getPackDescription())) {
					tables.add(t);
				}
			}
			tables.add(new DfsReftable(add));
			n = new PackListImpl(o.packs, tables.toArray(new DfsReftable[0]));
		} while (!packList.compareAndSet(o, n));
	}
560
	/**
	 * Rescan storage for packs, unless another thread already refreshed the
	 * list the caller observed.
	 *
	 * @param original
	 *            snapshot the caller based its decision to scan on.
	 * @return the current pack list after the (possibly skipped) scan.
	 * @throws IOException
	 *             the packs cannot be listed.
	 */
	PackList scanPacks(PackList original) throws IOException {
		PackList o, n;
		// The monitor serializes scans; the CAS below is still required
		// because addPack/addReftable update packList without this monitor.
		synchronized (packList) {
			do {
				o = packList.get();
				if (o != original) {
					// Another thread did the scan for us, while we
					// were blocked on the monitor above.
					//
					return o;
				}
				n = scanPacksImpl(o);
				if (n == o)
					return n;
			} while (!packList.compareAndSet(o, n));
		}
		// Only reached when the list actually changed.
		getRepository().fireEvent(new DfsPacksChangedEvent());
		return n;
	}
580
	/**
	 * Read the pack list from storage, reusing still-present files from the
	 * old snapshot so their open handles and caches are preserved.
	 *
	 * @param old
	 *            previous snapshot whose entries may be reused.
	 * @return {@code old} if nothing new appeared, otherwise a new snapshot.
	 * @throws IOException
	 *             the packs cannot be listed.
	 */
	private PackList scanPacksImpl(PackList old) throws IOException {
		DfsBlockCache cache = DfsBlockCache.getInstance();
		// Index the old snapshot; entries left in these maps after the loop
		// were deleted from storage and are simply dropped.
		Map<DfsPackDescription, DfsPackFile> packs = packMap(old);
		Map<DfsPackDescription, DfsReftable> reftables = reftableMap(old);

		List<DfsPackDescription> scanned = listPacks();
		Collections.sort(scanned, packComparator);

		List<DfsPackFile> newPacks = new ArrayList<>(scanned.size());
		List<DfsReftable> newReftables = new ArrayList<>(scanned.size());
		boolean foundNew = false;
		for (DfsPackDescription dsc : scanned) {
			DfsPackFile oldPack = packs.remove(dsc);
			if (oldPack != null) {
				newPacks.add(oldPack);
			} else if (dsc.hasFileExt(PackExt.PACK)) {
				newPacks.add(new DfsPackFile(cache, dsc));
				foundNew = true;
			}

			DfsReftable oldReftable = reftables.remove(dsc);
			if (oldReftable != null) {
				newReftables.add(oldReftable);
			} else if (dsc.hasFileExt(PackExt.REFTABLE)) {
				newReftables.add(new DfsReftable(cache, dsc));
				foundNew = true;
			}
		}

		// Empty repository: return a fresh mutable list rather than the
		// shared NO_PACKS sentinel, which is permanently dirty.
		if (newPacks.isEmpty() && newReftables.isEmpty())
			return new PackListImpl(NO_PACKS.packs, NO_PACKS.reftables);
		if (!foundNew) {
			// Same files as before: keep the old snapshot (and the state its
			// open packs carry), now known to be clean.
			old.clearDirty();
			return old;
		}
		Collections.sort(newReftables, reftableComparator());
		return new PackListImpl(
				newPacks.toArray(new DfsPackFile[0]),
				newReftables.toArray(new DfsReftable[0]));
	}
621
622 private static Map<DfsPackDescription, DfsPackFile> packMap(PackList old) {
623 Map<DfsPackDescription, DfsPackFile> forReuse = new HashMap<>();
624 for (DfsPackFile p : old.packs) {
625 if (!p.invalid()) {
626 forReuse.put(p.desc, p);
627 }
628 }
629 return forReuse;
630 }
631
632 private static Map<DfsPackDescription, DfsReftable> reftableMap(PackList old) {
633 Map<DfsPackDescription, DfsReftable> forReuse = new HashMap<>();
634 for (DfsReftable p : old.reftables) {
635 if (!p.invalid()) {
636 forReuse.put(p.desc, p);
637 }
638 }
639 return forReuse;
640 }
641
642 /**
643 * Get comparator to sort {@link DfsReftable} by priority.
644 *
645 * @return comparator to sort {@link DfsReftable} by priority.
646 */
647 protected Comparator<DfsReftable> reftableComparator() {
648 return Comparator.comparing(
649 DfsReftable::getPackDescription,
650 DfsPackDescription.reftableComparator());
651 }
652
	/**
	 * Clears the cached list of packs, forcing them to be scanned again.
	 */
	protected void clearCache() {
		// Reset to the always-dirty sentinel; the next lookup rescans.
		packList.set(NO_PACKS);
	}
659
	/** {@inheritDoc} */
	@Override
	public void close() {
		// Drop the cached snapshot; no other resources are held here.
		packList.set(NO_PACKS);
	}
665
	/** Snapshot of packs scanned in a single pass. */
	public abstract static class PackList {
		/** All known packs, sorted. */
		public final DfsPackFile[] packs;

		/** All known reftables, sorted. */
		public final DfsReftable[] reftables;

		// Lazily computed by getLastModified(); -1 means "not yet computed".
		// Not volatile: a racing recomputation writes the same value
		// (NOTE(review): assumes pack descriptions are immutable — confirm).
		private long lastModified = -1;

		PackList(DfsPackFile[] packs, DfsReftable[] reftables) {
			this.packs = packs;
			this.reftables = reftables;
		}

		/** @return last modified time of all packs, in milliseconds. */
		public long getLastModified() {
			if (lastModified < 0) {
				long max = 0;
				for (DfsPackFile pack : packs) {
					max = Math.max(max, pack.getPackDescription().getLastModified());
				}
				lastModified = max;
			}
			return lastModified;
		}

		/** @return true if a reader depending on this list should rescan. */
		abstract boolean dirty();

		/** Mark this list clean after a scan confirmed it is current. */
		abstract void clearDirty();

		/**
		 * Mark pack list as dirty.
		 * <p>
		 * Used when the caller knows that new data might have been written to the
		 * repository that could invalidate open readers depending on this pack list,
		 * for example if refs are newly scanned.
		 */
		public abstract void markDirty();
	}
705
	/** Default {@link PackList} backed by a simple volatile dirty flag. */
	private static final class PackListImpl extends PackList {
		// volatile: flag is read and written by concurrent readers/scanners.
		private volatile boolean dirty;

		PackListImpl(DfsPackFile[] packs, DfsReftable[] reftables) {
			super(packs, reftables);
		}

		@Override
		boolean dirty() {
			return dirty;
		}

		@Override
		void clearDirty() {
			dirty = false;
		}

		@Override
		public void markDirty() {
			dirty = true;
		}
	}
728 }