1 /*
2 * Copyright (C) 2017, Google Inc.
3 * and other copyright owners as documented in the project's IP log.
4 *
5 * This program and the accompanying materials are made available
6 * under the terms of the Eclipse Distribution License v1.0 which
7 * accompanies this distribution, is reproduced below, and is
8 * available at http://www.eclipse.org/org/documents/edl-v10.php
9 *
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials provided
22 * with the distribution.
23 *
24 * - Neither the name of the Eclipse Foundation, Inc. nor the
25 * names of its contributors may be used to endorse or promote
26 * products derived from this software without specific prior
27 * written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 */
43
44 package org.eclipse.jgit.internal.storage.reftable;
45
46 import java.io.IOException;
47 import java.io.OutputStream;
48 import java.util.ArrayDeque;
49 import java.util.ArrayList;
50 import java.util.List;
51
52 import org.eclipse.jgit.internal.storage.reftable.ReftableWriter.Stats;
53 import org.eclipse.jgit.lib.PersonIdent;
54 import org.eclipse.jgit.lib.ReflogEntry;
55
56 /**
57 * Merges reftables and compacts them into a single output.
58 * <p>
59 * For a partial compaction callers should {@link #setIncludeDeletes(boolean)}
60 * to {@code true} to ensure the new reftable continues to use a delete marker
61 * to shadow any lower reftable that may have the reference present.
62 * <p>
63 * By default all log entries within the range defined by
64 * {@link #setMinUpdateIndex(long)} and {@link #setMaxUpdateIndex(long)} are
65 * copied, even if no references in the output file match the log records.
66 * Callers may truncate the log to a more recent time horizon with
67 * {@link #setOldestReflogTimeMillis(long)}, or disable the log altogether with
68 * {@code setOldestReflogTimeMillis(Long.MAX_VALUE)}.
69 */
70 public class ReftableCompactor {
71 private final ReftableWriter writer = new ReftableWriter();
72 private final ArrayDeque<Reftable> tables = new ArrayDeque<>();
73
74 private long compactBytesLimit;
75 private long bytesToCompact;
76 private boolean includeDeletes;
77 private long minUpdateIndex = -1;
78 private long maxUpdateIndex;
79 private long oldestReflogTimeMillis;
80 private Stats stats;
81
82 /**
83 * Set configuration for the reftable.
84 *
85 * @param cfg
86 * configuration for the reftable.
87 * @return {@code this}
88 */
89 public ReftableCompactor setConfig(ReftableConfig cfg) {
90 writer.setConfig(cfg);
91 return this;
92 }
93
94 /**
95 * Set limit on number of bytes from source tables to compact.
96 *
97 * @param bytes
98 * limit on number of bytes from source tables to compact.
99 * @return {@code this}
100 */
101 public ReftableCompactor setCompactBytesLimit(long bytes) {
102 compactBytesLimit = bytes;
103 return this;
104 }
105
106 /**
107 * Whether to include deletions in the output, which may be necessary for
108 * partial compaction.
109 *
110 * @param deletes
111 * {@code true} to include deletions in the output, which may be
112 * necessary for partial compaction.
113 * @return {@code this}
114 */
115 public ReftableCompactor setIncludeDeletes(boolean deletes) {
116 includeDeletes = deletes;
117 return this;
118 }
119
120 /**
121 * Set the minimum update index for log entries that appear in the compacted
122 * reftable.
123 *
124 * @param min
125 * the minimum update index for log entries that appear in the
126 * compacted reftable. This should be 1 higher than the prior
127 * reftable's {@code maxUpdateIndex} if this table will be used
128 * in a stack.
129 * @return {@code this}
130 */
131 public ReftableCompactor setMinUpdateIndex(long min) {
132 minUpdateIndex = min;
133 return this;
134 }
135
136 /**
137 * Set the maximum update index for log entries that appear in the compacted
138 * reftable.
139 *
140 * @param max
141 * the maximum update index for log entries that appear in the
142 * compacted reftable. This should be at least 1 higher than the
143 * prior reftable's {@code maxUpdateIndex} if this table will be
144 * used in a stack.
145 * @return {@code this}
146 */
147 public ReftableCompactor setMaxUpdateIndex(long max) {
148 maxUpdateIndex = max;
149 return this;
150 }
151
152 /**
153 * Set oldest reflog time to preserve.
154 *
155 * @param timeMillis
156 * oldest log time to preserve. Entries whose timestamps are
157 * {@code >= timeMillis} will be copied into the output file. Log
158 * entries that predate {@code timeMillis} will be discarded.
159 * Specified in Java standard milliseconds since the epoch.
160 * @return {@code this}
161 */
162 public ReftableCompactor setOldestReflogTimeMillis(long timeMillis) {
163 oldestReflogTimeMillis = timeMillis;
164 return this;
165 }
166
167 /**
168 * Add all of the tables, in the specified order.
169 * <p>
170 * Unconditionally adds all tables, ignoring the
171 * {@link #setCompactBytesLimit(long)}.
172 *
173 * @param readers
174 * tables to compact. Tables should be ordered oldest first/most
175 * recent last so that the more recent tables can shadow the
176 * older results. Caller is responsible for closing the readers.
177 * @throws java.io.IOException
178 * update indexes of a reader cannot be accessed.
179 */
180 public void addAll(List<? extends Reftable> readers) throws IOException {
181 tables.addAll(readers);
182 for (Reftable r : readers) {
183 if (r instanceof ReftableReader) {
184 adjustUpdateIndexes((ReftableReader) r);
185 }
186 }
187 }
188
189 /**
190 * Try to add this reader at the bottom of the stack.
191 * <p>
192 * A reader may be rejected by returning {@code false} if the compactor is
193 * already rewriting its {@link #setCompactBytesLimit(long)}. When this
194 * happens the caller should stop trying to add tables, and execute the
195 * compaction.
196 *
197 * @param reader
198 * the reader to insert at the bottom of the stack. Caller is
199 * responsible for closing the reader.
200 * @return {@code true} if the compactor accepted this table; {@code false}
201 * if the compactor has reached its limit.
202 * @throws java.io.IOException
203 * if size of {@code reader}, or its update indexes cannot be read.
204 */
205 public boolean tryAddFirst(ReftableReader reader) throws IOException {
206 long sz = reader.size();
207 if (compactBytesLimit > 0 && bytesToCompact + sz > compactBytesLimit) {
208 return false;
209 }
210 bytesToCompact += sz;
211 adjustUpdateIndexes(reader);
212 tables.addFirst(reader);
213 return true;
214 }
215
216 private void adjustUpdateIndexes(ReftableReader reader) throws IOException {
217 if (minUpdateIndex == -1) {
218 minUpdateIndex = reader.minUpdateIndex();
219 } else {
220 minUpdateIndex = Math.min(minUpdateIndex, reader.minUpdateIndex());
221 }
222 maxUpdateIndex = Math.max(maxUpdateIndex, reader.maxUpdateIndex());
223 }
224
225 /**
226 * Write a compaction to {@code out}.
227 *
228 * @param out
229 * stream to write the compacted tables to. Caller is responsible
230 * for closing {@code out}.
231 * @throws java.io.IOException
232 * if tables cannot be read, or cannot be written.
233 */
234 public void compact(OutputStream out) throws IOException {
235 MergedReftable mr = new MergedReftable(new ArrayList<>(tables));
236 mr.setIncludeDeletes(includeDeletes);
237
238 writer.setMinUpdateIndex(Math.max(minUpdateIndex, 0));
239 writer.setMaxUpdateIndex(maxUpdateIndex);
240 writer.begin(out);
241 mergeRefs(mr);
242 mergeLogs(mr);
243 writer.finish();
244 stats = writer.getStats();
245 }
246
247 /**
248 * Get statistics of the last written reftable.
249 *
250 * @return statistics of the last written reftable.
251 */
252 public Stats getStats() {
253 return stats;
254 }
255
256 private void mergeRefs(MergedReftable mr) throws IOException {
257 try (RefCursor rc = mr.allRefs()) {
258 while (rc.next()) {
259 writer.writeRef(rc.getRef(), rc.getUpdateIndex());
260 }
261 }
262 }
263
264 private void mergeLogs(MergedReftable mr) throws IOException {
265 if (oldestReflogTimeMillis == Long.MAX_VALUE) {
266 return;
267 }
268
269 try (LogCursor lc = mr.allLogs()) {
270 while (lc.next()) {
271 long updateIndex = lc.getUpdateIndex();
272 if (updateIndex < minUpdateIndex
273 || updateIndex > maxUpdateIndex) {
274 // Cannot merge log records outside the header's range.
275 continue;
276 }
277
278 String refName = lc.getRefName();
279 ReflogEntry log = lc.getReflogEntry();
280 if (log == null) {
281 if (includeDeletes) {
282 writer.deleteLog(refName, updateIndex);
283 }
284 continue;
285 }
286
287 PersonIdent who = log.getWho();
288 if (who.getWhen().getTime() >= oldestReflogTimeMillis) {
289 writer.writeLog(
290 refName,
291 updateIndex,
292 who,
293 log.getOldId(),
294 log.getNewId(),
295 log.getComment());
296 }
297 }
298 }
299 }
300 }