1 /* 2 * Copyright (C) 2017, Google Inc. 3 * and other copyright owners as documented in the project's IP log. 4 * 5 * This program and the accompanying materials are made available 6 * under the terms of the Eclipse Distribution License v1.0 which 7 * accompanies this distribution, is reproduced below, and is 8 * available at http://www.eclipse.org/org/documents/edl-v10.php 9 * 10 * All rights reserved. 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials provided 22 * with the distribution. 23 * 24 * - Neither the name of the Eclipse Foundation, Inc. nor the 25 * names of its contributors may be used to endorse or promote 26 * products derived from this software without specific prior 27 * written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 30 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 34 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 38 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 41 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 */ 43 44 package org.eclipse.jgit.internal.storage.reftable; 45 46 import java.io.IOException; 47 import java.io.OutputStream; 48 import java.util.ArrayDeque; 49 import java.util.ArrayList; 50 import java.util.List; 51 52 import org.eclipse.jgit.internal.storage.reftable.ReftableWriter.Stats; 53 import org.eclipse.jgit.lib.PersonIdent; 54 import org.eclipse.jgit.lib.ReflogEntry; 55 56 /** 57 * Merges reftables and compacts them into a single output. 58 * <p> 59 * For a partial compaction callers should {@link #setIncludeDeletes(boolean)} 60 * to {@code true} to ensure the new reftable continues to use a delete marker 61 * to shadow any lower reftable that may have the reference present. 62 * <p> 63 * By default all log entries within the range defined by 64 * {@link #setMinUpdateIndex(long)} and {@link #setMaxUpdateIndex(long)} are 65 * copied, even if no references in the output file match the log records. 66 * Callers may truncate the log to a more recent time horizon with 67 * {@link #setOldestReflogTimeMillis(long)}, or disable the log altogether with 68 * {@code setOldestReflogTimeMillis(Long.MAX_VALUE)}. 69 */ 70 public class ReftableCompactor { 71 private final ReftableWriter writer = new ReftableWriter(); 72 private final ArrayDeque<Reftable> tables = new ArrayDeque<>(); 73 74 private long compactBytesLimit; 75 private long bytesToCompact; 76 private boolean includeDeletes; 77 private long minUpdateIndex = -1; 78 private long maxUpdateIndex; 79 private long oldestReflogTimeMillis; 80 private Stats stats; 81 82 /** 83 * Set configuration for the reftable. 84 * 85 * @param cfg 86 * configuration for the reftable. 87 * @return {@code this} 88 */ 89 public ReftableCompactor setConfig(ReftableConfig cfg) { 90 writer.setConfig(cfg); 91 return this; 92 } 93 94 /** 95 * Set limit on number of bytes from source tables to compact. 96 * 97 * @param bytes 98 * limit on number of bytes from source tables to compact. 99 * @return {@code this} 100 */ 101 public ReftableCompactor setCompactBytesLimit(long bytes) { 102 compactBytesLimit = bytes; 103 return this; 104 } 105 106 /** 107 * Whether to include deletions in the output, which may be necessary for 108 * partial compaction. 109 * 110 * @param deletes 111 * {@code true} to include deletions in the output, which may be 112 * necessary for partial compaction. 113 * @return {@code this} 114 */ 115 public ReftableCompactor setIncludeDeletes(boolean deletes) { 116 includeDeletes = deletes; 117 return this; 118 } 119 120 /** 121 * Set the minimum update index for log entries that appear in the compacted 122 * reftable. 123 * 124 * @param min 125 * the minimum update index for log entries that appear in the 126 * compacted reftable. This should be 1 higher than the prior 127 * reftable's {@code maxUpdateIndex} if this table will be used 128 * in a stack. 129 * @return {@code this} 130 */ 131 public ReftableCompactor setMinUpdateIndex(long min) { 132 minUpdateIndex = min; 133 return this; 134 } 135 136 /** 137 * Set the maximum update index for log entries that appear in the compacted 138 * reftable. 139 * 140 * @param max 141 * the maximum update index for log entries that appear in the 142 * compacted reftable. This should be at least 1 higher than the 143 * prior reftable's {@code maxUpdateIndex} if this table will be 144 * used in a stack. 145 * @return {@code this} 146 */ 147 public ReftableCompactor setMaxUpdateIndex(long max) { 148 maxUpdateIndex = max; 149 return this; 150 } 151 152 /** 153 * Set oldest reflog time to preserve. 154 * 155 * @param timeMillis 156 * oldest log time to preserve. Entries whose timestamps are 157 * {@code >= timeMillis} will be copied into the output file. Log 158 * entries that predate {@code timeMillis} will be discarded. 159 * Specified in Java standard milliseconds since the epoch. 160 * @return {@code this} 161 */ 162 public ReftableCompactor setOldestReflogTimeMillis(long timeMillis) { 163 oldestReflogTimeMillis = timeMillis; 164 return this; 165 } 166 167 /** 168 * Add all of the tables, in the specified order. 169 * <p> 170 * Unconditionally adds all tables, ignoring the 171 * {@link #setCompactBytesLimit(long)}. 172 * 173 * @param readers 174 * tables to compact. Tables should be ordered oldest first/most 175 * recent last so that the more recent tables can shadow the 176 * older results. Caller is responsible for closing the readers. 177 * @throws java.io.IOException 178 * update indexes of a reader cannot be accessed. 179 */ 180 public void addAll(List<? extends Reftable> readers) throws IOException { 181 tables.addAll(readers); 182 for (Reftable r : readers) { 183 if (r instanceof ReftableReader) { 184 adjustUpdateIndexes((ReftableReader) r); 185 } 186 } 187 } 188 189 /** 190 * Try to add this reader at the bottom of the stack. 191 * <p> 192 * A reader may be rejected by returning {@code false} if the compactor is 193 * already rewriting its {@link #setCompactBytesLimit(long)}. When this 194 * happens the caller should stop trying to add tables, and execute the 195 * compaction. 196 * 197 * @param reader 198 * the reader to insert at the bottom of the stack. Caller is 199 * responsible for closing the reader. 200 * @return {@code true} if the compactor accepted this table; {@code false} 201 * if the compactor has reached its limit. 202 * @throws java.io.IOException 203 * if size of {@code reader}, or its update indexes cannot be read. 204 */ 205 public boolean tryAddFirst(ReftableReader reader) throws IOException { 206 long sz = reader.size(); 207 if (compactBytesLimit > 0 && bytesToCompact + sz > compactBytesLimit) { 208 return false; 209 } 210 bytesToCompact += sz; 211 adjustUpdateIndexes(reader); 212 tables.addFirst(reader); 213 return true; 214 } 215 216 private void adjustUpdateIndexes(ReftableReader reader) throws IOException { 217 if (minUpdateIndex == -1) { 218 minUpdateIndex = reader.minUpdateIndex(); 219 } else { 220 minUpdateIndex = Math.min(minUpdateIndex, reader.minUpdateIndex()); 221 } 222 maxUpdateIndex = Math.max(maxUpdateIndex, reader.maxUpdateIndex()); 223 } 224 225 /** 226 * Write a compaction to {@code out}. 227 * 228 * @param out 229 * stream to write the compacted tables to. Caller is responsible 230 * for closing {@code out}. 231 * @throws java.io.IOException 232 * if tables cannot be read, or cannot be written. 233 */ 234 public void compact(OutputStream out) throws IOException { 235 MergedReftable mr = new MergedReftable(new ArrayList<>(tables)); 236 mr.setIncludeDeletes(includeDeletes); 237 238 writer.setMinUpdateIndex(Math.max(minUpdateIndex, 0)); 239 writer.setMaxUpdateIndex(maxUpdateIndex); 240 writer.begin(out); 241 mergeRefs(mr); 242 mergeLogs(mr); 243 writer.finish(); 244 stats = writer.getStats(); 245 } 246 247 /** 248 * Get statistics of the last written reftable. 249 * 250 * @return statistics of the last written reftable. 251 */ 252 public Stats getStats() { 253 return stats; 254 } 255 256 private void mergeRefs(MergedReftable mr) throws IOException { 257 try (RefCursor rc = mr.allRefs()) { 258 while (rc.next()) { 259 writer.writeRef(rc.getRef(), rc.getUpdateIndex()); 260 } 261 } 262 } 263 264 private void mergeLogs(MergedReftable mr) throws IOException { 265 if (oldestReflogTimeMillis == Long.MAX_VALUE) { 266 return; 267 } 268 269 try (LogCursor lc = mr.allLogs()) { 270 while (lc.next()) { 271 long updateIndex = lc.getUpdateIndex(); 272 if (updateIndex < minUpdateIndex 273 || updateIndex > maxUpdateIndex) { 274 // Cannot merge log records outside the header's range. 275 continue; 276 } 277 278 String refName = lc.getRefName(); 279 ReflogEntry log = lc.getReflogEntry(); 280 if (log == null) { 281 if (includeDeletes) { 282 writer.deleteLog(refName, updateIndex); 283 } 284 continue; 285 } 286 287 PersonIdent who = log.getWho(); 288 if (who.getWhen().getTime() >= oldestReflogTimeMillis) { 289 writer.writeLog( 290 refName, 291 updateIndex, 292 who, 293 log.getOldId(), 294 log.getNewId(), 295 log.getComment()); 296 } 297 } 298 } 299 } 300 }