1 /* 2 * Copyright (C) 2008-2009, Google Inc. 3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> 4 * and other copyright owners as documented in the project's IP log. 5 * 6 * This program and the accompanying materials are made available 7 * under the terms of the Eclipse Distribution License v1.0 which 8 * accompanies this distribution, is reproduced below, and is 9 * available at http://www.eclipse.org/org/documents/edl-v10.php 10 * 11 * All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials provided 23 * with the distribution. 24 * 25 * - Neither the name of the Eclipse Foundation, Inc. nor the 26 * names of its contributors may be used to endorse or promote 27 * products derived from this software without specific prior 28 * written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 31 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 32 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 35 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 36 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 37 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 38 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 39 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 40 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 41 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 42 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 */ 44 45 package org.eclipse.jgit.revwalk; 46 47 import static java.nio.charset.StandardCharsets.UTF_8; 48 49 import java.io.IOException; 50 import java.nio.charset.Charset; 51 import java.nio.charset.IllegalCharsetNameException; 52 import java.nio.charset.UnsupportedCharsetException; 53 import java.util.ArrayList; 54 import java.util.Collections; 55 import java.util.List; 56 57 import org.eclipse.jgit.annotations.Nullable; 58 import org.eclipse.jgit.errors.IncorrectObjectTypeException; 59 import org.eclipse.jgit.errors.MissingObjectException; 60 import org.eclipse.jgit.lib.AnyObjectId; 61 import org.eclipse.jgit.lib.Constants; 62 import org.eclipse.jgit.lib.MutableObjectId; 63 import org.eclipse.jgit.lib.ObjectInserter; 64 import org.eclipse.jgit.lib.ObjectReader; 65 import org.eclipse.jgit.lib.PersonIdent; 66 import org.eclipse.jgit.util.RawParseUtils; 67 import org.eclipse.jgit.util.StringUtils; 68 69 /** A commit reference to a commit in the DAG. */ 70 public class RevCommit extends RevObject { 71 private static final int STACK_DEPTH = 500; 72 73 /** 74 * Parse a commit from its canonical format. 75 * 76 * This method constructs a temporary revision pool, parses the commit as 77 * supplied, and returns it to the caller. Since the commit was built inside 78 * of a private revision pool its parent pointers will be initialized, but 79 * will not have their headers loaded. 80 * 81 * Applications are discouraged from using this API. Callers usually need 82 * more than one commit. Use {@link RevWalk#parseCommit(AnyObjectId)} to 83 * obtain a RevCommit from an existing repository. 84 * 85 * @param raw 86 * the canonical formatted commit to be parsed. 87 * @return the parsed commit, in an isolated revision pool that is not 88 * available to the caller. 89 */ 90 public static RevCommit parse(byte[] raw) { 91 try { 92 return parse(new RevWalk((ObjectReader) null), raw); 93 } catch (IOException ex) { 94 throw new RuntimeException(ex); 95 } 96 } 97 98 /** 99 * Parse a commit from its canonical format. 100 * <p> 101 * This method inserts the commit directly into the caller supplied revision 102 * pool, making it appear as though the commit exists in the repository, 103 * even if it doesn't. The repository under the pool is not affected. 104 * <p> 105 * The body of the commit (message, author, committer) is always retained in 106 * the returned {@code RevCommit}, even if the supplied {@code RevWalk} has 107 * been configured with {@code setRetainBody(false)}. 108 * 109 * @param rw 110 * the revision pool to allocate the commit within. The commit's 111 * tree and parent pointers will be obtained from this pool. 112 * @param raw 113 * the canonical formatted commit to be parsed. This buffer will 114 * be retained by the returned {@code RevCommit} and must not be 115 * modified by the caller. 116 * @return the parsed commit, in an isolated revision pool that is not 117 * available to the caller. 118 * @throws IOException 119 * in case of RevWalk initialization fails 120 */ 121 public static RevCommit parse(RevWalk rw, byte[] raw) throws IOException { 122 try (ObjectInserter.Formatter fmt = new ObjectInserter.Formatter()) { 123 RevCommit r = rw.lookupCommit(fmt.idFor(Constants.OBJ_COMMIT, raw)); 124 r.parseCanonical(rw, raw); 125 r.buffer = raw; 126 return r; 127 } 128 } 129 130 static final RevCommit[] NO_PARENTS = {}; 131 132 private RevTree tree; 133 134 RevCommit[] parents; 135 136 int commitTime; // An int here for performance, overflows in 2038 137 138 int inDegree; 139 140 private byte[] buffer; 141 142 /** 143 * Create a new commit reference. 144 * 145 * @param id 146 * object name for the commit. 147 */ 148 protected RevCommit(final AnyObjectId id) { 149 super(id); 150 } 151 152 @Override 153 void parseHeaders(final RevWalk walk) throws MissingObjectException, 154 IncorrectObjectTypeException, IOException { 155 parseCanonical(walk, walk.getCachedBytes(this)); 156 } 157 158 @Override 159 void parseBody(final RevWalk walk) throws MissingObjectException, 160 IncorrectObjectTypeException, IOException { 161 if (buffer == null) { 162 buffer = walk.getCachedBytes(this); 163 if ((flags & PARSED) == 0) 164 parseCanonical(walk, buffer); 165 } 166 } 167 168 void parseCanonical(final RevWalk walk, final byte[] raw) 169 throws IOException { 170 if (!walk.shallowCommitsInitialized) 171 walk.initializeShallowCommits(); 172 173 final MutableObjectId idBuffer = walk.idBuffer; 174 idBuffer.fromString(raw, 5); 175 tree = walk.lookupTree(idBuffer); 176 177 int ptr = 46; 178 if (parents == null) { 179 RevCommit[] pList = new RevCommit[1]; 180 int nParents = 0; 181 for (;;) { 182 if (raw[ptr] != 'p') 183 break; 184 idBuffer.fromString(raw, ptr + 7); 185 final RevCommit p = walk.lookupCommit(idBuffer); 186 if (nParents == 0) 187 pList[nParents++] = p; 188 else if (nParents == 1) { 189 pList = new RevCommit[] { pList[0], p }; 190 nParents = 2; 191 } else { 192 if (pList.length <= nParents) { 193 RevCommit[] old = pList; 194 pList = new RevCommit[pList.length + 32]; 195 System.arraycopy(old, 0, pList, 0, nParents); 196 } 197 pList[nParents++] = p; 198 } 199 ptr += 48; 200 } 201 if (nParents != pList.length) { 202 RevCommit[] old = pList; 203 pList = new RevCommit[nParents]; 204 System.arraycopy(old, 0, pList, 0, nParents); 205 } 206 parents = pList; 207 } 208 209 // extract time from "committer " 210 ptr = RawParseUtils.committer(raw, ptr); 211 if (ptr > 0) { 212 ptr = RawParseUtils.nextLF(raw, ptr, '>'); 213 214 // In 2038 commitTime will overflow unless it is changed to long. 215 commitTime = RawParseUtils.parseBase10(raw, ptr, null); 216 } 217 218 if (walk.isRetainBody()) 219 buffer = raw; 220 flags |= PARSED; 221 } 222 223 @Override 224 public final int getType() { 225 return Constants.OBJ_COMMIT; 226 } 227 228 static void carryFlags(RevCommit c, int carry) { 229 FIFORevQueue q = carryFlags1(c, carry, 0); 230 if (q != null) 231 slowCarryFlags(q, carry); 232 } 233 234 private static FIFORevQueue carryFlags1(RevCommit c, int carry, int depth) { 235 for(;;) { 236 RevCommit[] pList = c.parents; 237 if (pList == null || pList.length == 0) 238 return null; 239 if (pList.length != 1) { 240 if (depth == STACK_DEPTH) 241 return defer(c); 242 for (int i = 1; i < pList.length; i++) { 243 RevCommit p = pList[i]; 244 if ((p.flags & carry) == carry) 245 continue; 246 p.flags |= carry; 247 FIFORevQueue q = carryFlags1(p, carry, depth + 1); 248 if (q != null) 249 return defer(q, carry, pList, i + 1); 250 } 251 } 252 253 c = pList[0]; 254 if ((c.flags & carry) == carry) 255 return null; 256 c.flags |= carry; 257 } 258 } 259 260 private static FIFORevQueue defer(RevCommit c) { 261 FIFORevQueue q = new FIFORevQueue(); 262 q.add(c); 263 return q; 264 } 265 266 private static FIFORevQueue defer(FIFORevQueue q, int carry, 267 RevCommit[] pList, int i) { 268 // In normal case the caller will run pList[0] in a tail recursive 269 // fashion by updating the variable. However the caller is unwinding 270 // the stack and will skip that pList[0] execution step. 271 carryOneStep(q, carry, pList[0]); 272 273 // Remaining parents (if any) need to have flags checked and be 274 // enqueued if they have ancestors. 275 for (; i < pList.length; i++) 276 carryOneStep(q, carry, pList[i]); 277 return q; 278 } 279 280 private static void slowCarryFlags(FIFORevQueue q, int carry) { 281 // Commits in q have non-null parent arrays and have set all 282 // flags in carry. This loop finishes copying over the graph. 283 for (RevCommit c; (c = q.next()) != null;) { 284 for (RevCommit p : c.parents) 285 carryOneStep(q, carry, p); 286 } 287 } 288 289 private static void carryOneStep(FIFORevQueue q, int carry, RevCommit c) { 290 if ((c.flags & carry) != carry) { 291 c.flags |= carry; 292 if (c.parents != null) 293 q.add(c); 294 } 295 } 296 297 /** 298 * Carry a RevFlag set on this commit to its parents. 299 * <p> 300 * If this commit is parsed, has parents, and has the supplied flag set on 301 * it we automatically add it to the parents, grand-parents, and so on until 302 * an unparsed commit or a commit with no parents is discovered. This 303 * permits applications to force a flag through the history chain when 304 * necessary. 305 * 306 * @param flag 307 * the single flag value to carry back onto parents. 308 */ 309 public void carry(final RevFlag flag) { 310 final int carry = flags & flag.mask; 311 if (carry != 0) 312 carryFlags(this, carry); 313 } 314 315 /** 316 * Time from the "committer " line of the buffer. 317 * 318 * @return time, expressed as seconds since the epoch. 319 */ 320 public final int getCommitTime() { 321 return commitTime; 322 } 323 324 /** 325 * Get a reference to this commit's tree. 326 * 327 * @return tree of this commit. 328 */ 329 public final RevTree getTree() { 330 return tree; 331 } 332 333 /** 334 * Get the number of parent commits listed in this commit. 335 * 336 * @return number of parents; always a positive value but can be 0. 337 */ 338 public final int getParentCount() { 339 return parents.length; 340 } 341 342 /** 343 * Get the nth parent from this commit's parent list. 344 * 345 * @param nth 346 * parent index to obtain. Must be in the range 0 through 347 * {@link #getParentCount()}-1. 348 * @return the specified parent. 349 * @throws ArrayIndexOutOfBoundsException 350 * an invalid parent index was specified. 351 */ 352 public final RevCommit getParent(final int nth) { 353 return parents[nth]; 354 } 355 356 /** 357 * Obtain an array of all parents (<b>NOTE - THIS IS NOT A COPY</b>). 358 * <p> 359 * This method is exposed only to provide very fast, efficient access to 360 * this commit's parent list. Applications relying on this list should be 361 * very careful to ensure they do not modify its contents during their use 362 * of it. 363 * 364 * @return the array of parents. 365 */ 366 public final RevCommit[] getParents() { 367 return parents; 368 } 369 370 /** 371 * Obtain the raw unparsed commit body (<b>NOTE - THIS IS NOT A COPY</b>). 372 * <p> 373 * This method is exposed only to provide very fast, efficient access to 374 * this commit's message buffer within a RevFilter. Applications relying on 375 * this buffer should be very careful to ensure they do not modify its 376 * contents during their use of it. 377 * 378 * @return the raw unparsed commit body. This is <b>NOT A COPY</b>. 379 * Altering the contents of this buffer may alter the walker's 380 * knowledge of this commit, and the results it produces. 381 */ 382 public final byte[] getRawBuffer() { 383 return buffer; 384 } 385 386 /** 387 * Parse the author identity from the raw buffer. 388 * <p> 389 * This method parses and returns the content of the author line, after 390 * taking the commit's character set into account and decoding the author 391 * name and email address. This method is fairly expensive and produces a 392 * new PersonIdent instance on each invocation. Callers should invoke this 393 * method only if they are certain they will be outputting the result, and 394 * should cache the return value for as long as necessary to use all 395 * information from it. 396 * <p> 397 * RevFilter implementations should try to use {@link RawParseUtils} to scan 398 * the {@link #getRawBuffer()} instead, as this will allow faster evaluation 399 * of commits. 400 * 401 * @return identity of the author (name, email) and the time the commit was 402 * made by the author; null if no author line was found. 403 */ 404 public final PersonIdent getAuthorIdent() { 405 final byte[] raw = buffer; 406 final int nameB = RawParseUtils.author(raw, 0); 407 if (nameB < 0) 408 return null; 409 return RawParseUtils.parsePersonIdent(raw, nameB); 410 } 411 412 /** 413 * Parse the committer identity from the raw buffer. 414 * <p> 415 * This method parses and returns the content of the committer line, after 416 * taking the commit's character set into account and decoding the committer 417 * name and email address. This method is fairly expensive and produces a 418 * new PersonIdent instance on each invocation. Callers should invoke this 419 * method only if they are certain they will be outputting the result, and 420 * should cache the return value for as long as necessary to use all 421 * information from it. 422 * <p> 423 * RevFilter implementations should try to use {@link RawParseUtils} to scan 424 * the {@link #getRawBuffer()} instead, as this will allow faster evaluation 425 * of commits. 426 * 427 * @return identity of the committer (name, email) and the time the commit 428 * was made by the committer; null if no committer line was found. 429 */ 430 public final PersonIdent getCommitterIdent() { 431 final byte[] raw = buffer; 432 final int nameB = RawParseUtils.committer(raw, 0); 433 if (nameB < 0) 434 return null; 435 return RawParseUtils.parsePersonIdent(raw, nameB); 436 } 437 438 /** 439 * Parse the complete commit message and decode it to a string. 440 * <p> 441 * This method parses and returns the message portion of the commit buffer, 442 * after taking the commit's character set into account and decoding the 443 * buffer using that character set. This method is a fairly expensive 444 * operation and produces a new string on each invocation. 445 * 446 * @return decoded commit message as a string. Never null. 447 */ 448 public final String getFullMessage() { 449 byte[] raw = buffer; 450 int msgB = RawParseUtils.commitMessage(raw, 0); 451 if (msgB < 0) { 452 return ""; //$NON-NLS-1$ 453 } 454 return RawParseUtils.decode(guessEncoding(), raw, msgB, raw.length); 455 } 456 457 /** 458 * Parse the commit message and return the first "line" of it. 459 * <p> 460 * The first line is everything up to the first pair of LFs. This is the 461 * "oneline" format, suitable for output in a single line display. 462 * <p> 463 * This method parses and returns the message portion of the commit buffer, 464 * after taking the commit's character set into account and decoding the 465 * buffer using that character set. This method is a fairly expensive 466 * operation and produces a new string on each invocation. 467 * 468 * @return decoded commit message as a string. Never null. The returned 469 * string does not contain any LFs, even if the first paragraph 470 * spanned multiple lines. Embedded LFs are converted to spaces. 471 */ 472 public final String getShortMessage() { 473 byte[] raw = buffer; 474 int msgB = RawParseUtils.commitMessage(raw, 0); 475 if (msgB < 0) { 476 return ""; //$NON-NLS-1$ 477 } 478 479 int msgE = RawParseUtils.endOfParagraph(raw, msgB); 480 String str = RawParseUtils.decode(guessEncoding(), raw, msgB, msgE); 481 if (hasLF(raw, msgB, msgE)) { 482 str = StringUtils.replaceLineBreaksWithSpace(str); 483 } 484 return str; 485 } 486 487 static boolean hasLF(final byte[] r, int b, final int e) { 488 while (b < e) 489 if (r[b++] == '\n') 490 return true; 491 return false; 492 } 493 494 /** 495 * Determine the encoding of the commit message buffer. 496 * <p> 497 * Locates the "encoding" header (if present) and returns its value. Due to 498 * corruption in the wild this may be an invalid encoding name that is not 499 * recognized by any character encoding library. 500 * <p> 501 * If no encoding header is present, null. 502 * 503 * @return the preferred encoding of {@link #getRawBuffer()}; or null. 504 * @since 4.2 505 */ 506 @Nullable 507 public final String getEncodingName() { 508 return RawParseUtils.parseEncodingName(buffer); 509 } 510 511 /** 512 * Determine the encoding of the commit message buffer. 513 * <p> 514 * Locates the "encoding" header (if present) and then returns the proper 515 * character set to apply to this buffer to evaluate its contents as 516 * character data. 517 * <p> 518 * If no encoding header is present {@code UTF-8} is assumed. 519 * 520 * @return the preferred encoding of {@link #getRawBuffer()}. 521 * @throws IllegalCharsetNameException 522 * if the character set requested by the encoding header is 523 * malformed and unsupportable. 524 * @throws UnsupportedCharsetException 525 * if the JRE does not support the character set requested by 526 * the encoding header. 527 */ 528 public final Charset getEncoding() { 529 return RawParseUtils.parseEncoding(buffer); 530 } 531 532 private Charset guessEncoding() { 533 try { 534 return getEncoding(); 535 } catch (IllegalCharsetNameException | UnsupportedCharsetException e) { 536 return UTF_8; 537 } 538 } 539 540 /** 541 * Parse the footer lines (e.g. "Signed-off-by") for machine processing. 542 * <p> 543 * This method splits all of the footer lines out of the last paragraph of 544 * the commit message, providing each line as a key-value pair, ordered by 545 * the order of the line's appearance in the commit message itself. 546 * <p> 547 * A footer line's key must match the pattern {@code ^[A-Za-z0-9-]+:}, while 548 * the value is free-form, but must not contain an LF. Very common keys seen 549 * in the wild are: 550 * <ul> 551 * <li>{@code Signed-off-by} (agrees to Developer Certificate of Origin) 552 * <li>{@code Acked-by} (thinks change looks sane in context) 553 * <li>{@code Reported-by} (originally found the issue this change fixes) 554 * <li>{@code Tested-by} (validated change fixes the issue for them) 555 * <li>{@code CC}, {@code Cc} (copy on all email related to this change) 556 * <li>{@code Bug} (link to project's bug tracking system) 557 * </ul> 558 * 559 * @return ordered list of footer lines; empty list if no footers found. 560 */ 561 public final List<FooterLine> getFooterLines() { 562 final byte[] raw = buffer; 563 int ptr = raw.length - 1; 564 while (raw[ptr] == '\n') // trim any trailing LFs, not interesting 565 ptr--; 566 567 final int msgB = RawParseUtils.commitMessage(raw, 0); 568 final ArrayList<FooterLine> r = new ArrayList<>(4); 569 final Charset enc = guessEncoding(); 570 for (;;) { 571 ptr = RawParseUtils.prevLF(raw, ptr); 572 if (ptr <= msgB) 573 break; // Don't parse commit headers as footer lines. 574 575 final int keyStart = ptr + 2; 576 if (raw[keyStart] == '\n') 577 break; // Stop at first paragraph break, no footers above it. 578 579 final int keyEnd = RawParseUtils.endOfFooterLineKey(raw, keyStart); 580 if (keyEnd < 0) 581 continue; // Not a well formed footer line, skip it. 582 583 // Skip over the ': *' at the end of the key before the value. 584 // 585 int valStart = keyEnd + 1; 586 while (valStart < raw.length && raw[valStart] == ' ') 587 valStart++; 588 589 // Value ends at the LF, and does not include it. 590 // 591 int valEnd = RawParseUtils.nextLF(raw, valStart); 592 if (raw[valEnd - 1] == '\n') 593 valEnd--; 594 595 r.add(new FooterLine(raw, enc, keyStart, keyEnd, valStart, valEnd)); 596 } 597 Collections.reverse(r); 598 return r; 599 } 600 601 /** 602 * Get the values of all footer lines with the given key. 603 * 604 * @param keyName 605 * footer key to find values of, case insensitive. 606 * @return values of footers with key of {@code keyName}, ordered by their 607 * order of appearance. Duplicates may be returned if the same 608 * footer appeared more than once. Empty list if no footers appear 609 * with the specified key, or there are no footers at all. 610 * @see #getFooterLines() 611 */ 612 public final List<String> getFooterLines(final String keyName) { 613 return getFooterLines(new FooterKey(keyName)); 614 } 615 616 /** 617 * Get the values of all footer lines with the given key. 618 * 619 * @param keyName 620 * footer key to find values of, case insensitive. 621 * @return values of footers with key of {@code keyName}, ordered by their 622 * order of appearance. Duplicates may be returned if the same 623 * footer appeared more than once. Empty list if no footers appear 624 * with the specified key, or there are no footers at all. 625 * @see #getFooterLines() 626 */ 627 public final List<String> getFooterLines(final FooterKey keyName) { 628 final List<FooterLine> src = getFooterLines(); 629 if (src.isEmpty()) 630 return Collections.emptyList(); 631 final ArrayList<String> r = new ArrayList<>(src.size()); 632 for (final FooterLine f : src) { 633 if (f.matches(keyName)) 634 r.add(f.getValue()); 635 } 636 return r; 637 } 638 639 /** 640 * Reset this commit to allow another RevWalk with the same instances. 641 * <p> 642 * Subclasses <b>must</b> call <code>super.reset()</code> to ensure the 643 * basic information can be correctly cleared out. 644 */ 645 public void reset() { 646 inDegree = 0; 647 } 648 649 /** 650 * Discard the message buffer to reduce memory usage. 651 * <p> 652 * After discarding the memory usage of the {@code RevCommit} is reduced to 653 * only the {@link #getTree()} and {@link #getParents()} pointers and the 654 * time in {@link #getCommitTime()}. Accessing other properties such as 655 * {@link #getAuthorIdent()}, {@link #getCommitterIdent()} or either message 656 * function requires reloading the buffer by invoking 657 * {@link RevWalk#parseBody(RevObject)}. 658 * 659 * @since 4.0 660 */ 661 public final void disposeBody() { 662 buffer = null; 663 } 664 665 @Override 666 public String toString() { 667 final StringBuilder s = new StringBuilder(); 668 s.append(Constants.typeString(getType())); 669 s.append(' '); 670 s.append(name()); 671 s.append(' '); 672 s.append(commitTime); 673 s.append(' '); 674 appendCoreFlags(s); 675 return s.toString(); 676 } 677 }