1 /* 2 * Copyright (C) 2008-2009, Google Inc. 3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others 4 * 5 * This program and the accompanying materials are made available under the 6 * terms of the Eclipse Distribution License v. 1.0 which is available at 7 * https://www.eclipse.org/org/documents/edl-v10.php. 8 * 9 * SPDX-License-Identifier: BSD-3-Clause 10 */ 11 12 package org.eclipse.jgit.revwalk; 13 14 import static java.nio.charset.StandardCharsets.UTF_8; 15 16 import java.io.IOException; 17 import java.nio.charset.Charset; 18 import java.nio.charset.IllegalCharsetNameException; 19 import java.nio.charset.UnsupportedCharsetException; 20 import java.util.ArrayList; 21 import java.util.Arrays; 22 import java.util.Collections; 23 import java.util.List; 24 25 import org.eclipse.jgit.annotations.Nullable; 26 import org.eclipse.jgit.errors.IncorrectObjectTypeException; 27 import org.eclipse.jgit.errors.MissingObjectException; 28 import org.eclipse.jgit.lib.AnyObjectId; 29 import org.eclipse.jgit.lib.Constants; 30 import org.eclipse.jgit.lib.MutableObjectId; 31 import org.eclipse.jgit.lib.ObjectInserter; 32 import org.eclipse.jgit.lib.ObjectReader; 33 import org.eclipse.jgit.lib.PersonIdent; 34 import org.eclipse.jgit.util.RawParseUtils; 35 import org.eclipse.jgit.util.StringUtils; 36 37 /** 38 * A commit reference to a commit in the DAG. 39 */ 40 public class RevCommit extends RevObject { 41 private static final int STACK_DEPTH = 500; 42 43 /** 44 * Parse a commit from its canonical format. 45 * 46 * This method constructs a temporary revision pool, parses the commit as 47 * supplied, and returns it to the caller. Since the commit was built inside 48 * of a private revision pool its parent pointers will be initialized, but 49 * will not have their headers loaded. 50 * 51 * Applications are discouraged from using this API. Callers usually need 52 * more than one commit. Use 53 * {@link org.eclipse.jgit.revwalk.RevWalk#parseCommit(AnyObjectId)} to 54 * obtain a RevCommit from an existing repository. 55 * 56 * @param raw 57 * the canonical formatted commit to be parsed. 58 * @return the parsed commit, in an isolated revision pool that is not 59 * available to the caller. 60 */ 61 public static RevCommit parse(byte[] raw) { 62 try { 63 return parse(new RevWalk((ObjectReader) null), raw); 64 } catch (IOException ex) { 65 throw new RuntimeException(ex); 66 } 67 } 68 69 /** 70 * Parse a commit from its canonical format. 71 * <p> 72 * This method inserts the commit directly into the caller supplied revision 73 * pool, making it appear as though the commit exists in the repository, 74 * even if it doesn't. The repository under the pool is not affected. 75 * <p> 76 * The body of the commit (message, author, committer) is always retained in 77 * the returned {@code RevCommit}, even if the supplied {@code RevWalk} has 78 * been configured with {@code setRetainBody(false)}. 79 * 80 * @param rw 81 * the revision pool to allocate the commit within. The commit's 82 * tree and parent pointers will be obtained from this pool. 83 * @param raw 84 * the canonical formatted commit to be parsed. This buffer will 85 * be retained by the returned {@code RevCommit} and must not be 86 * modified by the caller. 87 * @return the parsed commit, in an isolated revision pool that is not 88 * available to the caller. 89 * @throws java.io.IOException 90 * in case of RevWalk initialization fails 91 */ 92 public static RevCommit parse(RevWalk rw, byte[] raw) throws IOException { 93 try (ObjectInserter.Formatter fmt = new ObjectInserter.Formatter()) { 94 RevCommit r = rw.lookupCommit(fmt.idFor(Constants.OBJ_COMMIT, raw)); 95 r.parseCanonical(rw, raw); 96 r.buffer = raw; 97 return r; 98 } 99 } 100 101 static final RevCommit[] NO_PARENTS = {}; 102 103 private RevTree tree; 104 105 RevCommit[] parents; 106 107 int commitTime; // An int here for performance, overflows in 2038 108 109 int inDegree; 110 111 private byte[] buffer; 112 113 /** 114 * Create a new commit reference. 115 * 116 * @param id 117 * object name for the commit. 118 */ 119 protected RevCommit(AnyObjectId id) { 120 super(id); 121 } 122 123 @Override 124 void parseHeaders(RevWalk walk) throws MissingObjectException, 125 IncorrectObjectTypeException, IOException { 126 parseCanonical(walk, walk.getCachedBytes(this)); 127 } 128 129 @Override 130 void parseBody(RevWalk walk) throws MissingObjectException, 131 IncorrectObjectTypeException, IOException { 132 if (buffer == null) { 133 buffer = walk.getCachedBytes(this); 134 if ((flags & PARSED) == 0) 135 parseCanonical(walk, buffer); 136 } 137 } 138 139 void parseCanonical(RevWalk walk, byte[] raw) throws IOException { 140 if (!walk.shallowCommitsInitialized) { 141 walk.initializeShallowCommits(this); 142 } 143 144 final MutableObjectId idBuffer = walk.idBuffer; 145 idBuffer.fromString(raw, 5); 146 tree = walk.lookupTree(idBuffer); 147 148 int ptr = 46; 149 if (parents == null) { 150 RevCommit[] pList = new RevCommit[1]; 151 int nParents = 0; 152 for (;;) { 153 if (raw[ptr] != 'p') { 154 break; 155 } 156 idBuffer.fromString(raw, ptr + 7); 157 final RevCommit p = walk.lookupCommit(idBuffer); 158 switch (nParents) { 159 case 0: 160 pList[nParents++] = p; 161 break; 162 case 1: 163 pList = new RevCommit[] { pList[0], p }; 164 nParents = 2; 165 break; 166 default: 167 if (pList.length <= nParents) { 168 RevCommit[] old = pList; 169 pList = new RevCommit[pList.length + 32]; 170 System.arraycopy(old, 0, pList, 0, nParents); 171 } 172 pList[nParents++] = p; 173 break; 174 } 175 ptr += 48; 176 } 177 if (nParents != pList.length) { 178 RevCommit[] old = pList; 179 pList = new RevCommit[nParents]; 180 System.arraycopy(old, 0, pList, 0, nParents); 181 } 182 parents = pList; 183 } 184 185 // extract time from "committer " 186 ptr = RawParseUtils.committer(raw, ptr); 187 if (ptr > 0) { 188 ptr = RawParseUtils.nextLF(raw, ptr, '>'); 189 190 // In 2038 commitTime will overflow unless it is changed to long. 191 commitTime = RawParseUtils.parseBase10(raw, ptr, null); 192 } 193 194 if (walk.isRetainBody()) { 195 buffer = raw; 196 } 197 flags |= PARSED; 198 } 199 200 /** {@inheritDoc} */ 201 @Override 202 public final int getType() { 203 return Constants.OBJ_COMMIT; 204 } 205 206 static void carryFlags(RevCommit c, int carry) { 207 FIFORevQueue q = carryFlags1(c, carry, 0); 208 if (q != null) 209 slowCarryFlags(q, carry); 210 } 211 212 private static FIFORevQueue carryFlags1(RevCommit c, int carry, int depth) { 213 for(;;) { 214 RevCommit[] pList = c.parents; 215 if (pList == null || pList.length == 0) 216 return null; 217 if (pList.length != 1) { 218 if (depth == STACK_DEPTH) 219 return defer(c); 220 for (int i = 1; i < pList.length; i++) { 221 RevCommit p = pList[i]; 222 if ((p.flags & carry) == carry) 223 continue; 224 p.flags |= carry; 225 FIFORevQueue q = carryFlags1(p, carry, depth + 1); 226 if (q != null) 227 return defer(q, carry, pList, i + 1); 228 } 229 } 230 231 c = pList[0]; 232 if ((c.flags & carry) == carry) 233 return null; 234 c.flags |= carry; 235 } 236 } 237 238 private static FIFORevQueue defer(RevCommit c) { 239 FIFORevQueue q = new FIFORevQueue(); 240 q.add(c); 241 return q; 242 } 243 244 private static FIFORevQueue/../../../org/eclipse/jgit/revwalk/FIFORevQueue.html#FIFORevQueue">FIFORevQueue defer(FIFORevQueue q, int carry, 245 RevCommit[] pList, int i) { 246 // In normal case the caller will run pList[0] in a tail recursive 247 // fashion by updating the variable. However the caller is unwinding 248 // the stack and will skip that pList[0] execution step. 249 carryOneStep(q, carry, pList[0]); 250 251 // Remaining parents (if any) need to have flags checked and be 252 // enqueued if they have ancestors. 253 for (; i < pList.length; i++) 254 carryOneStep(q, carry, pList[i]); 255 return q; 256 } 257 258 private static void slowCarryFlags(FIFORevQueue q, int carry) { 259 // Commits in q have non-null parent arrays and have set all 260 // flags in carry. This loop finishes copying over the graph. 261 for (RevCommit c; (c = q.next()) != null;) { 262 for (RevCommit p : c.parents) 263 carryOneStep(q, carry, p); 264 } 265 } 266 267 private static void carryOneStep(FIFORevQueue q, int carry, RevCommit c) { 268 if ((c.flags & carry) != carry) { 269 c.flags |= carry; 270 if (c.parents != null) 271 q.add(c); 272 } 273 } 274 275 /** 276 * Carry a RevFlag set on this commit to its parents. 277 * <p> 278 * If this commit is parsed, has parents, and has the supplied flag set on 279 * it we automatically add it to the parents, grand-parents, and so on until 280 * an unparsed commit or a commit with no parents is discovered. This 281 * permits applications to force a flag through the history chain when 282 * necessary. 283 * 284 * @param flag 285 * the single flag value to carry back onto parents. 286 */ 287 public void carry(RevFlag flag) { 288 final int carry = flags & flag.mask; 289 if (carry != 0) 290 carryFlags(this, carry); 291 } 292 293 /** 294 * Time from the "committer " line of the buffer. 295 * 296 * @return commit time 297 */ 298 public final int getCommitTime() { 299 return commitTime; 300 } 301 302 /** 303 * Get a reference to this commit's tree. 304 * 305 * @return tree of this commit. 306 */ 307 public final RevTree getTree() { 308 return tree; 309 } 310 311 /** 312 * Get the number of parent commits listed in this commit. 313 * 314 * @return number of parents; always a positive value but can be 0. 315 */ 316 public final int getParentCount() { 317 return parents.length; 318 } 319 320 /** 321 * Get the nth parent from this commit's parent list. 322 * 323 * @param nth 324 * parent index to obtain. Must be in the range 0 through 325 * {@link #getParentCount()}-1. 326 * @return the specified parent. 327 * @throws java.lang.ArrayIndexOutOfBoundsException 328 * an invalid parent index was specified. 329 */ 330 public final RevCommit getParent(int nth) { 331 return parents[nth]; 332 } 333 334 /** 335 * Obtain an array of all parents (<b>NOTE - THIS IS NOT A COPY</b>). 336 * <p> 337 * This method is exposed only to provide very fast, efficient access to 338 * this commit's parent list. Applications relying on this list should be 339 * very careful to ensure they do not modify its contents during their use 340 * of it. 341 * 342 * @return the array of parents. 343 */ 344 public final RevCommit[] getParents() { 345 return parents; 346 } 347 348 /** 349 * Obtain the raw unparsed commit body (<b>NOTE - THIS IS NOT A COPY</b>). 350 * <p> 351 * This method is exposed only to provide very fast, efficient access to 352 * this commit's message buffer within a RevFilter. Applications relying on 353 * this buffer should be very careful to ensure they do not modify its 354 * contents during their use of it. 355 * 356 * @return the raw unparsed commit body. This is <b>NOT A COPY</b>. 357 * Altering the contents of this buffer may alter the walker's 358 * knowledge of this commit, and the results it produces. 359 */ 360 public final byte[] getRawBuffer() { 361 return buffer; 362 } 363 364 /** 365 * Parse the gpg signature from the raw buffer. 366 * <p> 367 * This method parses and returns the raw content of the gpgsig lines. This 368 * method is fairly expensive and produces a new byte[] instance on each 369 * invocation. Callers should invoke this method only if they are certain 370 * they will need, and should cache the return value for as long as 371 * necessary to use all information from it. 372 * <p> 373 * RevFilter implementations should try to use 374 * {@link org.eclipse.jgit.util.RawParseUtils} to scan the 375 * {@link #getRawBuffer()} instead, as this will allow faster evaluation of 376 * commits. 377 * 378 * @return contents of the gpg signature; null if the commit was not signed. 379 * @since 5.1 380 */ 381 public final byte[] getRawGpgSignature() { 382 final byte[] raw = buffer; 383 final byte[] header = {'g', 'p', 'g', 's', 'i', 'g'}; 384 final int start = RawParseUtils.headerStart(header, raw, 0); 385 if (start < 0) { 386 return null; 387 } 388 final int end = RawParseUtils.headerEnd(raw, start); 389 return Arrays.copyOfRange(raw, start, end); 390 } 391 392 /** 393 * Parse the author identity from the raw buffer. 394 * <p> 395 * This method parses and returns the content of the author line, after 396 * taking the commit's character set into account and decoding the author 397 * name and email address. This method is fairly expensive and produces a 398 * new PersonIdent instance on each invocation. Callers should invoke this 399 * method only if they are certain they will be outputting the result, and 400 * should cache the return value for as long as necessary to use all 401 * information from it. 402 * <p> 403 * RevFilter implementations should try to use 404 * {@link org.eclipse.jgit.util.RawParseUtils} to scan the 405 * {@link #getRawBuffer()} instead, as this will allow faster evaluation of 406 * commits. 407 * 408 * @return identity of the author (name, email) and the time the commit was 409 * made by the author; null if no author line was found. 410 */ 411 public final PersonIdent getAuthorIdent() { 412 final byte[] raw = buffer; 413 final int nameB = RawParseUtils.author(raw, 0); 414 if (nameB < 0) 415 return null; 416 return RawParseUtils.parsePersonIdent(raw, nameB); 417 } 418 419 /** 420 * Parse the committer identity from the raw buffer. 421 * <p> 422 * This method parses and returns the content of the committer line, after 423 * taking the commit's character set into account and decoding the committer 424 * name and email address. This method is fairly expensive and produces a 425 * new PersonIdent instance on each invocation. Callers should invoke this 426 * method only if they are certain they will be outputting the result, and 427 * should cache the return value for as long as necessary to use all 428 * information from it. 429 * <p> 430 * RevFilter implementations should try to use 431 * {@link org.eclipse.jgit.util.RawParseUtils} to scan the 432 * {@link #getRawBuffer()} instead, as this will allow faster evaluation of 433 * commits. 434 * 435 * @return identity of the committer (name, email) and the time the commit 436 * was made by the committer; null if no committer line was found. 437 */ 438 public final PersonIdent getCommitterIdent() { 439 final byte[] raw = buffer; 440 final int nameB = RawParseUtils.committer(raw, 0); 441 if (nameB < 0) 442 return null; 443 return RawParseUtils.parsePersonIdent(raw, nameB); 444 } 445 446 /** 447 * Parse the complete commit message and decode it to a string. 448 * <p> 449 * This method parses and returns the message portion of the commit buffer, 450 * after taking the commit's character set into account and decoding the 451 * buffer using that character set. This method is a fairly expensive 452 * operation and produces a new string on each invocation. 453 * 454 * @return decoded commit message as a string. Never null. 455 */ 456 public final String getFullMessage() { 457 byte[] raw = buffer; 458 int msgB = RawParseUtils.commitMessage(raw, 0); 459 if (msgB < 0) { 460 return ""; //$NON-NLS-1$ 461 } 462 return RawParseUtils.decode(guessEncoding(), raw, msgB, raw.length); 463 } 464 465 /** 466 * Parse the commit message and return the first "line" of it. 467 * <p> 468 * The first line is everything up to the first pair of LFs. This is the 469 * "oneline" format, suitable for output in a single line display. 470 * <p> 471 * This method parses and returns the message portion of the commit buffer, 472 * after taking the commit's character set into account and decoding the 473 * buffer using that character set. This method is a fairly expensive 474 * operation and produces a new string on each invocation. 475 * 476 * @return decoded commit message as a string. Never null. The returned 477 * string does not contain any LFs, even if the first paragraph 478 * spanned multiple lines. Embedded LFs are converted to spaces. 479 */ 480 public final String getShortMessage() { 481 byte[] raw = buffer; 482 int msgB = RawParseUtils.commitMessage(raw, 0); 483 if (msgB < 0) { 484 return ""; //$NON-NLS-1$ 485 } 486 487 int msgE = RawParseUtils.endOfParagraph(raw, msgB); 488 String str = RawParseUtils.decode(guessEncoding(), raw, msgB, msgE); 489 if (hasLF(raw, msgB, msgE)) { 490 str = StringUtils.replaceLineBreaksWithSpace(str); 491 } 492 return str; 493 } 494 495 static boolean hasLF(byte[] r, int b, int e) { 496 while (b < e) 497 if (r[b++] == '\n') 498 return true; 499 return false; 500 } 501 502 /** 503 * Determine the encoding of the commit message buffer. 504 * <p> 505 * Locates the "encoding" header (if present) and returns its value. Due to 506 * corruption in the wild this may be an invalid encoding name that is not 507 * recognized by any character encoding library. 508 * <p> 509 * If no encoding header is present, null. 510 * 511 * @return the preferred encoding of {@link #getRawBuffer()}; or null. 512 * @since 4.2 513 */ 514 @Nullable 515 public final String getEncodingName() { 516 return RawParseUtils.parseEncodingName(buffer); 517 } 518 519 /** 520 * Determine the encoding of the commit message buffer. 521 * <p> 522 * Locates the "encoding" header (if present) and then returns the proper 523 * character set to apply to this buffer to evaluate its contents as 524 * character data. 525 * <p> 526 * If no encoding header is present {@code UTF-8} is assumed. 527 * 528 * @return the preferred encoding of {@link #getRawBuffer()}. 529 * @throws IllegalCharsetNameException 530 * if the character set requested by the encoding header is 531 * malformed and unsupportable. 532 * @throws UnsupportedCharsetException 533 * if the JRE does not support the character set requested by 534 * the encoding header. 535 */ 536 public final Charset getEncoding() { 537 return RawParseUtils.parseEncoding(buffer); 538 } 539 540 private Charset guessEncoding() { 541 try { 542 return getEncoding(); 543 } catch (IllegalCharsetNameException | UnsupportedCharsetException e) { 544 return UTF_8; 545 } 546 } 547 548 /** 549 * Parse the footer lines (e.g. "Signed-off-by") for machine processing. 550 * <p> 551 * This method splits all of the footer lines out of the last paragraph of 552 * the commit message, providing each line as a key-value pair, ordered by 553 * the order of the line's appearance in the commit message itself. 554 * <p> 555 * A footer line's key must match the pattern {@code ^[A-Za-z0-9-]+:}, while 556 * the value is free-form, but must not contain an LF. Very common keys seen 557 * in the wild are: 558 * <ul> 559 * <li>{@code Signed-off-by} (agrees to Developer Certificate of Origin) 560 * <li>{@code Acked-by} (thinks change looks sane in context) 561 * <li>{@code Reported-by} (originally found the issue this change fixes) 562 * <li>{@code Tested-by} (validated change fixes the issue for them) 563 * <li>{@code CC}, {@code Cc} (copy on all email related to this change) 564 * <li>{@code Bug} (link to project's bug tracking system) 565 * </ul> 566 * 567 * @return ordered list of footer lines; empty list if no footers found. 568 */ 569 public final List<FooterLine> getFooterLines() { 570 final byte[] raw = buffer; 571 int ptr = raw.length - 1; 572 while (raw[ptr] == '\n') // trim any trailing LFs, not interesting 573 ptr--; 574 575 final int msgB = RawParseUtils.commitMessage(raw, 0); 576 final ArrayList<FooterLine> r = new ArrayList<>(4); 577 final Charset enc = guessEncoding(); 578 for (;;) { 579 ptr = RawParseUtils.prevLF(raw, ptr); 580 if (ptr <= msgB) 581 break; // Don't parse commit headers as footer lines. 582 583 final int keyStart = ptr + 2; 584 if (raw[keyStart] == '\n') 585 break; // Stop at first paragraph break, no footers above it. 586 587 final int keyEnd = RawParseUtils.endOfFooterLineKey(raw, keyStart); 588 if (keyEnd < 0) 589 continue; // Not a well formed footer line, skip it. 590 591 // Skip over the ': *' at the end of the key before the value. 592 // 593 int valStart = keyEnd + 1; 594 while (valStart < raw.length && raw[valStart] == ' ') 595 valStart++; 596 597 // Value ends at the LF, and does not include it. 598 // 599 int valEnd = RawParseUtils.nextLF(raw, valStart); 600 if (raw[valEnd - 1] == '\n') 601 valEnd--; 602 603 r.add(new FooterLine(raw, enc, keyStart, keyEnd, valStart, valEnd)); 604 } 605 Collections.reverse(r); 606 return r; 607 } 608 609 /** 610 * Get the values of all footer lines with the given key. 611 * 612 * @param keyName 613 * footer key to find values of, case insensitive. 614 * @return values of footers with key of {@code keyName}, ordered by their 615 * order of appearance. Duplicates may be returned if the same 616 * footer appeared more than once. Empty list if no footers appear 617 * with the specified key, or there are no footers at all. 618 * @see #getFooterLines() 619 */ 620 public final List<String> getFooterLines(String keyName) { 621 return getFooterLines(new FooterKey(keyName)); 622 } 623 624 /** 625 * Get the values of all footer lines with the given key. 626 * 627 * @param keyName 628 * footer key to find values of, case insensitive. 629 * @return values of footers with key of {@code keyName}, ordered by their 630 * order of appearance. Duplicates may be returned if the same 631 * footer appeared more than once. Empty list if no footers appear 632 * with the specified key, or there are no footers at all. 633 * @see #getFooterLines() 634 */ 635 public final List<String> getFooterLines(FooterKey keyName) { 636 final List<FooterLine> src = getFooterLines(); 637 if (src.isEmpty()) 638 return Collections.emptyList(); 639 final ArrayList<String> r = new ArrayList<>(src.size()); 640 for (FooterLine f : src) { 641 if (f.matches(keyName)) 642 r.add(f.getValue()); 643 } 644 return r; 645 } 646 647 /** 648 * Reset this commit to allow another RevWalk with the same instances. 649 * <p> 650 * Subclasses <b>must</b> call <code>super.reset()</code> to ensure the 651 * basic information can be correctly cleared out. 652 */ 653 public void reset() { 654 inDegree = 0; 655 } 656 657 /** 658 * Discard the message buffer to reduce memory usage. 659 * <p> 660 * After discarding the memory usage of the {@code RevCommit} is reduced to 661 * only the {@link #getTree()} and {@link #getParents()} pointers and the 662 * time in {@link #getCommitTime()}. Accessing other properties such as 663 * {@link #getAuthorIdent()}, {@link #getCommitterIdent()} or either message 664 * function requires reloading the buffer by invoking 665 * {@link org.eclipse.jgit.revwalk.RevWalk#parseBody(RevObject)}. 666 * 667 * @since 4.0 668 */ 669 public final void disposeBody() { 670 buffer = null; 671 } 672 673 /** {@inheritDoc} */ 674 @Override 675 public String toString() { 676 final StringBuilder s = new StringBuilder(); 677 s.append(Constants.typeString(getType())); 678 s.append(' '); 679 s.append(name()); 680 s.append(' '); 681 s.append(commitTime); 682 s.append(' '); 683 appendCoreFlags(s); 684 return s.toString(); 685 } 686 }