1 /* 2 * Copyright (C) 2008-2009, Google Inc. 3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> 4 * and other copyright owners as documented in the project's IP log. 5 * 6 * This program and the accompanying materials are made available 7 * under the terms of the Eclipse Distribution License v1.0 which 8 * accompanies this distribution, is reproduced below, and is 9 * available at http://www.eclipse.org/org/documents/edl-v10.php 10 * 11 * All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials provided 23 * with the distribution. 24 * 25 * - Neither the name of the Eclipse Foundation, Inc. nor the 26 * names of its contributors may be used to endorse or promote 27 * products derived from this software without specific prior 28 * written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 31 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 32 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 35 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 36 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 37 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 38 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 39 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 40 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 41 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 42 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 */ 44 45 package org.eclipse.jgit.revwalk; 46 47 import static org.eclipse.jgit.lib.Constants.CHARSET; 48 49 import java.io.IOException; 50 import java.nio.charset.Charset; 51 import java.nio.charset.IllegalCharsetNameException; 52 import java.nio.charset.UnsupportedCharsetException; 53 import java.util.ArrayList; 54 import java.util.Collections; 55 import java.util.List; 56 57 import org.eclipse.jgit.annotations.Nullable; 58 import org.eclipse.jgit.errors.IncorrectObjectTypeException; 59 import org.eclipse.jgit.errors.MissingObjectException; 60 import org.eclipse.jgit.lib.AnyObjectId; 61 import org.eclipse.jgit.lib.Constants; 62 import org.eclipse.jgit.lib.MutableObjectId; 63 import org.eclipse.jgit.lib.ObjectInserter; 64 import org.eclipse.jgit.lib.ObjectReader; 65 import org.eclipse.jgit.lib.PersonIdent; 66 import org.eclipse.jgit.util.RawParseUtils; 67 import org.eclipse.jgit.util.StringUtils; 68 69 /** 70 * A commit reference to a commit in the DAG. 71 */ 72 public class RevCommit extends RevObject { 73 private static final int STACK_DEPTH = 500; 74 75 /** 76 * Parse a commit from its canonical format. 77 * 78 * This method constructs a temporary revision pool, parses the commit as 79 * supplied, and returns it to the caller. Since the commit was built inside 80 * of a private revision pool its parent pointers will be initialized, but 81 * will not have their headers loaded. 82 * 83 * Applications are discouraged from using this API. Callers usually need 84 * more than one commit. Use 85 * {@link org.eclipse.jgit.revwalk.RevWalk#parseCommit(AnyObjectId)} to 86 * obtain a RevCommit from an existing repository. 87 * 88 * @param raw 89 * the canonical formatted commit to be parsed. 90 * @return the parsed commit, in an isolated revision pool that is not 91 * available to the caller. 92 */ 93 public static RevCommit parse(byte[] raw) { 94 try { 95 return parse(new RevWalk((ObjectReader) null), raw); 96 } catch (IOException ex) { 97 throw new RuntimeException(ex); 98 } 99 } 100 101 /** 102 * Parse a commit from its canonical format. 103 * <p> 104 * This method inserts the commit directly into the caller supplied revision 105 * pool, making it appear as though the commit exists in the repository, 106 * even if it doesn't. The repository under the pool is not affected. 107 * <p> 108 * The body of the commit (message, author, committer) is always retained in 109 * the returned {@code RevCommit}, even if the supplied {@code RevWalk} has 110 * been configured with {@code setRetainBody(false)}. 111 * 112 * @param rw 113 * the revision pool to allocate the commit within. The commit's 114 * tree and parent pointers will be obtained from this pool. 115 * @param raw 116 * the canonical formatted commit to be parsed. This buffer will 117 * be retained by the returned {@code RevCommit} and must not be 118 * modified by the caller. 119 * @return the parsed commit, in an isolated revision pool that is not 120 * available to the caller. 121 * @throws java.io.IOException 122 * in case of RevWalk initialization fails 123 */ 124 public static RevCommit parse(RevWalk rw, byte[] raw) throws IOException { 125 try (ObjectInserter.Formatter fmt = new ObjectInserter.Formatter()) { 126 RevCommit r = rw.lookupCommit(fmt.idFor(Constants.OBJ_COMMIT, raw)); 127 r.parseCanonical(rw, raw); 128 r.buffer = raw; 129 return r; 130 } 131 } 132 133 static final RevCommit[] NO_PARENTS = {}; 134 135 private RevTree tree; 136 137 RevCommit[] parents; 138 139 int commitTime; // An int here for performance, overflows in 2038 140 141 int inDegree; 142 143 private byte[] buffer; 144 145 /** 146 * Create a new commit reference. 147 * 148 * @param id 149 * object name for the commit. 150 */ 151 protected RevCommit(AnyObjectId id) { 152 super(id); 153 } 154 155 @Override 156 void parseHeaders(RevWalk walk) throws MissingObjectException, 157 IncorrectObjectTypeException, IOException { 158 parseCanonical(walk, walk.getCachedBytes(this)); 159 } 160 161 @Override 162 void parseBody(RevWalk walk) throws MissingObjectException, 163 IncorrectObjectTypeException, IOException { 164 if (buffer == null) { 165 buffer = walk.getCachedBytes(this); 166 if ((flags & PARSED) == 0) 167 parseCanonical(walk, buffer); 168 } 169 } 170 171 void parseCanonical(RevWalk walk, byte[] raw) 172 throws IOException { 173 if (!walk.shallowCommitsInitialized) 174 walk.initializeShallowCommits(); 175 176 final MutableObjectId idBuffer = walk.idBuffer; 177 idBuffer.fromString(raw, 5); 178 tree = walk.lookupTree(idBuffer); 179 180 int ptr = 46; 181 if (parents == null) { 182 RevCommit[] pList = new RevCommit[1]; 183 int nParents = 0; 184 for (;;) { 185 if (raw[ptr] != 'p') 186 break; 187 idBuffer.fromString(raw, ptr + 7); 188 final RevCommit p = walk.lookupCommit(idBuffer); 189 if (nParents == 0) 190 pList[nParents++] = p; 191 else if (nParents == 1) { 192 pList = new RevCommit[] { pList[0], p }; 193 nParents = 2; 194 } else { 195 if (pList.length <= nParents) { 196 RevCommit[] old = pList; 197 pList = new RevCommit[pList.length + 32]; 198 System.arraycopy(old, 0, pList, 0, nParents); 199 } 200 pList[nParents++] = p; 201 } 202 ptr += 48; 203 } 204 if (nParents != pList.length) { 205 RevCommit[] old = pList; 206 pList = new RevCommit[nParents]; 207 System.arraycopy(old, 0, pList, 0, nParents); 208 } 209 parents = pList; 210 } 211 212 // extract time from "committer " 213 ptr = RawParseUtils.committer(raw, ptr); 214 if (ptr > 0) { 215 ptr = RawParseUtils.nextLF(raw, ptr, '>'); 216 217 // In 2038 commitTime will overflow unless it is changed to long. 218 commitTime = RawParseUtils.parseBase10(raw, ptr, null); 219 } 220 221 if (walk.isRetainBody()) 222 buffer = raw; 223 flags |= PARSED; 224 } 225 226 /** {@inheritDoc} */ 227 @Override 228 public final int getType() { 229 return Constants.OBJ_COMMIT; 230 } 231 232 static void carryFlags(RevCommit c, int carry) { 233 FIFORevQueue q = carryFlags1(c, carry, 0); 234 if (q != null) 235 slowCarryFlags(q, carry); 236 } 237 238 private static FIFORevQueue carryFlags1(RevCommit c, int carry, int depth) { 239 for(;;) { 240 RevCommit[] pList = c.parents; 241 if (pList == null || pList.length == 0) 242 return null; 243 if (pList.length != 1) { 244 if (depth == STACK_DEPTH) 245 return defer(c); 246 for (int i = 1; i < pList.length; i++) { 247 RevCommit p = pList[i]; 248 if ((p.flags & carry) == carry) 249 continue; 250 p.flags |= carry; 251 FIFORevQueue q = carryFlags1(p, carry, depth + 1); 252 if (q != null) 253 return defer(q, carry, pList, i + 1); 254 } 255 } 256 257 c = pList[0]; 258 if ((c.flags & carry) == carry) 259 return null; 260 c.flags |= carry; 261 } 262 } 263 264 private static FIFORevQueue defer(RevCommit c) { 265 FIFORevQueue q = new FIFORevQueue(); 266 q.add(c); 267 return q; 268 } 269 270 private static FIFORevQueue defer(FIFORevQueue q, int carry, 271 RevCommit[] pList, int i) { 272 // In normal case the caller will run pList[0] in a tail recursive 273 // fashion by updating the variable. However the caller is unwinding 274 // the stack and will skip that pList[0] execution step. 275 carryOneStep(q, carry, pList[0]); 276 277 // Remaining parents (if any) need to have flags checked and be 278 // enqueued if they have ancestors. 279 for (; i < pList.length; i++) 280 carryOneStep(q, carry, pList[i]); 281 return q; 282 } 283 284 private static void slowCarryFlags(FIFORevQueue q, int carry) { 285 // Commits in q have non-null parent arrays and have set all 286 // flags in carry. This loop finishes copying over the graph. 287 for (RevCommit c; (c = q.next()) != null;) { 288 for (RevCommit p : c.parents) 289 carryOneStep(q, carry, p); 290 } 291 } 292 293 private static void carryOneStep(FIFORevQueue q, int carry, RevCommit c) { 294 if ((c.flags & carry) != carry) { 295 c.flags |= carry; 296 if (c.parents != null) 297 q.add(c); 298 } 299 } 300 301 /** 302 * Carry a RevFlag set on this commit to its parents. 303 * <p> 304 * If this commit is parsed, has parents, and has the supplied flag set on 305 * it we automatically add it to the parents, grand-parents, and so on until 306 * an unparsed commit or a commit with no parents is discovered. This 307 * permits applications to force a flag through the history chain when 308 * necessary. 309 * 310 * @param flag 311 * the single flag value to carry back onto parents. 312 */ 313 public void carry(RevFlag flag) { 314 final int carry = flags & flag.mask; 315 if (carry != 0) 316 carryFlags(this, carry); 317 } 318 319 /** 320 * Time from the "committer " line of the buffer. 321 * 322 * @return commit time 323 */ 324 public final int getCommitTime() { 325 return commitTime; 326 } 327 328 /** 329 * Get a reference to this commit's tree. 330 * 331 * @return tree of this commit. 332 */ 333 public final RevTree getTree() { 334 return tree; 335 } 336 337 /** 338 * Get the number of parent commits listed in this commit. 339 * 340 * @return number of parents; always a positive value but can be 0. 341 */ 342 public final int getParentCount() { 343 return parents.length; 344 } 345 346 /** 347 * Get the nth parent from this commit's parent list. 348 * 349 * @param nth 350 * parent index to obtain. Must be in the range 0 through 351 * {@link #getParentCount()}-1. 352 * @return the specified parent. 353 * @throws java.lang.ArrayIndexOutOfBoundsException 354 * an invalid parent index was specified. 355 */ 356 public final RevCommit getParent(int nth) { 357 return parents[nth]; 358 } 359 360 /** 361 * Obtain an array of all parents (<b>NOTE - THIS IS NOT A COPY</b>). 362 * <p> 363 * This method is exposed only to provide very fast, efficient access to 364 * this commit's parent list. Applications relying on this list should be 365 * very careful to ensure they do not modify its contents during their use 366 * of it. 367 * 368 * @return the array of parents. 369 */ 370 public final RevCommit[] getParents() { 371 return parents; 372 } 373 374 /** 375 * Obtain the raw unparsed commit body (<b>NOTE - THIS IS NOT A COPY</b>). 376 * <p> 377 * This method is exposed only to provide very fast, efficient access to 378 * this commit's message buffer within a RevFilter. Applications relying on 379 * this buffer should be very careful to ensure they do not modify its 380 * contents during their use of it. 381 * 382 * @return the raw unparsed commit body. This is <b>NOT A COPY</b>. 383 * Altering the contents of this buffer may alter the walker's 384 * knowledge of this commit, and the results it produces. 385 */ 386 public final byte[] getRawBuffer() { 387 return buffer; 388 } 389 390 /** 391 * Parse the author identity from the raw buffer. 392 * <p> 393 * This method parses and returns the content of the author line, after 394 * taking the commit's character set into account and decoding the author 395 * name and email address. This method is fairly expensive and produces a 396 * new PersonIdent instance on each invocation. Callers should invoke this 397 * method only if they are certain they will be outputting the result, and 398 * should cache the return value for as long as necessary to use all 399 * information from it. 400 * <p> 401 * RevFilter implementations should try to use 402 * {@link org.eclipse.jgit.util.RawParseUtils} to scan the 403 * {@link #getRawBuffer()} instead, as this will allow faster evaluation of 404 * commits. 405 * 406 * @return identity of the author (name, email) and the time the commit was 407 * made by the author; null if no author line was found. 408 */ 409 public final PersonIdent getAuthorIdent() { 410 final byte[] raw = buffer; 411 final int nameB = RawParseUtils.author(raw, 0); 412 if (nameB < 0) 413 return null; 414 return RawParseUtils.parsePersonIdent(raw, nameB); 415 } 416 417 /** 418 * Parse the committer identity from the raw buffer. 419 * <p> 420 * This method parses and returns the content of the committer line, after 421 * taking the commit's character set into account and decoding the committer 422 * name and email address. This method is fairly expensive and produces a 423 * new PersonIdent instance on each invocation. Callers should invoke this 424 * method only if they are certain they will be outputting the result, and 425 * should cache the return value for as long as necessary to use all 426 * information from it. 427 * <p> 428 * RevFilter implementations should try to use 429 * {@link org.eclipse.jgit.util.RawParseUtils} to scan the 430 * {@link #getRawBuffer()} instead, as this will allow faster evaluation of 431 * commits. 432 * 433 * @return identity of the committer (name, email) and the time the commit 434 * was made by the committer; null if no committer line was found. 435 */ 436 public final PersonIdent getCommitterIdent() { 437 final byte[] raw = buffer; 438 final int nameB = RawParseUtils.committer(raw, 0); 439 if (nameB < 0) 440 return null; 441 return RawParseUtils.parsePersonIdent(raw, nameB); 442 } 443 444 /** 445 * Parse the complete commit message and decode it to a string. 446 * <p> 447 * This method parses and returns the message portion of the commit buffer, 448 * after taking the commit's character set into account and decoding the 449 * buffer using that character set. This method is a fairly expensive 450 * operation and produces a new string on each invocation. 451 * 452 * @return decoded commit message as a string. Never null. 453 */ 454 public final String getFullMessage() { 455 byte[] raw = buffer; 456 int msgB = RawParseUtils.commitMessage(raw, 0); 457 if (msgB < 0) { 458 return ""; //$NON-NLS-1$ 459 } 460 return RawParseUtils.decode(guessEncoding(), raw, msgB, raw.length); 461 } 462 463 /** 464 * Parse the commit message and return the first "line" of it. 465 * <p> 466 * The first line is everything up to the first pair of LFs. This is the 467 * "oneline" format, suitable for output in a single line display. 468 * <p> 469 * This method parses and returns the message portion of the commit buffer, 470 * after taking the commit's character set into account and decoding the 471 * buffer using that character set. This method is a fairly expensive 472 * operation and produces a new string on each invocation. 473 * 474 * @return decoded commit message as a string. Never null. The returned 475 * string does not contain any LFs, even if the first paragraph 476 * spanned multiple lines. Embedded LFs are converted to spaces. 477 */ 478 public final String getShortMessage() { 479 byte[] raw = buffer; 480 int msgB = RawParseUtils.commitMessage(raw, 0); 481 if (msgB < 0) { 482 return ""; //$NON-NLS-1$ 483 } 484 485 int msgE = RawParseUtils.endOfParagraph(raw, msgB); 486 String str = RawParseUtils.decode(guessEncoding(), raw, msgB, msgE); 487 if (hasLF(raw, msgB, msgE)) { 488 str = StringUtils.replaceLineBreaksWithSpace(str); 489 } 490 return str; 491 } 492 493 static boolean hasLF(byte[] r, int b, int e) { 494 while (b < e) 495 if (r[b++] == '\n') 496 return true; 497 return false; 498 } 499 500 /** 501 * Determine the encoding of the commit message buffer. 502 * <p> 503 * Locates the "encoding" header (if present) and returns its value. Due to 504 * corruption in the wild this may be an invalid encoding name that is not 505 * recognized by any character encoding library. 506 * <p> 507 * If no encoding header is present, null. 508 * 509 * @return the preferred encoding of {@link #getRawBuffer()}; or null. 510 * @since 4.2 511 */ 512 @Nullable 513 public final String getEncodingName() { 514 return RawParseUtils.parseEncodingName(buffer); 515 } 516 517 /** 518 * Determine the encoding of the commit message buffer. 519 * <p> 520 * Locates the "encoding" header (if present) and then returns the proper 521 * character set to apply to this buffer to evaluate its contents as 522 * character data. 523 * <p> 524 * If no encoding header is present {@code UTF-8} is assumed. 525 * 526 * @return the preferred encoding of {@link #getRawBuffer()}. 527 * @throws IllegalCharsetNameException 528 * if the character set requested by the encoding header is 529 * malformed and unsupportable. 530 * @throws UnsupportedCharsetException 531 * if the JRE does not support the character set requested by 532 * the encoding header. 533 */ 534 public final Charset getEncoding() { 535 return RawParseUtils.parseEncoding(buffer); 536 } 537 538 private Charset guessEncoding() { 539 try { 540 return getEncoding(); 541 } catch (IllegalCharsetNameException | UnsupportedCharsetException e) { 542 return CHARSET; 543 } 544 } 545 546 /** 547 * Parse the footer lines (e.g. "Signed-off-by") for machine processing. 548 * <p> 549 * This method splits all of the footer lines out of the last paragraph of 550 * the commit message, providing each line as a key-value pair, ordered by 551 * the order of the line's appearance in the commit message itself. 552 * <p> 553 * A footer line's key must match the pattern {@code ^[A-Za-z0-9-]+:}, while 554 * the value is free-form, but must not contain an LF. Very common keys seen 555 * in the wild are: 556 * <ul> 557 * <li>{@code Signed-off-by} (agrees to Developer Certificate of Origin) 558 * <li>{@code Acked-by} (thinks change looks sane in context) 559 * <li>{@code Reported-by} (originally found the issue this change fixes) 560 * <li>{@code Tested-by} (validated change fixes the issue for them) 561 * <li>{@code CC}, {@code Cc} (copy on all email related to this change) 562 * <li>{@code Bug} (link to project's bug tracking system) 563 * </ul> 564 * 565 * @return ordered list of footer lines; empty list if no footers found. 566 */ 567 public final List<FooterLine> getFooterLines() { 568 final byte[] raw = buffer; 569 int ptr = raw.length - 1; 570 while (raw[ptr] == '\n') // trim any trailing LFs, not interesting 571 ptr--; 572 573 final int msgB = RawParseUtils.commitMessage(raw, 0); 574 final ArrayList<FooterLine> r = new ArrayList<>(4); 575 final Charset enc = guessEncoding(); 576 for (;;) { 577 ptr = RawParseUtils.prevLF(raw, ptr); 578 if (ptr <= msgB) 579 break; // Don't parse commit headers as footer lines. 580 581 final int keyStart = ptr + 2; 582 if (raw[keyStart] == '\n') 583 break; // Stop at first paragraph break, no footers above it. 584 585 final int keyEnd = RawParseUtils.endOfFooterLineKey(raw, keyStart); 586 if (keyEnd < 0) 587 continue; // Not a well formed footer line, skip it. 588 589 // Skip over the ': *' at the end of the key before the value. 590 // 591 int valStart = keyEnd + 1; 592 while (valStart < raw.length && raw[valStart] == ' ') 593 valStart++; 594 595 // Value ends at the LF, and does not include it. 596 // 597 int valEnd = RawParseUtils.nextLF(raw, valStart); 598 if (raw[valEnd - 1] == '\n') 599 valEnd--; 600 601 r.add(new FooterLine(raw, enc, keyStart, keyEnd, valStart, valEnd)); 602 } 603 Collections.reverse(r); 604 return r; 605 } 606 607 /** 608 * Get the values of all footer lines with the given key. 609 * 610 * @param keyName 611 * footer key to find values of, case insensitive. 612 * @return values of footers with key of {@code keyName}, ordered by their 613 * order of appearance. Duplicates may be returned if the same 614 * footer appeared more than once. Empty list if no footers appear 615 * with the specified key, or there are no footers at all. 616 * @see #getFooterLines() 617 */ 618 public final List<String> getFooterLines(String keyName) { 619 return getFooterLines(new FooterKey(keyName)); 620 } 621 622 /** 623 * Get the values of all footer lines with the given key. 624 * 625 * @param keyName 626 * footer key to find values of, case insensitive. 627 * @return values of footers with key of {@code keyName}, ordered by their 628 * order of appearance. Duplicates may be returned if the same 629 * footer appeared more than once. Empty list if no footers appear 630 * with the specified key, or there are no footers at all. 631 * @see #getFooterLines() 632 */ 633 public final List<String> getFooterLines(FooterKey keyName) { 634 final List<FooterLine> src = getFooterLines(); 635 if (src.isEmpty()) 636 return Collections.emptyList(); 637 final ArrayList<String> r = new ArrayList<>(src.size()); 638 for (FooterLine f : src) { 639 if (f.matches(keyName)) 640 r.add(f.getValue()); 641 } 642 return r; 643 } 644 645 /** 646 * Reset this commit to allow another RevWalk with the same instances. 647 * <p> 648 * Subclasses <b>must</b> call <code>super.reset()</code> to ensure the 649 * basic information can be correctly cleared out. 650 */ 651 public void reset() { 652 inDegree = 0; 653 } 654 655 /** 656 * Discard the message buffer to reduce memory usage. 657 * <p> 658 * After discarding the memory usage of the {@code RevCommit} is reduced to 659 * only the {@link #getTree()} and {@link #getParents()} pointers and the 660 * time in {@link #getCommitTime()}. Accessing other properties such as 661 * {@link #getAuthorIdent()}, {@link #getCommitterIdent()} or either message 662 * function requires reloading the buffer by invoking 663 * {@link org.eclipse.jgit.revwalk.RevWalk#parseBody(RevObject)}. 664 * 665 * @since 4.0 666 */ 667 public final void disposeBody() { 668 buffer = null; 669 } 670 671 /** {@inheritDoc} */ 672 @Override 673 public String toString() { 674 final StringBuilder s = new StringBuilder(); 675 s.append(Constants.typeString(getType())); 676 s.append(' '); 677 s.append(name()); 678 s.append(' '); 679 s.append(commitTime); 680 s.append(' '); 681 appendCoreFlags(s); 682 return s.toString(); 683 } 684 }