1 /* 2 * Copyright (C) 2008-2009, Google Inc. 3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> 4 * and other copyright owners as documented in the project's IP log. 5 * 6 * This program and the accompanying materials are made available 7 * under the terms of the Eclipse Distribution License v1.0 which 8 * accompanies this distribution, is reproduced below, and is 9 * available at http://www.eclipse.org/org/documents/edl-v10.php 10 * 11 * All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials provided 23 * with the distribution. 24 * 25 * - Neither the name of the Eclipse Foundation, Inc. nor the 26 * names of its contributors may be used to endorse or promote 27 * products derived from this software without specific prior 28 * written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 31 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 32 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 35 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 36 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 37 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 38 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 39 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 40 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 41 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 42 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 */ 44 45 package org.eclipse.jgit.revwalk; 46 47 import java.io.IOException; 48 import java.nio.charset.Charset; 49 import java.util.ArrayList; 50 import java.util.Collections; 51 import java.util.List; 52 53 import org.eclipse.jgit.errors.IncorrectObjectTypeException; 54 import org.eclipse.jgit.errors.MissingObjectException; 55 import org.eclipse.jgit.lib.AnyObjectId; 56 import org.eclipse.jgit.lib.Constants; 57 import org.eclipse.jgit.lib.MutableObjectId; 58 import org.eclipse.jgit.lib.ObjectInserter; 59 import org.eclipse.jgit.lib.ObjectReader; 60 import org.eclipse.jgit.lib.PersonIdent; 61 import org.eclipse.jgit.util.RawParseUtils; 62 import org.eclipse.jgit.util.StringUtils; 63 64 /** A commit reference to a commit in the DAG. */ 65 public class RevCommit extends RevObject { 66 private static final int STACK_DEPTH = 500; 67 68 /** 69 * Parse a commit from its canonical format. 70 * 71 * This method constructs a temporary revision pool, parses the commit as 72 * supplied, and returns it to the caller. Since the commit was built inside 73 * of a private revision pool its parent pointers will be initialized, but 74 * will not have their headers loaded. 75 * 76 * Applications are discouraged from using this API. Callers usually need 77 * more than one commit. Use {@link RevWalk#parseCommit(AnyObjectId)} to 78 * obtain a RevCommit from an existing repository. 79 * 80 * @param raw 81 * the canonical formatted commit to be parsed. 82 * @return the parsed commit, in an isolated revision pool that is not 83 * available to the caller. 84 */ 85 public static RevCommit parse(byte[] raw) { 86 try { 87 return parse(new RevWalk((ObjectReader) null), raw); 88 } catch (IOException ex) { 89 throw new RuntimeException(ex); 90 } 91 } 92 93 /** 94 * Parse a commit from its canonical format. 95 * <p> 96 * This method inserts the commit directly into the caller supplied revision 97 * pool, making it appear as though the commit exists in the repository, 98 * even if it doesn't. The repository under the pool is not affected. 99 * <p> 100 * The body of the commit (message, author, committer) is always retained in 101 * the returned {@code RevCommit}, even if the supplied {@code RevWalk} has 102 * been configured with {@code setRetainBody(false)}. 103 * 104 * @param rw 105 * the revision pool to allocate the commit within. The commit's 106 * tree and parent pointers will be obtained from this pool. 107 * @param raw 108 * the canonical formatted commit to be parsed. This buffer will 109 * be retained by the returned {@code RevCommit} and must not be 110 * modified by the caller. 111 * @return the parsed commit, in an isolated revision pool that is not 112 * available to the caller. 113 * @throws IOException 114 * in case of RevWalk initialization fails 115 */ 116 public static RevCommit parse(RevWalk rw, byte[] raw) throws IOException { 117 try (ObjectInserter.Formatter fmt = new ObjectInserter.Formatter()) { 118 RevCommit r = rw.lookupCommit(fmt.idFor(Constants.OBJ_COMMIT, raw)); 119 r.parseCanonical(rw, raw); 120 r.buffer = raw; 121 return r; 122 } 123 } 124 125 static final RevCommit[] NO_PARENTS = {}; 126 127 private RevTree tree; 128 129 RevCommit[] parents; 130 131 int commitTime; // An int here for performance, overflows in 2038 132 133 int inDegree; 134 135 private byte[] buffer; 136 137 /** 138 * Create a new commit reference. 139 * 140 * @param id 141 * object name for the commit. 142 */ 143 protected RevCommit(final AnyObjectId id) { 144 super(id); 145 } 146 147 @Override 148 void parseHeaders(final RevWalk walk) throws MissingObjectException, 149 IncorrectObjectTypeException, IOException { 150 parseCanonical(walk, walk.getCachedBytes(this)); 151 } 152 153 @Override 154 void parseBody(final RevWalk walk) throws MissingObjectException, 155 IncorrectObjectTypeException, IOException { 156 if (buffer == null) { 157 buffer = walk.getCachedBytes(this); 158 if ((flags & PARSED) == 0) 159 parseCanonical(walk, buffer); 160 } 161 } 162 163 void parseCanonical(final RevWalk walk, final byte[] raw) 164 throws IOException { 165 if (!walk.shallowCommitsInitialized) 166 walk.initializeShallowCommits(); 167 168 final MutableObjectId idBuffer = walk.idBuffer; 169 idBuffer.fromString(raw, 5); 170 tree = walk.lookupTree(idBuffer); 171 172 int ptr = 46; 173 if (parents == null) { 174 RevCommit[] pList = new RevCommit[1]; 175 int nParents = 0; 176 for (;;) { 177 if (raw[ptr] != 'p') 178 break; 179 idBuffer.fromString(raw, ptr + 7); 180 final RevCommit p = walk.lookupCommit(idBuffer); 181 if (nParents == 0) 182 pList[nParents++] = p; 183 else if (nParents == 1) { 184 pList = new RevCommit[] { pList[0], p }; 185 nParents = 2; 186 } else { 187 if (pList.length <= nParents) { 188 RevCommit[] old = pList; 189 pList = new RevCommit[pList.length + 32]; 190 System.arraycopy(old, 0, pList, 0, nParents); 191 } 192 pList[nParents++] = p; 193 } 194 ptr += 48; 195 } 196 if (nParents != pList.length) { 197 RevCommit[] old = pList; 198 pList = new RevCommit[nParents]; 199 System.arraycopy(old, 0, pList, 0, nParents); 200 } 201 parents = pList; 202 } 203 204 // extract time from "committer " 205 ptr = RawParseUtils.committer(raw, ptr); 206 if (ptr > 0) { 207 ptr = RawParseUtils.nextLF(raw, ptr, '>'); 208 209 // In 2038 commitTime will overflow unless it is changed to long. 210 commitTime = RawParseUtils.parseBase10(raw, ptr, null); 211 } 212 213 if (walk.isRetainBody()) 214 buffer = raw; 215 flags |= PARSED; 216 } 217 218 @Override 219 public final int getType() { 220 return Constants.OBJ_COMMIT; 221 } 222 223 static void carryFlags(RevCommit c, int carry) { 224 FIFORevQueue q = carryFlags1(c, carry, 0); 225 if (q != null) 226 slowCarryFlags(q, carry); 227 } 228 229 private static FIFORevQueue carryFlags1(RevCommit c, int carry, int depth) { 230 for(;;) { 231 RevCommit[] pList = c.parents; 232 if (pList == null || pList.length == 0) 233 return null; 234 if (pList.length != 1) { 235 if (depth == STACK_DEPTH) 236 return defer(c); 237 for (int i = 1; i < pList.length; i++) { 238 RevCommit p = pList[i]; 239 if ((p.flags & carry) == carry) 240 continue; 241 p.flags |= carry; 242 FIFORevQueue q = carryFlags1(c, carry, depth + 1); 243 if (q != null) 244 return defer(q, carry, pList, i + 1); 245 } 246 } 247 248 c = pList[0]; 249 if ((c.flags & carry) == carry) 250 return null; 251 c.flags |= carry; 252 } 253 } 254 255 private static FIFORevQueue defer(RevCommit c) { 256 FIFORevQueue q = new FIFORevQueue(); 257 q.add(c); 258 return q; 259 } 260 261 private static FIFORevQueue defer(FIFORevQueue q, int carry, 262 RevCommit[] pList, int i) { 263 // In normal case the caller will run pList[0] in a tail recursive 264 // fashion by updating the variable. However the caller is unwinding 265 // the stack and will skip that pList[0] execution step. 266 carryOneStep(q, carry, pList[0]); 267 268 // Remaining parents (if any) need to have flags checked and be 269 // enqueued if they have ancestors. 270 for (; i < pList.length; i++) 271 carryOneStep(q, carry, pList[i]); 272 return q; 273 } 274 275 private static void slowCarryFlags(FIFORevQueue q, int carry) { 276 // Commits in q have non-null parent arrays and have set all 277 // flags in carry. This loop finishes copying over the graph. 278 for (RevCommit c; (c = q.next()) != null;) { 279 for (RevCommit p : c.parents) 280 carryOneStep(q, carry, p); 281 } 282 } 283 284 private static void carryOneStep(FIFORevQueue q, int carry, RevCommit c) { 285 if ((c.flags & carry) != carry) { 286 c.flags |= carry; 287 if (c.parents != null) 288 q.add(c); 289 } 290 } 291 292 /** 293 * Carry a RevFlag set on this commit to its parents. 294 * <p> 295 * If this commit is parsed, has parents, and has the supplied flag set on 296 * it we automatically add it to the parents, grand-parents, and so on until 297 * an unparsed commit or a commit with no parents is discovered. This 298 * permits applications to force a flag through the history chain when 299 * necessary. 300 * 301 * @param flag 302 * the single flag value to carry back onto parents. 303 */ 304 public void carry(final RevFlag flag) { 305 final int carry = flags & flag.mask; 306 if (carry != 0) 307 carryFlags(this, carry); 308 } 309 310 /** 311 * Time from the "committer " line of the buffer. 312 * 313 * @return time, expressed as seconds since the epoch. 314 */ 315 public final int getCommitTime() { 316 return commitTime; 317 } 318 319 /** 320 * Get a reference to this commit's tree. 321 * 322 * @return tree of this commit. 323 */ 324 public final RevTree getTree() { 325 return tree; 326 } 327 328 /** 329 * Get the number of parent commits listed in this commit. 330 * 331 * @return number of parents; always a positive value but can be 0. 332 */ 333 public final int getParentCount() { 334 return parents.length; 335 } 336 337 /** 338 * Get the nth parent from this commit's parent list. 339 * 340 * @param nth 341 * parent index to obtain. Must be in the range 0 through 342 * {@link #getParentCount()}-1. 343 * @return the specified parent. 344 * @throws ArrayIndexOutOfBoundsException 345 * an invalid parent index was specified. 346 */ 347 public final RevCommit getParent(final int nth) { 348 return parents[nth]; 349 } 350 351 /** 352 * Obtain an array of all parents (<b>NOTE - THIS IS NOT A COPY</b>). 353 * <p> 354 * This method is exposed only to provide very fast, efficient access to 355 * this commit's parent list. Applications relying on this list should be 356 * very careful to ensure they do not modify its contents during their use 357 * of it. 358 * 359 * @return the array of parents. 360 */ 361 public final RevCommit[] getParents() { 362 return parents; 363 } 364 365 /** 366 * Obtain the raw unparsed commit body (<b>NOTE - THIS IS NOT A COPY</b>). 367 * <p> 368 * This method is exposed only to provide very fast, efficient access to 369 * this commit's message buffer within a RevFilter. Applications relying on 370 * this buffer should be very careful to ensure they do not modify its 371 * contents during their use of it. 372 * 373 * @return the raw unparsed commit body. This is <b>NOT A COPY</b>. 374 * Altering the contents of this buffer may alter the walker's 375 * knowledge of this commit, and the results it produces. 376 */ 377 public final byte[] getRawBuffer() { 378 return buffer; 379 } 380 381 /** 382 * Parse the author identity from the raw buffer. 383 * <p> 384 * This method parses and returns the content of the author line, after 385 * taking the commit's character set into account and decoding the author 386 * name and email address. This method is fairly expensive and produces a 387 * new PersonIdent instance on each invocation. Callers should invoke this 388 * method only if they are certain they will be outputting the result, and 389 * should cache the return value for as long as necessary to use all 390 * information from it. 391 * <p> 392 * RevFilter implementations should try to use {@link RawParseUtils} to scan 393 * the {@link #getRawBuffer()} instead, as this will allow faster evaluation 394 * of commits. 395 * 396 * @return identity of the author (name, email) and the time the commit was 397 * made by the author; null if no author line was found. 398 */ 399 public final PersonIdent getAuthorIdent() { 400 final byte[] raw = buffer; 401 final int nameB = RawParseUtils.author(raw, 0); 402 if (nameB < 0) 403 return null; 404 return RawParseUtils.parsePersonIdent(raw, nameB); 405 } 406 407 /** 408 * Parse the committer identity from the raw buffer. 409 * <p> 410 * This method parses and returns the content of the committer line, after 411 * taking the commit's character set into account and decoding the committer 412 * name and email address. This method is fairly expensive and produces a 413 * new PersonIdent instance on each invocation. Callers should invoke this 414 * method only if they are certain they will be outputting the result, and 415 * should cache the return value for as long as necessary to use all 416 * information from it. 417 * <p> 418 * RevFilter implementations should try to use {@link RawParseUtils} to scan 419 * the {@link #getRawBuffer()} instead, as this will allow faster evaluation 420 * of commits. 421 * 422 * @return identity of the committer (name, email) and the time the commit 423 * was made by the committer; null if no committer line was found. 424 */ 425 public final PersonIdent getCommitterIdent() { 426 final byte[] raw = buffer; 427 final int nameB = RawParseUtils.committer(raw, 0); 428 if (nameB < 0) 429 return null; 430 return RawParseUtils.parsePersonIdent(raw, nameB); 431 } 432 433 /** 434 * Parse the complete commit message and decode it to a string. 435 * <p> 436 * This method parses and returns the message portion of the commit buffer, 437 * after taking the commit's character set into account and decoding the 438 * buffer using that character set. This method is a fairly expensive 439 * operation and produces a new string on each invocation. 440 * 441 * @return decoded commit message as a string. Never null. 442 */ 443 public final String getFullMessage() { 444 final byte[] raw = buffer; 445 final int msgB = RawParseUtils.commitMessage(raw, 0); 446 if (msgB < 0) 447 return ""; //$NON-NLS-1$ 448 final Charset enc = RawParseUtils.parseEncoding(raw); 449 return RawParseUtils.decode(enc, raw, msgB, raw.length); 450 } 451 452 /** 453 * Parse the commit message and return the first "line" of it. 454 * <p> 455 * The first line is everything up to the first pair of LFs. This is the 456 * "oneline" format, suitable for output in a single line display. 457 * <p> 458 * This method parses and returns the message portion of the commit buffer, 459 * after taking the commit's character set into account and decoding the 460 * buffer using that character set. This method is a fairly expensive 461 * operation and produces a new string on each invocation. 462 * 463 * @return decoded commit message as a string. Never null. The returned 464 * string does not contain any LFs, even if the first paragraph 465 * spanned multiple lines. Embedded LFs are converted to spaces. 466 */ 467 public final String getShortMessage() { 468 final byte[] raw = buffer; 469 final int msgB = RawParseUtils.commitMessage(raw, 0); 470 if (msgB < 0) 471 return ""; //$NON-NLS-1$ 472 473 final Charset enc = RawParseUtils.parseEncoding(raw); 474 final int msgE = RawParseUtils.endOfParagraph(raw, msgB); 475 String str = RawParseUtils.decode(enc, raw, msgB, msgE); 476 if (hasLF(raw, msgB, msgE)) 477 str = StringUtils.replaceLineBreaksWithSpace(str); 478 return str; 479 } 480 481 static boolean hasLF(final byte[] r, int b, final int e) { 482 while (b < e) 483 if (r[b++] == '\n') 484 return true; 485 return false; 486 } 487 488 /** 489 * Determine the encoding of the commit message buffer. 490 * <p> 491 * Locates the "encoding" header (if present) and then returns the proper 492 * character set to apply to this buffer to evaluate its contents as 493 * character data. 494 * <p> 495 * If no encoding header is present, {@link Constants#CHARSET} is assumed. 496 * 497 * @return the preferred encoding of {@link #getRawBuffer()}. 498 */ 499 public final Charset getEncoding() { 500 return RawParseUtils.parseEncoding(buffer); 501 } 502 503 /** 504 * Parse the footer lines (e.g. "Signed-off-by") for machine processing. 505 * <p> 506 * This method splits all of the footer lines out of the last paragraph of 507 * the commit message, providing each line as a key-value pair, ordered by 508 * the order of the line's appearance in the commit message itself. 509 * <p> 510 * A footer line's key must match the pattern {@code ^[A-Za-z0-9-]+:}, while 511 * the value is free-form, but must not contain an LF. Very common keys seen 512 * in the wild are: 513 * <ul> 514 * <li>{@code Signed-off-by} (agrees to Developer Certificate of Origin) 515 * <li>{@code Acked-by} (thinks change looks sane in context) 516 * <li>{@code Reported-by} (originally found the issue this change fixes) 517 * <li>{@code Tested-by} (validated change fixes the issue for them) 518 * <li>{@code CC}, {@code Cc} (copy on all email related to this change) 519 * <li>{@code Bug} (link to project's bug tracking system) 520 * </ul> 521 * 522 * @return ordered list of footer lines; empty list if no footers found. 523 */ 524 public final List<FooterLine> getFooterLines() { 525 final byte[] raw = buffer; 526 int ptr = raw.length - 1; 527 while (raw[ptr] == '\n') // trim any trailing LFs, not interesting 528 ptr--; 529 530 final int msgB = RawParseUtils.commitMessage(raw, 0); 531 final ArrayList<FooterLine> r = new ArrayList<FooterLine>(4); 532 final Charset enc = getEncoding(); 533 for (;;) { 534 ptr = RawParseUtils.prevLF(raw, ptr); 535 if (ptr <= msgB) 536 break; // Don't parse commit headers as footer lines. 537 538 final int keyStart = ptr + 2; 539 if (raw[keyStart] == '\n') 540 break; // Stop at first paragraph break, no footers above it. 541 542 final int keyEnd = RawParseUtils.endOfFooterLineKey(raw, keyStart); 543 if (keyEnd < 0) 544 continue; // Not a well formed footer line, skip it. 545 546 // Skip over the ': *' at the end of the key before the value. 547 // 548 int valStart = keyEnd + 1; 549 while (valStart < raw.length && raw[valStart] == ' ') 550 valStart++; 551 552 // Value ends at the LF, and does not include it. 553 // 554 int valEnd = RawParseUtils.nextLF(raw, valStart); 555 if (raw[valEnd - 1] == '\n') 556 valEnd--; 557 558 r.add(new FooterLine(raw, enc, keyStart, keyEnd, valStart, valEnd)); 559 } 560 Collections.reverse(r); 561 return r; 562 } 563 564 /** 565 * Get the values of all footer lines with the given key. 566 * 567 * @param keyName 568 * footer key to find values of, case insensitive. 569 * @return values of footers with key of {@code keyName}, ordered by their 570 * order of appearance. Duplicates may be returned if the same 571 * footer appeared more than once. Empty list if no footers appear 572 * with the specified key, or there are no footers at all. 573 * @see #getFooterLines() 574 */ 575 public final List<String> getFooterLines(final String keyName) { 576 return getFooterLines(new FooterKey(keyName)); 577 } 578 579 /** 580 * Get the values of all footer lines with the given key. 581 * 582 * @param keyName 583 * footer key to find values of, case insensitive. 584 * @return values of footers with key of {@code keyName}, ordered by their 585 * order of appearance. Duplicates may be returned if the same 586 * footer appeared more than once. Empty list if no footers appear 587 * with the specified key, or there are no footers at all. 588 * @see #getFooterLines() 589 */ 590 public final List<String> getFooterLines(final FooterKey keyName) { 591 final List<FooterLine> src = getFooterLines(); 592 if (src.isEmpty()) 593 return Collections.emptyList(); 594 final ArrayList<String> r = new ArrayList<String>(src.size()); 595 for (final FooterLine f : src) { 596 if (f.matches(keyName)) 597 r.add(f.getValue()); 598 } 599 return r; 600 } 601 602 /** 603 * Reset this commit to allow another RevWalk with the same instances. 604 * <p> 605 * Subclasses <b>must</b> call <code>super.reset()</code> to ensure the 606 * basic information can be correctly cleared out. 607 */ 608 public void reset() { 609 inDegree = 0; 610 } 611 612 /** 613 * Discard the message buffer to reduce memory usage. 614 * <p> 615 * After discarding the memory usage of the {@code RevCommit} is reduced to 616 * only the {@link #getTree()} and {@link #getParents()} pointers and the 617 * time in {@link #getCommitTime()}. Accessing other properties such as 618 * {@link #getAuthorIdent()}, {@link #getCommitterIdent()} or either message 619 * function requires reloading the buffer by invoking 620 * {@link RevWalk#parseBody(RevObject)}. 621 * 622 * @since 4.0 623 */ 624 public final void disposeBody() { 625 buffer = null; 626 } 627 628 @Override 629 public String toString() { 630 final StringBuilder s = new StringBuilder(); 631 s.append(Constants.typeString(getType())); 632 s.append(' '); 633 s.append(name()); 634 s.append(' '); 635 s.append(commitTime); 636 s.append(' '); 637 appendCoreFlags(s); 638 return s.toString(); 639 } 640 }