1 /*
2 * Copyright (C) 2008-2009, Google Inc.
3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
4 * and other copyright owners as documented in the project's IP log.
5 *
6 * This program and the accompanying materials are made available
7 * under the terms of the Eclipse Distribution License v1.0 which
8 * accompanies this distribution, is reproduced below, and is
9 * available at http://www.eclipse.org/org/documents/edl-v10.php
10 *
11 * All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials provided
23 * with the distribution.
24 *
25 * - Neither the name of the Eclipse Foundation, Inc. nor the
26 * names of its contributors may be used to endorse or promote
27 * products derived from this software without specific prior
28 * written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 */
44
45 package org.eclipse.jgit.revwalk;
46
47 import static org.eclipse.jgit.lib.Constants.CHARSET;
48
49 import java.io.IOException;
50 import java.nio.charset.Charset;
51 import java.nio.charset.IllegalCharsetNameException;
52 import java.nio.charset.UnsupportedCharsetException;
53 import java.util.ArrayList;
54 import java.util.Collections;
55 import java.util.List;
56
57 import org.eclipse.jgit.annotations.Nullable;
58 import org.eclipse.jgit.errors.IncorrectObjectTypeException;
59 import org.eclipse.jgit.errors.MissingObjectException;
60 import org.eclipse.jgit.lib.AnyObjectId;
61 import org.eclipse.jgit.lib.Constants;
62 import org.eclipse.jgit.lib.MutableObjectId;
63 import org.eclipse.jgit.lib.ObjectInserter;
64 import org.eclipse.jgit.lib.ObjectReader;
65 import org.eclipse.jgit.lib.PersonIdent;
66 import org.eclipse.jgit.util.RawParseUtils;
67 import org.eclipse.jgit.util.StringUtils;
68
69 /**
70 * A commit reference to a commit in the DAG.
71 */
72 public class RevCommit extends RevObject {
73 private static final int STACK_DEPTH = 500;
74
75 /**
76 * Parse a commit from its canonical format.
77 *
78 * This method constructs a temporary revision pool, parses the commit as
79 * supplied, and returns it to the caller. Since the commit was built inside
80 * of a private revision pool its parent pointers will be initialized, but
81 * will not have their headers loaded.
82 *
83 * Applications are discouraged from using this API. Callers usually need
84 * more than one commit. Use
85 * {@link org.eclipse.jgit.revwalk.RevWalk#parseCommit(AnyObjectId)} to
86 * obtain a RevCommit from an existing repository.
87 *
88 * @param raw
89 * the canonical formatted commit to be parsed.
90 * @return the parsed commit, in an isolated revision pool that is not
91 * available to the caller.
92 */
93 public static RevCommit parse(byte[] raw) {
94 try {
95 return parse(new RevWalk((ObjectReader) null), raw);
96 } catch (IOException ex) {
97 throw new RuntimeException(ex);
98 }
99 }
100
101 /**
102 * Parse a commit from its canonical format.
103 * <p>
104 * This method inserts the commit directly into the caller supplied revision
105 * pool, making it appear as though the commit exists in the repository,
106 * even if it doesn't. The repository under the pool is not affected.
107 * <p>
108 * The body of the commit (message, author, committer) is always retained in
109 * the returned {@code RevCommit}, even if the supplied {@code RevWalk} has
110 * been configured with {@code setRetainBody(false)}.
111 *
112 * @param rw
113 * the revision pool to allocate the commit within. The commit's
114 * tree and parent pointers will be obtained from this pool.
115 * @param raw
116 * the canonical formatted commit to be parsed. This buffer will
117 * be retained by the returned {@code RevCommit} and must not be
118 * modified by the caller.
119 * @return the parsed commit, in an isolated revision pool that is not
120 * available to the caller.
121 * @throws java.io.IOException
122 * in case of RevWalk initialization fails
123 */
124 public static RevCommit parse(RevWalk rw, byte[] raw) throws IOException {
125 try (ObjectInserter.Formatter fmt = new ObjectInserter.Formatter()) {
126 RevCommit r = rw.lookupCommit(fmt.idFor(Constants.OBJ_COMMIT, raw));
127 r.parseCanonical(rw, raw);
128 r.buffer = raw;
129 return r;
130 }
131 }
132
133 static final RevCommit[] NO_PARENTS = {};
134
135 private RevTree tree;
136
137 RevCommit[] parents;
138
139 int commitTime; // An int here for performance, overflows in 2038
140
141 int inDegree;
142
143 private byte[] buffer;
144
145 /**
146 * Create a new commit reference.
147 *
148 * @param id
149 * object name for the commit.
150 */
151 protected RevCommit(AnyObjectId id) {
152 super(id);
153 }
154
155 @Override
156 void parseHeaders(RevWalk walk) throws MissingObjectException,
157 IncorrectObjectTypeException, IOException {
158 parseCanonical(walk, walk.getCachedBytes(this));
159 }
160
161 @Override
162 void parseBody(RevWalk walk) throws MissingObjectException,
163 IncorrectObjectTypeException, IOException {
164 if (buffer == null) {
165 buffer = walk.getCachedBytes(this);
166 if ((flags & PARSED) == 0)
167 parseCanonical(walk, buffer);
168 }
169 }
170
171 void parseCanonical(RevWalk walk, byte[] raw)
172 throws IOException {
173 if (!walk.shallowCommitsInitialized)
174 walk.initializeShallowCommits();
175
176 final MutableObjectId idBuffer = walk.idBuffer;
177 idBuffer.fromString(raw, 5);
178 tree = walk.lookupTree(idBuffer);
179
180 int ptr = 46;
181 if (parents == null) {
182 RevCommit[] pList = new RevCommit[1];
183 int nParents = 0;
184 for (;;) {
185 if (raw[ptr] != 'p')
186 break;
187 idBuffer.fromString(raw, ptr + 7);
188 final RevCommit p = walk.lookupCommit(idBuffer);
189 if (nParents == 0)
190 pList[nParents++] = p;
191 else if (nParents == 1) {
192 pList = new RevCommit[] { pList[0], p };
193 nParents = 2;
194 } else {
195 if (pList.length <= nParents) {
196 RevCommit[] old = pList;
197 pList = new RevCommit[pList.length + 32];
198 System.arraycopy(old, 0, pList, 0, nParents);
199 }
200 pList[nParents++] = p;
201 }
202 ptr += 48;
203 }
204 if (nParents != pList.length) {
205 RevCommit[] old = pList;
206 pList = new RevCommit[nParents];
207 System.arraycopy(old, 0, pList, 0, nParents);
208 }
209 parents = pList;
210 }
211
212 // extract time from "committer "
213 ptr = RawParseUtils.committer(raw, ptr);
214 if (ptr > 0) {
215 ptr = RawParseUtils.nextLF(raw, ptr, '>');
216
217 // In 2038 commitTime will overflow unless it is changed to long.
218 commitTime = RawParseUtils.parseBase10(raw, ptr, null);
219 }
220
221 if (walk.isRetainBody())
222 buffer = raw;
223 flags |= PARSED;
224 }
225
226 /** {@inheritDoc} */
227 @Override
228 public final int getType() {
229 return Constants.OBJ_COMMIT;
230 }
231
232 static void carryFlags(RevCommit c, int carry) {
233 FIFORevQueue q = carryFlags1(c, carry, 0);
234 if (q != null)
235 slowCarryFlags(q, carry);
236 }
237
238 private static FIFORevQueue carryFlags1(RevCommit c, int carry, int depth) {
239 for(;;) {
240 RevCommit[] pList = c.parents;
241 if (pList == null || pList.length == 0)
242 return null;
243 if (pList.length != 1) {
244 if (depth == STACK_DEPTH)
245 return defer(c);
246 for (int i = 1; i < pList.length; i++) {
247 RevCommit p = pList[i];
248 if ((p.flags & carry) == carry)
249 continue;
250 p.flags |= carry;
251 FIFORevQueue q = carryFlags1(p, carry, depth + 1);
252 if (q != null)
253 return defer(q, carry, pList, i + 1);
254 }
255 }
256
257 c = pList[0];
258 if ((c.flags & carry) == carry)
259 return null;
260 c.flags |= carry;
261 }
262 }
263
264 private static FIFORevQueue defer(RevCommit c) {
265 FIFORevQueue q = new FIFORevQueue();
266 q.add(c);
267 return q;
268 }
269
270 private static FIFORevQueue defer(FIFORevQueue q, int carry,
271 RevCommit[] pList, int i) {
272 // In normal case the caller will run pList[0] in a tail recursive
273 // fashion by updating the variable. However the caller is unwinding
274 // the stack and will skip that pList[0] execution step.
275 carryOneStep(q, carry, pList[0]);
276
277 // Remaining parents (if any) need to have flags checked and be
278 // enqueued if they have ancestors.
279 for (; i < pList.length; i++)
280 carryOneStep(q, carry, pList[i]);
281 return q;
282 }
283
284 private static void slowCarryFlags(FIFORevQueue q, int carry) {
285 // Commits in q have non-null parent arrays and have set all
286 // flags in carry. This loop finishes copying over the graph.
287 for (RevCommit c; (c = q.next()) != null;) {
288 for (RevCommit p : c.parents)
289 carryOneStep(q, carry, p);
290 }
291 }
292
293 private static void carryOneStep(FIFORevQueue q, int carry, RevCommit c) {
294 if ((c.flags & carry) != carry) {
295 c.flags |= carry;
296 if (c.parents != null)
297 q.add(c);
298 }
299 }
300
301 /**
302 * Carry a RevFlag set on this commit to its parents.
303 * <p>
304 * If this commit is parsed, has parents, and has the supplied flag set on
305 * it we automatically add it to the parents, grand-parents, and so on until
306 * an unparsed commit or a commit with no parents is discovered. This
307 * permits applications to force a flag through the history chain when
308 * necessary.
309 *
310 * @param flag
311 * the single flag value to carry back onto parents.
312 */
313 public void carry(RevFlag flag) {
314 final int carry = flags & flag.mask;
315 if (carry != 0)
316 carryFlags(this, carry);
317 }
318
319 /**
320 * Time from the "committer " line of the buffer.
321 *
322 * @return commit time
323 */
324 public final int getCommitTime() {
325 return commitTime;
326 }
327
328 /**
329 * Get a reference to this commit's tree.
330 *
331 * @return tree of this commit.
332 */
333 public final RevTree getTree() {
334 return tree;
335 }
336
337 /**
338 * Get the number of parent commits listed in this commit.
339 *
340 * @return number of parents; always a positive value but can be 0.
341 */
342 public final int getParentCount() {
343 return parents.length;
344 }
345
346 /**
347 * Get the nth parent from this commit's parent list.
348 *
349 * @param nth
350 * parent index to obtain. Must be in the range 0 through
351 * {@link #getParentCount()}-1.
352 * @return the specified parent.
353 * @throws java.lang.ArrayIndexOutOfBoundsException
354 * an invalid parent index was specified.
355 */
356 public final RevCommit getParent(int nth) {
357 return parents[nth];
358 }
359
360 /**
361 * Obtain an array of all parents (<b>NOTE - THIS IS NOT A COPY</b>).
362 * <p>
363 * This method is exposed only to provide very fast, efficient access to
364 * this commit's parent list. Applications relying on this list should be
365 * very careful to ensure they do not modify its contents during their use
366 * of it.
367 *
368 * @return the array of parents.
369 */
370 public final RevCommit[] getParents() {
371 return parents;
372 }
373
374 /**
375 * Obtain the raw unparsed commit body (<b>NOTE - THIS IS NOT A COPY</b>).
376 * <p>
377 * This method is exposed only to provide very fast, efficient access to
378 * this commit's message buffer within a RevFilter. Applications relying on
379 * this buffer should be very careful to ensure they do not modify its
380 * contents during their use of it.
381 *
382 * @return the raw unparsed commit body. This is <b>NOT A COPY</b>.
383 * Altering the contents of this buffer may alter the walker's
384 * knowledge of this commit, and the results it produces.
385 */
386 public final byte[] getRawBuffer() {
387 return buffer;
388 }
389
390 /**
391 * Parse the author identity from the raw buffer.
392 * <p>
393 * This method parses and returns the content of the author line, after
394 * taking the commit's character set into account and decoding the author
395 * name and email address. This method is fairly expensive and produces a
396 * new PersonIdent instance on each invocation. Callers should invoke this
397 * method only if they are certain they will be outputting the result, and
398 * should cache the return value for as long as necessary to use all
399 * information from it.
400 * <p>
401 * RevFilter implementations should try to use
402 * {@link org.eclipse.jgit.util.RawParseUtils} to scan the
403 * {@link #getRawBuffer()} instead, as this will allow faster evaluation of
404 * commits.
405 *
406 * @return identity of the author (name, email) and the time the commit was
407 * made by the author; null if no author line was found.
408 */
409 public final PersonIdent getAuthorIdent() {
410 final byte[] raw = buffer;
411 final int nameB = RawParseUtils.author(raw, 0);
412 if (nameB < 0)
413 return null;
414 return RawParseUtils.parsePersonIdent(raw, nameB);
415 }
416
417 /**
418 * Parse the committer identity from the raw buffer.
419 * <p>
420 * This method parses and returns the content of the committer line, after
421 * taking the commit's character set into account and decoding the committer
422 * name and email address. This method is fairly expensive and produces a
423 * new PersonIdent instance on each invocation. Callers should invoke this
424 * method only if they are certain they will be outputting the result, and
425 * should cache the return value for as long as necessary to use all
426 * information from it.
427 * <p>
428 * RevFilter implementations should try to use
429 * {@link org.eclipse.jgit.util.RawParseUtils} to scan the
430 * {@link #getRawBuffer()} instead, as this will allow faster evaluation of
431 * commits.
432 *
433 * @return identity of the committer (name, email) and the time the commit
434 * was made by the committer; null if no committer line was found.
435 */
436 public final PersonIdent getCommitterIdent() {
437 final byte[] raw = buffer;
438 final int nameB = RawParseUtils.committer(raw, 0);
439 if (nameB < 0)
440 return null;
441 return RawParseUtils.parsePersonIdent(raw, nameB);
442 }
443
444 /**
445 * Parse the complete commit message and decode it to a string.
446 * <p>
447 * This method parses and returns the message portion of the commit buffer,
448 * after taking the commit's character set into account and decoding the
449 * buffer using that character set. This method is a fairly expensive
450 * operation and produces a new string on each invocation.
451 *
452 * @return decoded commit message as a string. Never null.
453 */
454 public final String getFullMessage() {
455 byte[] raw = buffer;
456 int msgB = RawParseUtils.commitMessage(raw, 0);
457 if (msgB < 0) {
458 return ""; //$NON-NLS-1$
459 }
460 return RawParseUtils.decode(guessEncoding(), raw, msgB, raw.length);
461 }
462
463 /**
464 * Parse the commit message and return the first "line" of it.
465 * <p>
466 * The first line is everything up to the first pair of LFs. This is the
467 * "oneline" format, suitable for output in a single line display.
468 * <p>
469 * This method parses and returns the message portion of the commit buffer,
470 * after taking the commit's character set into account and decoding the
471 * buffer using that character set. This method is a fairly expensive
472 * operation and produces a new string on each invocation.
473 *
474 * @return decoded commit message as a string. Never null. The returned
475 * string does not contain any LFs, even if the first paragraph
476 * spanned multiple lines. Embedded LFs are converted to spaces.
477 */
478 public final String getShortMessage() {
479 byte[] raw = buffer;
480 int msgB = RawParseUtils.commitMessage(raw, 0);
481 if (msgB < 0) {
482 return ""; //$NON-NLS-1$
483 }
484
485 int msgE = RawParseUtils.endOfParagraph(raw, msgB);
486 String str = RawParseUtils.decode(guessEncoding(), raw, msgB, msgE);
487 if (hasLF(raw, msgB, msgE)) {
488 str = StringUtils.replaceLineBreaksWithSpace(str);
489 }
490 return str;
491 }
492
493 static boolean hasLF(byte[] r, int b, int e) {
494 while (b < e)
495 if (r[b++] == '\n')
496 return true;
497 return false;
498 }
499
500 /**
501 * Determine the encoding of the commit message buffer.
502 * <p>
503 * Locates the "encoding" header (if present) and returns its value. Due to
504 * corruption in the wild this may be an invalid encoding name that is not
505 * recognized by any character encoding library.
506 * <p>
507 * If no encoding header is present, null.
508 *
509 * @return the preferred encoding of {@link #getRawBuffer()}; or null.
510 * @since 4.2
511 */
512 @Nullable
513 public final String getEncodingName() {
514 return RawParseUtils.parseEncodingName(buffer);
515 }
516
517 /**
518 * Determine the encoding of the commit message buffer.
519 * <p>
520 * Locates the "encoding" header (if present) and then returns the proper
521 * character set to apply to this buffer to evaluate its contents as
522 * character data.
523 * <p>
524 * If no encoding header is present {@code UTF-8} is assumed.
525 *
526 * @return the preferred encoding of {@link #getRawBuffer()}.
527 * @throws IllegalCharsetNameException
528 * if the character set requested by the encoding header is
529 * malformed and unsupportable.
530 * @throws UnsupportedCharsetException
531 * if the JRE does not support the character set requested by
532 * the encoding header.
533 */
534 public final Charset getEncoding() {
535 return RawParseUtils.parseEncoding(buffer);
536 }
537
538 private Charset guessEncoding() {
539 try {
540 return getEncoding();
541 } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
542 return CHARSET;
543 }
544 }
545
546 /**
547 * Parse the footer lines (e.g. "Signed-off-by") for machine processing.
548 * <p>
549 * This method splits all of the footer lines out of the last paragraph of
550 * the commit message, providing each line as a key-value pair, ordered by
551 * the order of the line's appearance in the commit message itself.
552 * <p>
553 * A footer line's key must match the pattern {@code ^[A-Za-z0-9-]+:}, while
554 * the value is free-form, but must not contain an LF. Very common keys seen
555 * in the wild are:
556 * <ul>
557 * <li>{@code Signed-off-by} (agrees to Developer Certificate of Origin)
558 * <li>{@code Acked-by} (thinks change looks sane in context)
559 * <li>{@code Reported-by} (originally found the issue this change fixes)
560 * <li>{@code Tested-by} (validated change fixes the issue for them)
561 * <li>{@code CC}, {@code Cc} (copy on all email related to this change)
562 * <li>{@code Bug} (link to project's bug tracking system)
563 * </ul>
564 *
565 * @return ordered list of footer lines; empty list if no footers found.
566 */
567 public final List<FooterLine> getFooterLines() {
568 final byte[] raw = buffer;
569 int ptr = raw.length - 1;
570 while (raw[ptr] == '\n') // trim any trailing LFs, not interesting
571 ptr--;
572
573 final int msgB = RawParseUtils.commitMessage(raw, 0);
574 final ArrayList<FooterLine> r = new ArrayList<>(4);
575 final Charset enc = guessEncoding();
576 for (;;) {
577 ptr = RawParseUtils.prevLF(raw, ptr);
578 if (ptr <= msgB)
579 break; // Don't parse commit headers as footer lines.
580
581 final int keyStart = ptr + 2;
582 if (raw[keyStart] == '\n')
583 break; // Stop at first paragraph break, no footers above it.
584
585 final int keyEnd = RawParseUtils.endOfFooterLineKey(raw, keyStart);
586 if (keyEnd < 0)
587 continue; // Not a well formed footer line, skip it.
588
589 // Skip over the ': *' at the end of the key before the value.
590 //
591 int valStart = keyEnd + 1;
592 while (valStart < raw.length && raw[valStart] == ' ')
593 valStart++;
594
595 // Value ends at the LF, and does not include it.
596 //
597 int valEnd = RawParseUtils.nextLF(raw, valStart);
598 if (raw[valEnd - 1] == '\n')
599 valEnd--;
600
601 r.add(new FooterLine(raw, enc, keyStart, keyEnd, valStart, valEnd));
602 }
603 Collections.reverse(r);
604 return r;
605 }
606
607 /**
608 * Get the values of all footer lines with the given key.
609 *
610 * @param keyName
611 * footer key to find values of, case insensitive.
612 * @return values of footers with key of {@code keyName}, ordered by their
613 * order of appearance. Duplicates may be returned if the same
614 * footer appeared more than once. Empty list if no footers appear
615 * with the specified key, or there are no footers at all.
616 * @see #getFooterLines()
617 */
618 public final List<String> getFooterLines(String keyName) {
619 return getFooterLines(new FooterKey(keyName));
620 }
621
622 /**
623 * Get the values of all footer lines with the given key.
624 *
625 * @param keyName
626 * footer key to find values of, case insensitive.
627 * @return values of footers with key of {@code keyName}, ordered by their
628 * order of appearance. Duplicates may be returned if the same
629 * footer appeared more than once. Empty list if no footers appear
630 * with the specified key, or there are no footers at all.
631 * @see #getFooterLines()
632 */
633 public final List<String> getFooterLines(FooterKey keyName) {
634 final List<FooterLine> src = getFooterLines();
635 if (src.isEmpty())
636 return Collections.emptyList();
637 final ArrayList<String> r = new ArrayList<>(src.size());
638 for (FooterLine f : src) {
639 if (f.matches(keyName))
640 r.add(f.getValue());
641 }
642 return r;
643 }
644
645 /**
646 * Reset this commit to allow another RevWalk with the same instances.
647 * <p>
648 * Subclasses <b>must</b> call <code>super.reset()</code> to ensure the
649 * basic information can be correctly cleared out.
650 */
651 public void reset() {
652 inDegree = 0;
653 }
654
655 /**
656 * Discard the message buffer to reduce memory usage.
657 * <p>
658 * After discarding the memory usage of the {@code RevCommit} is reduced to
659 * only the {@link #getTree()} and {@link #getParents()} pointers and the
660 * time in {@link #getCommitTime()}. Accessing other properties such as
661 * {@link #getAuthorIdent()}, {@link #getCommitterIdent()} or either message
662 * function requires reloading the buffer by invoking
663 * {@link org.eclipse.jgit.revwalk.RevWalk#parseBody(RevObject)}.
664 *
665 * @since 4.0
666 */
667 public final void disposeBody() {
668 buffer = null;
669 }
670
671 /** {@inheritDoc} */
672 @Override
673 public String toString() {
674 final StringBuilder s = new StringBuilder();
675 s.append(Constants.typeString(getType()));
676 s.append(' ');
677 s.append(name());
678 s.append(' ');
679 s.append(commitTime);
680 s.append(' ');
681 appendCoreFlags(s);
682 return s.toString();
683 }
684 }