1 /*
2 * Copyright (C) 2008-2009, Google Inc.
3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
4 *
5 * This program and the accompanying materials are made available under the
6 * terms of the Eclipse Distribution License v. 1.0 which is available at
7 * https://www.eclipse.org/org/documents/edl-v10.php.
8 *
9 * SPDX-License-Identifier: BSD-3-Clause
10 */
11
12 package org.eclipse.jgit.revwalk;
13
14 import static java.nio.charset.StandardCharsets.UTF_8;
15
16 import java.io.IOException;
17 import java.nio.charset.Charset;
18 import java.nio.charset.IllegalCharsetNameException;
19 import java.nio.charset.UnsupportedCharsetException;
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.Collections;
23 import java.util.List;
24
25 import org.eclipse.jgit.annotations.Nullable;
26 import org.eclipse.jgit.errors.IncorrectObjectTypeException;
27 import org.eclipse.jgit.errors.MissingObjectException;
28 import org.eclipse.jgit.lib.AnyObjectId;
29 import org.eclipse.jgit.lib.Constants;
30 import org.eclipse.jgit.lib.MutableObjectId;
31 import org.eclipse.jgit.lib.ObjectInserter;
32 import org.eclipse.jgit.lib.ObjectReader;
33 import org.eclipse.jgit.lib.PersonIdent;
34 import org.eclipse.jgit.util.RawParseUtils;
35 import org.eclipse.jgit.util.StringUtils;
36
37 /**
38 * A commit reference to a commit in the DAG.
39 */
40 public class RevCommit extends RevObject {
/**
 * Recursion depth at which {@code carryFlags1} stops descending into
 * merge parents and hands the commit over to a queue instead.
 */
private static final int STACK_DEPTH = 500;
42
43 /**
44 * Parse a commit from its canonical format.
45 *
46 * This method constructs a temporary revision pool, parses the commit as
47 * supplied, and returns it to the caller. Since the commit was built inside
48 * of a private revision pool its parent pointers will be initialized, but
49 * will not have their headers loaded.
50 *
51 * Applications are discouraged from using this API. Callers usually need
52 * more than one commit. Use
53 * {@link org.eclipse.jgit.revwalk.RevWalk#parseCommit(AnyObjectId)} to
54 * obtain a RevCommit from an existing repository.
55 *
56 * @param raw
57 * the canonical formatted commit to be parsed.
58 * @return the parsed commit, in an isolated revision pool that is not
59 * available to the caller.
60 */
61 public static RevCommit parse(byte[] raw) {
62 try {
63 return parse(new RevWalk((ObjectReader) null), raw);
64 } catch (IOException ex) {
65 throw new RuntimeException(ex);
66 }
67 }
68
69 /**
70 * Parse a commit from its canonical format.
71 * <p>
72 * This method inserts the commit directly into the caller supplied revision
73 * pool, making it appear as though the commit exists in the repository,
74 * even if it doesn't. The repository under the pool is not affected.
75 * <p>
76 * The body of the commit (message, author, committer) is always retained in
77 * the returned {@code RevCommit}, even if the supplied {@code RevWalk} has
78 * been configured with {@code setRetainBody(false)}.
79 *
80 * @param rw
81 * the revision pool to allocate the commit within. The commit's
82 * tree and parent pointers will be obtained from this pool.
83 * @param raw
84 * the canonical formatted commit to be parsed. This buffer will
85 * be retained by the returned {@code RevCommit} and must not be
86 * modified by the caller.
87 * @return the parsed commit, in an isolated revision pool that is not
88 * available to the caller.
89 * @throws java.io.IOException
90 * in case of RevWalk initialization fails
91 */
92 public static RevCommit parse(RevWalk rw, byte[] raw) throws IOException {
93 try (ObjectInserter.Formatter fmt = new ObjectInserter.Formatter()) {
94 RevCommit r = rw.lookupCommit(fmt.idFor(Constants.OBJ_COMMIT, raw));
95 r.parseCanonical(rw, raw);
96 r.buffer = raw;
97 return r;
98 }
99 }
100
101 static final RevCommit[] NO_PARENTS = {};
102
103 private RevTree tree;
104
105 RevCommit[] parents;
106
107 int commitTime; // An int here for performance, overflows in 2038
108
109 int inDegree;
110
111 private byte[] buffer;
112
/**
 * Create a new commit reference.
 * <p>
 * Only the object name is recorded here; tree, parents, commit time and
 * message buffer are filled in later when the commit is parsed.
 *
 * @param id
 *            object name for the commit.
 */
protected RevCommit(AnyObjectId id) {
	super(id);
}
122
123 @Override
124 void parseHeaders(RevWalk walk) throws MissingObjectException,
125 IncorrectObjectTypeException, IOException {
126 parseCanonical(walk, walk.getCachedBytes(this));
127 }
128
129 @Override
130 void parseBody(RevWalk walk) throws MissingObjectException,
131 IncorrectObjectTypeException, IOException {
132 if (buffer == null) {
133 buffer = walk.getCachedBytes(this);
134 if ((flags & PARSED) == 0)
135 parseCanonical(walk, buffer);
136 }
137 }
138
139 void parseCanonical(RevWalk walk, byte[] raw) throws IOException {
140 if (!walk.shallowCommitsInitialized) {
141 walk.initializeShallowCommits(this);
142 }
143
144 final MutableObjectId idBuffer = walk.idBuffer;
145 idBuffer.fromString(raw, 5);
146 tree = walk.lookupTree(idBuffer);
147
148 int ptr = 46;
149 if (parents == null) {
150 RevCommit[] pList = new RevCommit[1];
151 int nParents = 0;
152 for (;;) {
153 if (raw[ptr] != 'p') {
154 break;
155 }
156 idBuffer.fromString(raw, ptr + 7);
157 final RevCommit p = walk.lookupCommit(idBuffer);
158 switch (nParents) {
159 case 0:
160 pList[nParents++] = p;
161 break;
162 case 1:
163 pList = new RevCommit[] { pList[0], p };
164 nParents = 2;
165 break;
166 default:
167 if (pList.length <= nParents) {
168 RevCommit[] old = pList;
169 pList = new RevCommit[pList.length + 32];
170 System.arraycopy(old, 0, pList, 0, nParents);
171 }
172 pList[nParents++] = p;
173 break;
174 }
175 ptr += 48;
176 }
177 if (nParents != pList.length) {
178 RevCommit[] old = pList;
179 pList = new RevCommit[nParents];
180 System.arraycopy(old, 0, pList, 0, nParents);
181 }
182 parents = pList;
183 }
184
185 // extract time from "committer "
186 ptr = RawParseUtils.committer(raw, ptr);
187 if (ptr > 0) {
188 ptr = RawParseUtils.nextLF(raw, ptr, '>');
189
190 // In 2038 commitTime will overflow unless it is changed to long.
191 commitTime = RawParseUtils.parseBase10(raw, ptr, null);
192 }
193
194 if (walk.isRetainBody()) {
195 buffer = raw;
196 }
197 flags |= PARSED;
198 }
199
/**
 * {@inheritDoc}
 * <p>
 * Always {@link Constants#OBJ_COMMIT} for this class.
 */
@Override
public final int getType() {
	return Constants.OBJ_COMMIT;
}
205
206 static void carryFlags(RevCommit c, int carry) {
207 FIFORevQueue q = carryFlags1(c, carry, 0);
208 if (q != null)
209 slowCarryFlags(q, carry);
210 }
211
212 private static FIFORevQueue carryFlags1(RevCommit c, int carry, int depth) {
213 for(;;) {
214 RevCommit[] pList = c.parents;
215 if (pList == null || pList.length == 0)
216 return null;
217 if (pList.length != 1) {
218 if (depth == STACK_DEPTH)
219 return defer(c);
220 for (int i = 1; i < pList.length; i++) {
221 RevCommit p = pList[i];
222 if ((p.flags & carry) == carry)
223 continue;
224 p.flags |= carry;
225 FIFORevQueue q = carryFlags1(p, carry, depth + 1);
226 if (q != null)
227 return defer(q, carry, pList, i + 1);
228 }
229 }
230
231 c = pList[0];
232 if ((c.flags & carry) == carry)
233 return null;
234 c.flags |= carry;
235 }
236 }
237
238 private static FIFORevQueue defer(RevCommit c) {
239 FIFORevQueue q = new FIFORevQueue();
240 q.add(c);
241 return q;
242 }
243
244 private static FIFORevQueue/../../../org/eclipse/jgit/revwalk/FIFORevQueue.html#FIFORevQueue">FIFORevQueue defer(FIFORevQueue q, int carry,
245 RevCommit[] pList, int i) {
246 // In normal case the caller will run pList[0] in a tail recursive
247 // fashion by updating the variable. However the caller is unwinding
248 // the stack and will skip that pList[0] execution step.
249 carryOneStep(q, carry, pList[0]);
250
251 // Remaining parents (if any) need to have flags checked and be
252 // enqueued if they have ancestors.
253 for (; i < pList.length; i++)
254 carryOneStep(q, carry, pList[i]);
255 return q;
256 }
257
258 private static void slowCarryFlags(FIFORevQueue q, int carry) {
259 // Commits in q have non-null parent arrays and have set all
260 // flags in carry. This loop finishes copying over the graph.
261 for (RevCommit c; (c = q.next()) != null;) {
262 for (RevCommit p : c.parents)
263 carryOneStep(q, carry, p);
264 }
265 }
266
267 private static void carryOneStep(FIFORevQueue q, int carry, RevCommit c) {
268 if ((c.flags & carry) != carry) {
269 c.flags |= carry;
270 if (c.parents != null)
271 q.add(c);
272 }
273 }
274
275 /**
276 * Carry a RevFlag set on this commit to its parents.
277 * <p>
278 * If this commit is parsed, has parents, and has the supplied flag set on
279 * it we automatically add it to the parents, grand-parents, and so on until
280 * an unparsed commit or a commit with no parents is discovered. This
281 * permits applications to force a flag through the history chain when
282 * necessary.
283 *
284 * @param flag
285 * the single flag value to carry back onto parents.
286 */
287 public void carry(RevFlag flag) {
288 final int carry = flags & flag.mask;
289 if (carry != 0)
290 carryFlags(this, carry);
291 }
292
293 /**
294 * Time from the "committer " line of the buffer.
295 *
296 * @return commit time
297 */
298 public final int getCommitTime() {
299 return commitTime;
300 }
301
302 /**
303 * Get a reference to this commit's tree.
304 *
305 * @return tree of this commit.
306 */
307 public final RevTree getTree() {
308 return tree;
309 }
310
311 /**
312 * Get the number of parent commits listed in this commit.
313 *
314 * @return number of parents; always a positive value but can be 0.
315 */
316 public final int getParentCount() {
317 return parents.length;
318 }
319
320 /**
321 * Get the nth parent from this commit's parent list.
322 *
323 * @param nth
324 * parent index to obtain. Must be in the range 0 through
325 * {@link #getParentCount()}-1.
326 * @return the specified parent.
327 * @throws java.lang.ArrayIndexOutOfBoundsException
328 * an invalid parent index was specified.
329 */
330 public final RevCommit getParent(int nth) {
331 return parents[nth];
332 }
333
334 /**
335 * Obtain an array of all parents (<b>NOTE - THIS IS NOT A COPY</b>).
336 * <p>
337 * This method is exposed only to provide very fast, efficient access to
338 * this commit's parent list. Applications relying on this list should be
339 * very careful to ensure they do not modify its contents during their use
340 * of it.
341 *
342 * @return the array of parents.
343 */
344 public final RevCommit[] getParents() {
345 return parents;
346 }
347
348 /**
349 * Obtain the raw unparsed commit body (<b>NOTE - THIS IS NOT A COPY</b>).
350 * <p>
351 * This method is exposed only to provide very fast, efficient access to
352 * this commit's message buffer within a RevFilter. Applications relying on
353 * this buffer should be very careful to ensure they do not modify its
354 * contents during their use of it.
355 *
356 * @return the raw unparsed commit body. This is <b>NOT A COPY</b>.
357 * Altering the contents of this buffer may alter the walker's
358 * knowledge of this commit, and the results it produces.
359 */
360 public final byte[] getRawBuffer() {
361 return buffer;
362 }
363
364 /**
365 * Parse the gpg signature from the raw buffer.
366 * <p>
367 * This method parses and returns the raw content of the gpgsig lines. This
368 * method is fairly expensive and produces a new byte[] instance on each
369 * invocation. Callers should invoke this method only if they are certain
370 * they will need, and should cache the return value for as long as
371 * necessary to use all information from it.
372 * <p>
373 * RevFilter implementations should try to use
374 * {@link org.eclipse.jgit.util.RawParseUtils} to scan the
375 * {@link #getRawBuffer()} instead, as this will allow faster evaluation of
376 * commits.
377 *
378 * @return contents of the gpg signature; null if the commit was not signed.
379 * @since 5.1
380 */
381 public final byte[] getRawGpgSignature() {
382 final byte[] raw = buffer;
383 final byte[] header = {'g', 'p', 'g', 's', 'i', 'g'};
384 final int start = RawParseUtils.headerStart(header, raw, 0);
385 if (start < 0) {
386 return null;
387 }
388 final int end = RawParseUtils.headerEnd(raw, start);
389 return Arrays.copyOfRange(raw, start, end);
390 }
391
392 /**
393 * Parse the author identity from the raw buffer.
394 * <p>
395 * This method parses and returns the content of the author line, after
396 * taking the commit's character set into account and decoding the author
397 * name and email address. This method is fairly expensive and produces a
398 * new PersonIdent instance on each invocation. Callers should invoke this
399 * method only if they are certain they will be outputting the result, and
400 * should cache the return value for as long as necessary to use all
401 * information from it.
402 * <p>
403 * RevFilter implementations should try to use
404 * {@link org.eclipse.jgit.util.RawParseUtils} to scan the
405 * {@link #getRawBuffer()} instead, as this will allow faster evaluation of
406 * commits.
407 *
408 * @return identity of the author (name, email) and the time the commit was
409 * made by the author; null if no author line was found.
410 */
411 public final PersonIdent getAuthorIdent() {
412 final byte[] raw = buffer;
413 final int nameB = RawParseUtils.author(raw, 0);
414 if (nameB < 0)
415 return null;
416 return RawParseUtils.parsePersonIdent(raw, nameB);
417 }
418
419 /**
420 * Parse the committer identity from the raw buffer.
421 * <p>
422 * This method parses and returns the content of the committer line, after
423 * taking the commit's character set into account and decoding the committer
424 * name and email address. This method is fairly expensive and produces a
425 * new PersonIdent instance on each invocation. Callers should invoke this
426 * method only if they are certain they will be outputting the result, and
427 * should cache the return value for as long as necessary to use all
428 * information from it.
429 * <p>
430 * RevFilter implementations should try to use
431 * {@link org.eclipse.jgit.util.RawParseUtils} to scan the
432 * {@link #getRawBuffer()} instead, as this will allow faster evaluation of
433 * commits.
434 *
435 * @return identity of the committer (name, email) and the time the commit
436 * was made by the committer; null if no committer line was found.
437 */
438 public final PersonIdent getCommitterIdent() {
439 final byte[] raw = buffer;
440 final int nameB = RawParseUtils.committer(raw, 0);
441 if (nameB < 0)
442 return null;
443 return RawParseUtils.parsePersonIdent(raw, nameB);
444 }
445
446 /**
447 * Parse the complete commit message and decode it to a string.
448 * <p>
449 * This method parses and returns the message portion of the commit buffer,
450 * after taking the commit's character set into account and decoding the
451 * buffer using that character set. This method is a fairly expensive
452 * operation and produces a new string on each invocation.
453 *
454 * @return decoded commit message as a string. Never null.
455 */
456 public final String getFullMessage() {
457 byte[] raw = buffer;
458 int msgB = RawParseUtils.commitMessage(raw, 0);
459 if (msgB < 0) {
460 return ""; //$NON-NLS-1$
461 }
462 return RawParseUtils.decode(guessEncoding(), raw, msgB, raw.length);
463 }
464
465 /**
466 * Parse the commit message and return the first "line" of it.
467 * <p>
468 * The first line is everything up to the first pair of LFs. This is the
469 * "oneline" format, suitable for output in a single line display.
470 * <p>
471 * This method parses and returns the message portion of the commit buffer,
472 * after taking the commit's character set into account and decoding the
473 * buffer using that character set. This method is a fairly expensive
474 * operation and produces a new string on each invocation.
475 *
476 * @return decoded commit message as a string. Never null. The returned
477 * string does not contain any LFs, even if the first paragraph
478 * spanned multiple lines. Embedded LFs are converted to spaces.
479 */
480 public final String getShortMessage() {
481 byte[] raw = buffer;
482 int msgB = RawParseUtils.commitMessage(raw, 0);
483 if (msgB < 0) {
484 return ""; //$NON-NLS-1$
485 }
486
487 int msgE = RawParseUtils.endOfParagraph(raw, msgB);
488 String str = RawParseUtils.decode(guessEncoding(), raw, msgB, msgE);
489 if (hasLF(raw, msgB, msgE)) {
490 str = StringUtils.replaceLineBreaksWithSpace(str);
491 }
492 return str;
493 }
494
495 static boolean hasLF(byte[] r, int b, int e) {
496 while (b < e)
497 if (r[b++] == '\n')
498 return true;
499 return false;
500 }
501
502 /**
503 * Determine the encoding of the commit message buffer.
504 * <p>
505 * Locates the "encoding" header (if present) and returns its value. Due to
506 * corruption in the wild this may be an invalid encoding name that is not
507 * recognized by any character encoding library.
508 * <p>
509 * If no encoding header is present, null.
510 *
511 * @return the preferred encoding of {@link #getRawBuffer()}; or null.
512 * @since 4.2
513 */
514 @Nullable
515 public final String getEncodingName() {
516 return RawParseUtils.parseEncodingName(buffer);
517 }
518
519 /**
520 * Determine the encoding of the commit message buffer.
521 * <p>
522 * Locates the "encoding" header (if present) and then returns the proper
523 * character set to apply to this buffer to evaluate its contents as
524 * character data.
525 * <p>
526 * If no encoding header is present {@code UTF-8} is assumed.
527 *
528 * @return the preferred encoding of {@link #getRawBuffer()}.
529 * @throws IllegalCharsetNameException
530 * if the character set requested by the encoding header is
531 * malformed and unsupportable.
532 * @throws UnsupportedCharsetException
533 * if the JRE does not support the character set requested by
534 * the encoding header.
535 */
536 public final Charset getEncoding() {
537 return RawParseUtils.parseEncoding(buffer);
538 }
539
540 private Charset guessEncoding() {
541 try {
542 return getEncoding();
543 } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
544 return UTF_8;
545 }
546 }
547
548 /**
549 * Parse the footer lines (e.g. "Signed-off-by") for machine processing.
550 * <p>
551 * This method splits all of the footer lines out of the last paragraph of
552 * the commit message, providing each line as a key-value pair, ordered by
553 * the order of the line's appearance in the commit message itself.
554 * <p>
555 * A footer line's key must match the pattern {@code ^[A-Za-z0-9-]+:}, while
556 * the value is free-form, but must not contain an LF. Very common keys seen
557 * in the wild are:
558 * <ul>
559 * <li>{@code Signed-off-by} (agrees to Developer Certificate of Origin)
560 * <li>{@code Acked-by} (thinks change looks sane in context)
561 * <li>{@code Reported-by} (originally found the issue this change fixes)
562 * <li>{@code Tested-by} (validated change fixes the issue for them)
563 * <li>{@code CC}, {@code Cc} (copy on all email related to this change)
564 * <li>{@code Bug} (link to project's bug tracking system)
565 * </ul>
566 *
567 * @return ordered list of footer lines; empty list if no footers found.
568 */
569 public final List<FooterLine> getFooterLines() {
570 final byte[] raw = buffer;
571 int ptr = raw.length - 1;
572 while (raw[ptr] == '\n') // trim any trailing LFs, not interesting
573 ptr--;
574
575 final int msgB = RawParseUtils.commitMessage(raw, 0);
576 final ArrayList<FooterLine> r = new ArrayList<>(4);
577 final Charset enc = guessEncoding();
578 for (;;) {
579 ptr = RawParseUtils.prevLF(raw, ptr);
580 if (ptr <= msgB)
581 break; // Don't parse commit headers as footer lines.
582
583 final int keyStart = ptr + 2;
584 if (raw[keyStart] == '\n')
585 break; // Stop at first paragraph break, no footers above it.
586
587 final int keyEnd = RawParseUtils.endOfFooterLineKey(raw, keyStart);
588 if (keyEnd < 0)
589 continue; // Not a well formed footer line, skip it.
590
591 // Skip over the ': *' at the end of the key before the value.
592 //
593 int valStart = keyEnd + 1;
594 while (valStart < raw.length && raw[valStart] == ' ')
595 valStart++;
596
597 // Value ends at the LF, and does not include it.
598 //
599 int valEnd = RawParseUtils.nextLF(raw, valStart);
600 if (raw[valEnd - 1] == '\n')
601 valEnd--;
602
603 r.add(new FooterLine(raw, enc, keyStart, keyEnd, valStart, valEnd));
604 }
605 Collections.reverse(r);
606 return r;
607 }
608
609 /**
610 * Get the values of all footer lines with the given key.
611 *
612 * @param keyName
613 * footer key to find values of, case insensitive.
614 * @return values of footers with key of {@code keyName}, ordered by their
615 * order of appearance. Duplicates may be returned if the same
616 * footer appeared more than once. Empty list if no footers appear
617 * with the specified key, or there are no footers at all.
618 * @see #getFooterLines()
619 */
620 public final List<String> getFooterLines(String keyName) {
621 return getFooterLines(new FooterKey(keyName));
622 }
623
624 /**
625 * Get the values of all footer lines with the given key.
626 *
627 * @param keyName
628 * footer key to find values of, case insensitive.
629 * @return values of footers with key of {@code keyName}, ordered by their
630 * order of appearance. Duplicates may be returned if the same
631 * footer appeared more than once. Empty list if no footers appear
632 * with the specified key, or there are no footers at all.
633 * @see #getFooterLines()
634 */
635 public final List<String> getFooterLines(FooterKey keyName) {
636 final List<FooterLine> src = getFooterLines();
637 if (src.isEmpty())
638 return Collections.emptyList();
639 final ArrayList<String> r = new ArrayList<>(src.size());
640 for (FooterLine f : src) {
641 if (f.matches(keyName))
642 r.add(f.getValue());
643 }
644 return r;
645 }
646
647 /**
648 * Reset this commit to allow another RevWalk with the same instances.
649 * <p>
650 * Subclasses <b>must</b> call <code>super.reset()</code> to ensure the
651 * basic information can be correctly cleared out.
652 */
653 public void reset() {
654 inDegree = 0;
655 }
656
657 /**
658 * Discard the message buffer to reduce memory usage.
659 * <p>
660 * After discarding the memory usage of the {@code RevCommit} is reduced to
661 * only the {@link #getTree()} and {@link #getParents()} pointers and the
662 * time in {@link #getCommitTime()}. Accessing other properties such as
663 * {@link #getAuthorIdent()}, {@link #getCommitterIdent()} or either message
664 * function requires reloading the buffer by invoking
665 * {@link org.eclipse.jgit.revwalk.RevWalk#parseBody(RevObject)}.
666 *
667 * @since 4.0
668 */
669 public final void disposeBody() {
670 buffer = null;
671 }
672
673 /** {@inheritDoc} */
674 @Override
675 public String toString() {
676 final StringBuilder s = new StringBuilder();
677 s.append(Constants.typeString(getType()));
678 s.append(' ');
679 s.append(name());
680 s.append(' ');
681 s.append(commitTime);
682 s.append(' ');
683 appendCoreFlags(s);
684 return s.toString();
685 }
686 }