1 /*
2 * Copyright (C) 2008-2009, Google Inc.
3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
4 * and other copyright owners as documented in the project's IP log.
5 *
6 * This program and the accompanying materials are made available
7 * under the terms of the Eclipse Distribution License v1.0 which
8 * accompanies this distribution, is reproduced below, and is
9 * available at http://www.eclipse.org/org/documents/edl-v10.php
10 *
11 * All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials provided
23 * with the distribution.
24 *
25 * - Neither the name of the Eclipse Foundation, Inc. nor the
26 * names of its contributors may be used to endorse or promote
27 * products derived from this software without specific prior
28 * written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 */
44
45 package org.eclipse.jgit.revwalk;
46
47 import static java.nio.charset.StandardCharsets.UTF_8;
48
49 import java.io.IOException;
50 import java.nio.charset.Charset;
51 import java.nio.charset.IllegalCharsetNameException;
52 import java.nio.charset.UnsupportedCharsetException;
53 import java.util.ArrayList;
54 import java.util.Collections;
55 import java.util.List;
56
57 import org.eclipse.jgit.annotations.Nullable;
58 import org.eclipse.jgit.errors.IncorrectObjectTypeException;
59 import org.eclipse.jgit.errors.MissingObjectException;
60 import org.eclipse.jgit.lib.AnyObjectId;
61 import org.eclipse.jgit.lib.Constants;
62 import org.eclipse.jgit.lib.MutableObjectId;
63 import org.eclipse.jgit.lib.ObjectInserter;
64 import org.eclipse.jgit.lib.ObjectReader;
65 import org.eclipse.jgit.lib.PersonIdent;
66 import org.eclipse.jgit.util.RawParseUtils;
67 import org.eclipse.jgit.util.StringUtils;
68
69 /** A commit reference to a commit in the DAG. */
70 public class RevCommit extends RevObject {
71 private static final int STACK_DEPTH = 500;
72
73 /**
74 * Parse a commit from its canonical format.
75 *
76 * This method constructs a temporary revision pool, parses the commit as
77 * supplied, and returns it to the caller. Since the commit was built inside
78 * of a private revision pool its parent pointers will be initialized, but
79 * will not have their headers loaded.
80 *
81 * Applications are discouraged from using this API. Callers usually need
82 * more than one commit. Use {@link RevWalk#parseCommit(AnyObjectId)} to
83 * obtain a RevCommit from an existing repository.
84 *
85 * @param raw
86 * the canonical formatted commit to be parsed.
87 * @return the parsed commit, in an isolated revision pool that is not
88 * available to the caller.
89 */
90 public static RevCommit parse(byte[] raw) {
91 try {
92 return parse(new RevWalk((ObjectReader) null), raw);
93 } catch (IOException ex) {
94 throw new RuntimeException(ex);
95 }
96 }
97
98 /**
99 * Parse a commit from its canonical format.
100 * <p>
101 * This method inserts the commit directly into the caller supplied revision
102 * pool, making it appear as though the commit exists in the repository,
103 * even if it doesn't. The repository under the pool is not affected.
104 * <p>
105 * The body of the commit (message, author, committer) is always retained in
106 * the returned {@code RevCommit}, even if the supplied {@code RevWalk} has
107 * been configured with {@code setRetainBody(false)}.
108 *
109 * @param rw
110 * the revision pool to allocate the commit within. The commit's
111 * tree and parent pointers will be obtained from this pool.
112 * @param raw
113 * the canonical formatted commit to be parsed. This buffer will
114 * be retained by the returned {@code RevCommit} and must not be
115 * modified by the caller.
116 * @return the parsed commit, in an isolated revision pool that is not
117 * available to the caller.
118 * @throws IOException
119 * in case of RevWalk initialization fails
120 */
121 public static RevCommit parse(RevWalk rw, byte[] raw) throws IOException {
122 try (ObjectInserter.Formatter fmt = new ObjectInserter.Formatter()) {
123 RevCommit r = rw.lookupCommit(fmt.idFor(Constants.OBJ_COMMIT, raw));
124 r.parseCanonical(rw, raw);
125 r.buffer = raw;
126 return r;
127 }
128 }
129
130 static final RevCommit[] NO_PARENTS = {};
131
132 private RevTree tree;
133
134 RevCommit[] parents;
135
136 int commitTime; // An int here for performance, overflows in 2038
137
138 int inDegree;
139
140 private byte[] buffer;
141
142 /**
143 * Create a new commit reference.
144 *
145 * @param id
146 * object name for the commit.
147 */
148 protected RevCommit(final AnyObjectId id) {
149 super(id);
150 }
151
152 @Override
153 void parseHeaders(final RevWalk walk) throws MissingObjectException,
154 IncorrectObjectTypeException, IOException {
155 parseCanonical(walk, walk.getCachedBytes(this));
156 }
157
158 @Override
159 void parseBody(final RevWalk walk) throws MissingObjectException,
160 IncorrectObjectTypeException, IOException {
161 if (buffer == null) {
162 buffer = walk.getCachedBytes(this);
163 if ((flags & PARSED) == 0)
164 parseCanonical(walk, buffer);
165 }
166 }
167
168 void parseCanonical(final RevWalk walk, final byte[] raw)
169 throws IOException {
170 if (!walk.shallowCommitsInitialized)
171 walk.initializeShallowCommits();
172
173 final MutableObjectId idBuffer = walk.idBuffer;
174 idBuffer.fromString(raw, 5);
175 tree = walk.lookupTree(idBuffer);
176
177 int ptr = 46;
178 if (parents == null) {
179 RevCommit[] pList = new RevCommit[1];
180 int nParents = 0;
181 for (;;) {
182 if (raw[ptr] != 'p')
183 break;
184 idBuffer.fromString(raw, ptr + 7);
185 final RevCommit p = walk.lookupCommit(idBuffer);
186 if (nParents == 0)
187 pList[nParents++] = p;
188 else if (nParents == 1) {
189 pList = new RevCommit[] { pList[0], p };
190 nParents = 2;
191 } else {
192 if (pList.length <= nParents) {
193 RevCommit[] old = pList;
194 pList = new RevCommit[pList.length + 32];
195 System.arraycopy(old, 0, pList, 0, nParents);
196 }
197 pList[nParents++] = p;
198 }
199 ptr += 48;
200 }
201 if (nParents != pList.length) {
202 RevCommit[] old = pList;
203 pList = new RevCommit[nParents];
204 System.arraycopy(old, 0, pList, 0, nParents);
205 }
206 parents = pList;
207 }
208
209 // extract time from "committer "
210 ptr = RawParseUtils.committer(raw, ptr);
211 if (ptr > 0) {
212 ptr = RawParseUtils.nextLF(raw, ptr, '>');
213
214 // In 2038 commitTime will overflow unless it is changed to long.
215 commitTime = RawParseUtils.parseBase10(raw, ptr, null);
216 }
217
218 if (walk.isRetainBody())
219 buffer = raw;
220 flags |= PARSED;
221 }
222
223 @Override
224 public final int getType() {
225 return Constants.OBJ_COMMIT;
226 }
227
228 static void carryFlags(RevCommit c, int carry) {
229 FIFORevQueue q = carryFlags1(c, carry, 0);
230 if (q != null)
231 slowCarryFlags(q, carry);
232 }
233
234 private static FIFORevQueue carryFlags1(RevCommit c, int carry, int depth) {
235 for(;;) {
236 RevCommit[] pList = c.parents;
237 if (pList == null || pList.length == 0)
238 return null;
239 if (pList.length != 1) {
240 if (depth == STACK_DEPTH)
241 return defer(c);
242 for (int i = 1; i < pList.length; i++) {
243 RevCommit p = pList[i];
244 if ((p.flags & carry) == carry)
245 continue;
246 p.flags |= carry;
247 FIFORevQueue q = carryFlags1(p, carry, depth + 1);
248 if (q != null)
249 return defer(q, carry, pList, i + 1);
250 }
251 }
252
253 c = pList[0];
254 if ((c.flags & carry) == carry)
255 return null;
256 c.flags |= carry;
257 }
258 }
259
260 private static FIFORevQueue defer(RevCommit c) {
261 FIFORevQueue q = new FIFORevQueue();
262 q.add(c);
263 return q;
264 }
265
266 private static FIFORevQueue defer(FIFORevQueue q, int carry,
267 RevCommit[] pList, int i) {
268 // In normal case the caller will run pList[0] in a tail recursive
269 // fashion by updating the variable. However the caller is unwinding
270 // the stack and will skip that pList[0] execution step.
271 carryOneStep(q, carry, pList[0]);
272
273 // Remaining parents (if any) need to have flags checked and be
274 // enqueued if they have ancestors.
275 for (; i < pList.length; i++)
276 carryOneStep(q, carry, pList[i]);
277 return q;
278 }
279
280 private static void slowCarryFlags(FIFORevQueue q, int carry) {
281 // Commits in q have non-null parent arrays and have set all
282 // flags in carry. This loop finishes copying over the graph.
283 for (RevCommit c; (c = q.next()) != null;) {
284 for (RevCommit p : c.parents)
285 carryOneStep(q, carry, p);
286 }
287 }
288
289 private static void carryOneStep(FIFORevQueue q, int carry, RevCommit c) {
290 if ((c.flags & carry) != carry) {
291 c.flags |= carry;
292 if (c.parents != null)
293 q.add(c);
294 }
295 }
296
297 /**
298 * Carry a RevFlag set on this commit to its parents.
299 * <p>
300 * If this commit is parsed, has parents, and has the supplied flag set on
301 * it we automatically add it to the parents, grand-parents, and so on until
302 * an unparsed commit or a commit with no parents is discovered. This
303 * permits applications to force a flag through the history chain when
304 * necessary.
305 *
306 * @param flag
307 * the single flag value to carry back onto parents.
308 */
309 public void carry(final RevFlag flag) {
310 final int carry = flags & flag.mask;
311 if (carry != 0)
312 carryFlags(this, carry);
313 }
314
315 /**
316 * Time from the "committer " line of the buffer.
317 *
318 * @return time, expressed as seconds since the epoch.
319 */
320 public final int getCommitTime() {
321 return commitTime;
322 }
323
324 /**
325 * Get a reference to this commit's tree.
326 *
327 * @return tree of this commit.
328 */
329 public final RevTree getTree() {
330 return tree;
331 }
332
333 /**
334 * Get the number of parent commits listed in this commit.
335 *
336 * @return number of parents; always a positive value but can be 0.
337 */
338 public final int getParentCount() {
339 return parents.length;
340 }
341
342 /**
343 * Get the nth parent from this commit's parent list.
344 *
345 * @param nth
346 * parent index to obtain. Must be in the range 0 through
347 * {@link #getParentCount()}-1.
348 * @return the specified parent.
349 * @throws ArrayIndexOutOfBoundsException
350 * an invalid parent index was specified.
351 */
352 public final RevCommit getParent(final int nth) {
353 return parents[nth];
354 }
355
356 /**
357 * Obtain an array of all parents (<b>NOTE - THIS IS NOT A COPY</b>).
358 * <p>
359 * This method is exposed only to provide very fast, efficient access to
360 * this commit's parent list. Applications relying on this list should be
361 * very careful to ensure they do not modify its contents during their use
362 * of it.
363 *
364 * @return the array of parents.
365 */
366 public final RevCommit[] getParents() {
367 return parents;
368 }
369
370 /**
371 * Obtain the raw unparsed commit body (<b>NOTE - THIS IS NOT A COPY</b>).
372 * <p>
373 * This method is exposed only to provide very fast, efficient access to
374 * this commit's message buffer within a RevFilter. Applications relying on
375 * this buffer should be very careful to ensure they do not modify its
376 * contents during their use of it.
377 *
378 * @return the raw unparsed commit body. This is <b>NOT A COPY</b>.
379 * Altering the contents of this buffer may alter the walker's
380 * knowledge of this commit, and the results it produces.
381 */
382 public final byte[] getRawBuffer() {
383 return buffer;
384 }
385
386 /**
387 * Parse the author identity from the raw buffer.
388 * <p>
389 * This method parses and returns the content of the author line, after
390 * taking the commit's character set into account and decoding the author
391 * name and email address. This method is fairly expensive and produces a
392 * new PersonIdent instance on each invocation. Callers should invoke this
393 * method only if they are certain they will be outputting the result, and
394 * should cache the return value for as long as necessary to use all
395 * information from it.
396 * <p>
397 * RevFilter implementations should try to use {@link RawParseUtils} to scan
398 * the {@link #getRawBuffer()} instead, as this will allow faster evaluation
399 * of commits.
400 *
401 * @return identity of the author (name, email) and the time the commit was
402 * made by the author; null if no author line was found.
403 */
404 public final PersonIdent getAuthorIdent() {
405 final byte[] raw = buffer;
406 final int nameB = RawParseUtils.author(raw, 0);
407 if (nameB < 0)
408 return null;
409 return RawParseUtils.parsePersonIdent(raw, nameB);
410 }
411
412 /**
413 * Parse the committer identity from the raw buffer.
414 * <p>
415 * This method parses and returns the content of the committer line, after
416 * taking the commit's character set into account and decoding the committer
417 * name and email address. This method is fairly expensive and produces a
418 * new PersonIdent instance on each invocation. Callers should invoke this
419 * method only if they are certain they will be outputting the result, and
420 * should cache the return value for as long as necessary to use all
421 * information from it.
422 * <p>
423 * RevFilter implementations should try to use {@link RawParseUtils} to scan
424 * the {@link #getRawBuffer()} instead, as this will allow faster evaluation
425 * of commits.
426 *
427 * @return identity of the committer (name, email) and the time the commit
428 * was made by the committer; null if no committer line was found.
429 */
430 public final PersonIdent getCommitterIdent() {
431 final byte[] raw = buffer;
432 final int nameB = RawParseUtils.committer(raw, 0);
433 if (nameB < 0)
434 return null;
435 return RawParseUtils.parsePersonIdent(raw, nameB);
436 }
437
438 /**
439 * Parse the complete commit message and decode it to a string.
440 * <p>
441 * This method parses and returns the message portion of the commit buffer,
442 * after taking the commit's character set into account and decoding the
443 * buffer using that character set. This method is a fairly expensive
444 * operation and produces a new string on each invocation.
445 *
446 * @return decoded commit message as a string. Never null.
447 */
448 public final String getFullMessage() {
449 byte[] raw = buffer;
450 int msgB = RawParseUtils.commitMessage(raw, 0);
451 if (msgB < 0) {
452 return ""; //$NON-NLS-1$
453 }
454 return RawParseUtils.decode(guessEncoding(), raw, msgB, raw.length);
455 }
456
457 /**
458 * Parse the commit message and return the first "line" of it.
459 * <p>
460 * The first line is everything up to the first pair of LFs. This is the
461 * "oneline" format, suitable for output in a single line display.
462 * <p>
463 * This method parses and returns the message portion of the commit buffer,
464 * after taking the commit's character set into account and decoding the
465 * buffer using that character set. This method is a fairly expensive
466 * operation and produces a new string on each invocation.
467 *
468 * @return decoded commit message as a string. Never null. The returned
469 * string does not contain any LFs, even if the first paragraph
470 * spanned multiple lines. Embedded LFs are converted to spaces.
471 */
472 public final String getShortMessage() {
473 byte[] raw = buffer;
474 int msgB = RawParseUtils.commitMessage(raw, 0);
475 if (msgB < 0) {
476 return ""; //$NON-NLS-1$
477 }
478
479 int msgE = RawParseUtils.endOfParagraph(raw, msgB);
480 String str = RawParseUtils.decode(guessEncoding(), raw, msgB, msgE);
481 if (hasLF(raw, msgB, msgE)) {
482 str = StringUtils.replaceLineBreaksWithSpace(str);
483 }
484 return str;
485 }
486
487 static boolean hasLF(final byte[] r, int b, final int e) {
488 while (b < e)
489 if (r[b++] == '\n')
490 return true;
491 return false;
492 }
493
494 /**
495 * Determine the encoding of the commit message buffer.
496 * <p>
497 * Locates the "encoding" header (if present) and returns its value. Due to
498 * corruption in the wild this may be an invalid encoding name that is not
499 * recognized by any character encoding library.
500 * <p>
501 * If no encoding header is present, null.
502 *
503 * @return the preferred encoding of {@link #getRawBuffer()}; or null.
504 * @since 4.2
505 */
506 @Nullable
507 public final String getEncodingName() {
508 return RawParseUtils.parseEncodingName(buffer);
509 }
510
511 /**
512 * Determine the encoding of the commit message buffer.
513 * <p>
514 * Locates the "encoding" header (if present) and then returns the proper
515 * character set to apply to this buffer to evaluate its contents as
516 * character data.
517 * <p>
518 * If no encoding header is present {@code UTF-8} is assumed.
519 *
520 * @return the preferred encoding of {@link #getRawBuffer()}.
521 * @throws IllegalCharsetNameException
522 * if the character set requested by the encoding header is
523 * malformed and unsupportable.
524 * @throws UnsupportedCharsetException
525 * if the JRE does not support the character set requested by
526 * the encoding header.
527 */
528 public final Charset getEncoding() {
529 return RawParseUtils.parseEncoding(buffer);
530 }
531
532 private Charset guessEncoding() {
533 try {
534 return getEncoding();
535 } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
536 return UTF_8;
537 }
538 }
539
540 /**
541 * Parse the footer lines (e.g. "Signed-off-by") for machine processing.
542 * <p>
543 * This method splits all of the footer lines out of the last paragraph of
544 * the commit message, providing each line as a key-value pair, ordered by
545 * the order of the line's appearance in the commit message itself.
546 * <p>
547 * A footer line's key must match the pattern {@code ^[A-Za-z0-9-]+:}, while
548 * the value is free-form, but must not contain an LF. Very common keys seen
549 * in the wild are:
550 * <ul>
551 * <li>{@code Signed-off-by} (agrees to Developer Certificate of Origin)
552 * <li>{@code Acked-by} (thinks change looks sane in context)
553 * <li>{@code Reported-by} (originally found the issue this change fixes)
554 * <li>{@code Tested-by} (validated change fixes the issue for them)
555 * <li>{@code CC}, {@code Cc} (copy on all email related to this change)
556 * <li>{@code Bug} (link to project's bug tracking system)
557 * </ul>
558 *
559 * @return ordered list of footer lines; empty list if no footers found.
560 */
561 public final List<FooterLine> getFooterLines() {
562 final byte[] raw = buffer;
563 int ptr = raw.length - 1;
564 while (raw[ptr] == '\n') // trim any trailing LFs, not interesting
565 ptr--;
566
567 final int msgB = RawParseUtils.commitMessage(raw, 0);
568 final ArrayList<FooterLine> r = new ArrayList<>(4);
569 final Charset enc = guessEncoding();
570 for (;;) {
571 ptr = RawParseUtils.prevLF(raw, ptr);
572 if (ptr <= msgB)
573 break; // Don't parse commit headers as footer lines.
574
575 final int keyStart = ptr + 2;
576 if (raw[keyStart] == '\n')
577 break; // Stop at first paragraph break, no footers above it.
578
579 final int keyEnd = RawParseUtils.endOfFooterLineKey(raw, keyStart);
580 if (keyEnd < 0)
581 continue; // Not a well formed footer line, skip it.
582
583 // Skip over the ': *' at the end of the key before the value.
584 //
585 int valStart = keyEnd + 1;
586 while (valStart < raw.length && raw[valStart] == ' ')
587 valStart++;
588
589 // Value ends at the LF, and does not include it.
590 //
591 int valEnd = RawParseUtils.nextLF(raw, valStart);
592 if (raw[valEnd - 1] == '\n')
593 valEnd--;
594
595 r.add(new FooterLine(raw, enc, keyStart, keyEnd, valStart, valEnd));
596 }
597 Collections.reverse(r);
598 return r;
599 }
600
601 /**
602 * Get the values of all footer lines with the given key.
603 *
604 * @param keyName
605 * footer key to find values of, case insensitive.
606 * @return values of footers with key of {@code keyName}, ordered by their
607 * order of appearance. Duplicates may be returned if the same
608 * footer appeared more than once. Empty list if no footers appear
609 * with the specified key, or there are no footers at all.
610 * @see #getFooterLines()
611 */
612 public final List<String> getFooterLines(final String keyName) {
613 return getFooterLines(new FooterKey(keyName));
614 }
615
616 /**
617 * Get the values of all footer lines with the given key.
618 *
619 * @param keyName
620 * footer key to find values of, case insensitive.
621 * @return values of footers with key of {@code keyName}, ordered by their
622 * order of appearance. Duplicates may be returned if the same
623 * footer appeared more than once. Empty list if no footers appear
624 * with the specified key, or there are no footers at all.
625 * @see #getFooterLines()
626 */
627 public final List<String> getFooterLines(final FooterKey keyName) {
628 final List<FooterLine> src = getFooterLines();
629 if (src.isEmpty())
630 return Collections.emptyList();
631 final ArrayList<String> r = new ArrayList<>(src.size());
632 for (final FooterLine f : src) {
633 if (f.matches(keyName))
634 r.add(f.getValue());
635 }
636 return r;
637 }
638
639 /**
640 * Reset this commit to allow another RevWalk with the same instances.
641 * <p>
642 * Subclasses <b>must</b> call <code>super.reset()</code> to ensure the
643 * basic information can be correctly cleared out.
644 */
645 public void reset() {
646 inDegree = 0;
647 }
648
649 /**
650 * Discard the message buffer to reduce memory usage.
651 * <p>
652 * After discarding the memory usage of the {@code RevCommit} is reduced to
653 * only the {@link #getTree()} and {@link #getParents()} pointers and the
654 * time in {@link #getCommitTime()}. Accessing other properties such as
655 * {@link #getAuthorIdent()}, {@link #getCommitterIdent()} or either message
656 * function requires reloading the buffer by invoking
657 * {@link RevWalk#parseBody(RevObject)}.
658 *
659 * @since 4.0
660 */
661 public final void disposeBody() {
662 buffer = null;
663 }
664
665 @Override
666 public String toString() {
667 final StringBuilder s = new StringBuilder();
668 s.append(Constants.typeString(getType()));
669 s.append(' ');
670 s.append(name());
671 s.append(' ');
672 s.append(commitTime);
673 s.append(' ');
674 appendCoreFlags(s);
675 return s.toString();
676 }
677 }