1 /*
2 * Copyright (C) 2008-2009, Google Inc.
3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
4 * and other copyright owners as documented in the project's IP log.
5 *
6 * This program and the accompanying materials are made available
7 * under the terms of the Eclipse Distribution License v1.0 which
8 * accompanies this distribution, is reproduced below, and is
9 * available at http://www.eclipse.org/org/documents/edl-v10.php
10 *
11 * All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials provided
23 * with the distribution.
24 *
25 * - Neither the name of the Eclipse Foundation, Inc. nor the
26 * names of its contributors may be used to endorse or promote
27 * products derived from this software without specific prior
28 * written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 */
44
45 package org.eclipse.jgit.revwalk;
46
47 import java.io.IOException;
48 import java.nio.charset.Charset;
49 import java.util.ArrayList;
50 import java.util.Collections;
51 import java.util.List;
52
53 import org.eclipse.jgit.errors.IncorrectObjectTypeException;
54 import org.eclipse.jgit.errors.MissingObjectException;
55 import org.eclipse.jgit.lib.AnyObjectId;
56 import org.eclipse.jgit.lib.Constants;
57 import org.eclipse.jgit.lib.MutableObjectId;
58 import org.eclipse.jgit.lib.ObjectInserter;
59 import org.eclipse.jgit.lib.ObjectReader;
60 import org.eclipse.jgit.lib.PersonIdent;
61 import org.eclipse.jgit.util.RawParseUtils;
62 import org.eclipse.jgit.util.StringUtils;
63
64 /** A commit reference to a commit in the DAG. */
65 public class RevCommit extends RevObject {
66 private static final int STACK_DEPTH = 500;
67
68 /**
69 * Parse a commit from its canonical format.
70 *
71 * This method constructs a temporary revision pool, parses the commit as
72 * supplied, and returns it to the caller. Since the commit was built inside
73 * of a private revision pool its parent pointers will be initialized, but
74 * will not have their headers loaded.
75 *
76 * Applications are discouraged from using this API. Callers usually need
77 * more than one commit. Use {@link RevWalk#parseCommit(AnyObjectId)} to
78 * obtain a RevCommit from an existing repository.
79 *
80 * @param raw
81 * the canonical formatted commit to be parsed.
82 * @return the parsed commit, in an isolated revision pool that is not
83 * available to the caller.
84 */
85 public static RevCommit parse(byte[] raw) {
86 try {
87 return parse(new RevWalk((ObjectReader) null), raw);
88 } catch (IOException ex) {
89 throw new RuntimeException(ex);
90 }
91 }
92
93 /**
94 * Parse a commit from its canonical format.
95 * <p>
96 * This method inserts the commit directly into the caller supplied revision
97 * pool, making it appear as though the commit exists in the repository,
98 * even if it doesn't. The repository under the pool is not affected.
99 * <p>
100 * The body of the commit (message, author, committer) is always retained in
101 * the returned {@code RevCommit}, even if the supplied {@code RevWalk} has
102 * been configured with {@code setRetainBody(false)}.
103 *
104 * @param rw
105 * the revision pool to allocate the commit within. The commit's
106 * tree and parent pointers will be obtained from this pool.
107 * @param raw
108 * the canonical formatted commit to be parsed. This buffer will
109 * be retained by the returned {@code RevCommit} and must not be
110 * modified by the caller.
111 * @return the parsed commit, in an isolated revision pool that is not
112 * available to the caller.
113 * @throws IOException
114 * in case of RevWalk initialization fails
115 */
116 public static RevCommit parse(RevWalk rw, byte[] raw) throws IOException {
117 try (ObjectInserter.Formatter fmt = new ObjectInserter.Formatter()) {
118 RevCommit r = rw.lookupCommit(fmt.idFor(Constants.OBJ_COMMIT, raw));
119 r.parseCanonical(rw, raw);
120 r.buffer = raw;
121 return r;
122 }
123 }
124
125 static final RevCommit[] NO_PARENTS = {};
126
127 private RevTree tree;
128
129 RevCommit[] parents;
130
131 int commitTime; // An int here for performance, overflows in 2038
132
133 int inDegree;
134
135 private byte[] buffer;
136
137 /**
138 * Create a new commit reference.
139 *
140 * @param id
141 * object name for the commit.
142 */
143 protected RevCommit(final AnyObjectId id) {
144 super(id);
145 }
146
147 @Override
148 void parseHeaders(final RevWalk walk) throws MissingObjectException,
149 IncorrectObjectTypeException, IOException {
150 parseCanonical(walk, walk.getCachedBytes(this));
151 }
152
153 @Override
154 void parseBody(final RevWalk walk) throws MissingObjectException,
155 IncorrectObjectTypeException, IOException {
156 if (buffer == null) {
157 buffer = walk.getCachedBytes(this);
158 if ((flags & PARSED) == 0)
159 parseCanonical(walk, buffer);
160 }
161 }
162
163 void parseCanonical(final RevWalk walk, final byte[] raw)
164 throws IOException {
165 if (!walk.shallowCommitsInitialized)
166 walk.initializeShallowCommits();
167
168 final MutableObjectId idBuffer = walk.idBuffer;
169 idBuffer.fromString(raw, 5);
170 tree = walk.lookupTree(idBuffer);
171
172 int ptr = 46;
173 if (parents == null) {
174 RevCommit[] pList = new RevCommit[1];
175 int nParents = 0;
176 for (;;) {
177 if (raw[ptr] != 'p')
178 break;
179 idBuffer.fromString(raw, ptr + 7);
180 final RevCommit p = walk.lookupCommit(idBuffer);
181 if (nParents == 0)
182 pList[nParents++] = p;
183 else if (nParents == 1) {
184 pList = new RevCommit[] { pList[0], p };
185 nParents = 2;
186 } else {
187 if (pList.length <= nParents) {
188 RevCommit[] old = pList;
189 pList = new RevCommit[pList.length + 32];
190 System.arraycopy(old, 0, pList, 0, nParents);
191 }
192 pList[nParents++] = p;
193 }
194 ptr += 48;
195 }
196 if (nParents != pList.length) {
197 RevCommit[] old = pList;
198 pList = new RevCommit[nParents];
199 System.arraycopy(old, 0, pList, 0, nParents);
200 }
201 parents = pList;
202 }
203
204 // extract time from "committer "
205 ptr = RawParseUtils.committer(raw, ptr);
206 if (ptr > 0) {
207 ptr = RawParseUtils.nextLF(raw, ptr, '>');
208
209 // In 2038 commitTime will overflow unless it is changed to long.
210 commitTime = RawParseUtils.parseBase10(raw, ptr, null);
211 }
212
213 if (walk.isRetainBody())
214 buffer = raw;
215 flags |= PARSED;
216 }
217
218 @Override
219 public final int getType() {
220 return Constants.OBJ_COMMIT;
221 }
222
223 static void carryFlags(RevCommit c, int carry) {
224 FIFORevQueue q = carryFlags1(c, carry, 0);
225 if (q != null)
226 slowCarryFlags(q, carry);
227 }
228
229 private static FIFORevQueue carryFlags1(RevCommit c, int carry, int depth) {
230 for(;;) {
231 RevCommit[] pList = c.parents;
232 if (pList == null || pList.length == 0)
233 return null;
234 if (pList.length != 1) {
235 if (depth == STACK_DEPTH)
236 return defer(c);
237 for (int i = 1; i < pList.length; i++) {
238 RevCommit p = pList[i];
239 if ((p.flags & carry) == carry)
240 continue;
241 p.flags |= carry;
242 FIFORevQueue q = carryFlags1(c, carry, depth + 1);
243 if (q != null)
244 return defer(q, carry, pList, i + 1);
245 }
246 }
247
248 c = pList[0];
249 if ((c.flags & carry) == carry)
250 return null;
251 c.flags |= carry;
252 }
253 }
254
255 private static FIFORevQueue defer(RevCommit c) {
256 FIFORevQueue q = new FIFORevQueue();
257 q.add(c);
258 return q;
259 }
260
261 private static FIFORevQueue defer(FIFORevQueue q, int carry,
262 RevCommit[] pList, int i) {
263 // In normal case the caller will run pList[0] in a tail recursive
264 // fashion by updating the variable. However the caller is unwinding
265 // the stack and will skip that pList[0] execution step.
266 carryOneStep(q, carry, pList[0]);
267
268 // Remaining parents (if any) need to have flags checked and be
269 // enqueued if they have ancestors.
270 for (; i < pList.length; i++)
271 carryOneStep(q, carry, pList[i]);
272 return q;
273 }
274
275 private static void slowCarryFlags(FIFORevQueue q, int carry) {
276 // Commits in q have non-null parent arrays and have set all
277 // flags in carry. This loop finishes copying over the graph.
278 for (RevCommit c; (c = q.next()) != null;) {
279 for (RevCommit p : c.parents)
280 carryOneStep(q, carry, p);
281 }
282 }
283
284 private static void carryOneStep(FIFORevQueue q, int carry, RevCommit c) {
285 if ((c.flags & carry) != carry) {
286 c.flags |= carry;
287 if (c.parents != null)
288 q.add(c);
289 }
290 }
291
292 /**
293 * Carry a RevFlag set on this commit to its parents.
294 * <p>
295 * If this commit is parsed, has parents, and has the supplied flag set on
296 * it we automatically add it to the parents, grand-parents, and so on until
297 * an unparsed commit or a commit with no parents is discovered. This
298 * permits applications to force a flag through the history chain when
299 * necessary.
300 *
301 * @param flag
302 * the single flag value to carry back onto parents.
303 */
304 public void carry(final RevFlag flag) {
305 final int carry = flags & flag.mask;
306 if (carry != 0)
307 carryFlags(this, carry);
308 }
309
310 /**
311 * Time from the "committer " line of the buffer.
312 *
313 * @return time, expressed as seconds since the epoch.
314 */
315 public final int getCommitTime() {
316 return commitTime;
317 }
318
319 /**
320 * Get a reference to this commit's tree.
321 *
322 * @return tree of this commit.
323 */
324 public final RevTree getTree() {
325 return tree;
326 }
327
328 /**
329 * Get the number of parent commits listed in this commit.
330 *
331 * @return number of parents; always a positive value but can be 0.
332 */
333 public final int getParentCount() {
334 return parents.length;
335 }
336
337 /**
338 * Get the nth parent from this commit's parent list.
339 *
340 * @param nth
341 * parent index to obtain. Must be in the range 0 through
342 * {@link #getParentCount()}-1.
343 * @return the specified parent.
344 * @throws ArrayIndexOutOfBoundsException
345 * an invalid parent index was specified.
346 */
347 public final RevCommit getParent(final int nth) {
348 return parents[nth];
349 }
350
351 /**
352 * Obtain an array of all parents (<b>NOTE - THIS IS NOT A COPY</b>).
353 * <p>
354 * This method is exposed only to provide very fast, efficient access to
355 * this commit's parent list. Applications relying on this list should be
356 * very careful to ensure they do not modify its contents during their use
357 * of it.
358 *
359 * @return the array of parents.
360 */
361 public final RevCommit[] getParents() {
362 return parents;
363 }
364
365 /**
366 * Obtain the raw unparsed commit body (<b>NOTE - THIS IS NOT A COPY</b>).
367 * <p>
368 * This method is exposed only to provide very fast, efficient access to
369 * this commit's message buffer within a RevFilter. Applications relying on
370 * this buffer should be very careful to ensure they do not modify its
371 * contents during their use of it.
372 *
373 * @return the raw unparsed commit body. This is <b>NOT A COPY</b>.
374 * Altering the contents of this buffer may alter the walker's
375 * knowledge of this commit, and the results it produces.
376 */
377 public final byte[] getRawBuffer() {
378 return buffer;
379 }
380
381 /**
382 * Parse the author identity from the raw buffer.
383 * <p>
384 * This method parses and returns the content of the author line, after
385 * taking the commit's character set into account and decoding the author
386 * name and email address. This method is fairly expensive and produces a
387 * new PersonIdent instance on each invocation. Callers should invoke this
388 * method only if they are certain they will be outputting the result, and
389 * should cache the return value for as long as necessary to use all
390 * information from it.
391 * <p>
392 * RevFilter implementations should try to use {@link RawParseUtils} to scan
393 * the {@link #getRawBuffer()} instead, as this will allow faster evaluation
394 * of commits.
395 *
396 * @return identity of the author (name, email) and the time the commit was
397 * made by the author; null if no author line was found.
398 */
399 public final PersonIdent getAuthorIdent() {
400 final byte[] raw = buffer;
401 final int nameB = RawParseUtils.author(raw, 0);
402 if (nameB < 0)
403 return null;
404 return RawParseUtils.parsePersonIdent(raw, nameB);
405 }
406
407 /**
408 * Parse the committer identity from the raw buffer.
409 * <p>
410 * This method parses and returns the content of the committer line, after
411 * taking the commit's character set into account and decoding the committer
412 * name and email address. This method is fairly expensive and produces a
413 * new PersonIdent instance on each invocation. Callers should invoke this
414 * method only if they are certain they will be outputting the result, and
415 * should cache the return value for as long as necessary to use all
416 * information from it.
417 * <p>
418 * RevFilter implementations should try to use {@link RawParseUtils} to scan
419 * the {@link #getRawBuffer()} instead, as this will allow faster evaluation
420 * of commits.
421 *
422 * @return identity of the committer (name, email) and the time the commit
423 * was made by the committer; null if no committer line was found.
424 */
425 public final PersonIdent getCommitterIdent() {
426 final byte[] raw = buffer;
427 final int nameB = RawParseUtils.committer(raw, 0);
428 if (nameB < 0)
429 return null;
430 return RawParseUtils.parsePersonIdent(raw, nameB);
431 }
432
433 /**
434 * Parse the complete commit message and decode it to a string.
435 * <p>
436 * This method parses and returns the message portion of the commit buffer,
437 * after taking the commit's character set into account and decoding the
438 * buffer using that character set. This method is a fairly expensive
439 * operation and produces a new string on each invocation.
440 *
441 * @return decoded commit message as a string. Never null.
442 */
443 public final String getFullMessage() {
444 final byte[] raw = buffer;
445 final int msgB = RawParseUtils.commitMessage(raw, 0);
446 if (msgB < 0)
447 return ""; //$NON-NLS-1$
448 final Charset enc = RawParseUtils.parseEncoding(raw);
449 return RawParseUtils.decode(enc, raw, msgB, raw.length);
450 }
451
452 /**
453 * Parse the commit message and return the first "line" of it.
454 * <p>
455 * The first line is everything up to the first pair of LFs. This is the
456 * "oneline" format, suitable for output in a single line display.
457 * <p>
458 * This method parses and returns the message portion of the commit buffer,
459 * after taking the commit's character set into account and decoding the
460 * buffer using that character set. This method is a fairly expensive
461 * operation and produces a new string on each invocation.
462 *
463 * @return decoded commit message as a string. Never null. The returned
464 * string does not contain any LFs, even if the first paragraph
465 * spanned multiple lines. Embedded LFs are converted to spaces.
466 */
467 public final String getShortMessage() {
468 final byte[] raw = buffer;
469 final int msgB = RawParseUtils.commitMessage(raw, 0);
470 if (msgB < 0)
471 return ""; //$NON-NLS-1$
472
473 final Charset enc = RawParseUtils.parseEncoding(raw);
474 final int msgE = RawParseUtils.endOfParagraph(raw, msgB);
475 String str = RawParseUtils.decode(enc, raw, msgB, msgE);
476 if (hasLF(raw, msgB, msgE))
477 str = StringUtils.replaceLineBreaksWithSpace(str);
478 return str;
479 }
480
481 static boolean hasLF(final byte[] r, int b, final int e) {
482 while (b < e)
483 if (r[b++] == '\n')
484 return true;
485 return false;
486 }
487
488 /**
489 * Determine the encoding of the commit message buffer.
490 * <p>
491 * Locates the "encoding" header (if present) and then returns the proper
492 * character set to apply to this buffer to evaluate its contents as
493 * character data.
494 * <p>
495 * If no encoding header is present, {@link Constants#CHARSET} is assumed.
496 *
497 * @return the preferred encoding of {@link #getRawBuffer()}.
498 */
499 public final Charset getEncoding() {
500 return RawParseUtils.parseEncoding(buffer);
501 }
502
503 /**
504 * Parse the footer lines (e.g. "Signed-off-by") for machine processing.
505 * <p>
506 * This method splits all of the footer lines out of the last paragraph of
507 * the commit message, providing each line as a key-value pair, ordered by
508 * the order of the line's appearance in the commit message itself.
509 * <p>
510 * A footer line's key must match the pattern {@code ^[A-Za-z0-9-]+:}, while
511 * the value is free-form, but must not contain an LF. Very common keys seen
512 * in the wild are:
513 * <ul>
514 * <li>{@code Signed-off-by} (agrees to Developer Certificate of Origin)
515 * <li>{@code Acked-by} (thinks change looks sane in context)
516 * <li>{@code Reported-by} (originally found the issue this change fixes)
517 * <li>{@code Tested-by} (validated change fixes the issue for them)
518 * <li>{@code CC}, {@code Cc} (copy on all email related to this change)
519 * <li>{@code Bug} (link to project's bug tracking system)
520 * </ul>
521 *
522 * @return ordered list of footer lines; empty list if no footers found.
523 */
524 public final List<FooterLine> getFooterLines() {
525 final byte[] raw = buffer;
526 int ptr = raw.length - 1;
527 while (raw[ptr] == '\n') // trim any trailing LFs, not interesting
528 ptr--;
529
530 final int msgB = RawParseUtils.commitMessage(raw, 0);
531 final ArrayList<FooterLine> r = new ArrayList<FooterLine>(4);
532 final Charset enc = getEncoding();
533 for (;;) {
534 ptr = RawParseUtils.prevLF(raw, ptr);
535 if (ptr <= msgB)
536 break; // Don't parse commit headers as footer lines.
537
538 final int keyStart = ptr + 2;
539 if (raw[keyStart] == '\n')
540 break; // Stop at first paragraph break, no footers above it.
541
542 final int keyEnd = RawParseUtils.endOfFooterLineKey(raw, keyStart);
543 if (keyEnd < 0)
544 continue; // Not a well formed footer line, skip it.
545
546 // Skip over the ': *' at the end of the key before the value.
547 //
548 int valStart = keyEnd + 1;
549 while (valStart < raw.length && raw[valStart] == ' ')
550 valStart++;
551
552 // Value ends at the LF, and does not include it.
553 //
554 int valEnd = RawParseUtils.nextLF(raw, valStart);
555 if (raw[valEnd - 1] == '\n')
556 valEnd--;
557
558 r.add(new FooterLine(raw, enc, keyStart, keyEnd, valStart, valEnd));
559 }
560 Collections.reverse(r);
561 return r;
562 }
563
564 /**
565 * Get the values of all footer lines with the given key.
566 *
567 * @param keyName
568 * footer key to find values of, case insensitive.
569 * @return values of footers with key of {@code keyName}, ordered by their
570 * order of appearance. Duplicates may be returned if the same
571 * footer appeared more than once. Empty list if no footers appear
572 * with the specified key, or there are no footers at all.
573 * @see #getFooterLines()
574 */
575 public final List<String> getFooterLines(final String keyName) {
576 return getFooterLines(new FooterKey(keyName));
577 }
578
579 /**
580 * Get the values of all footer lines with the given key.
581 *
582 * @param keyName
583 * footer key to find values of, case insensitive.
584 * @return values of footers with key of {@code keyName}, ordered by their
585 * order of appearance. Duplicates may be returned if the same
586 * footer appeared more than once. Empty list if no footers appear
587 * with the specified key, or there are no footers at all.
588 * @see #getFooterLines()
589 */
590 public final List<String> getFooterLines(final FooterKey keyName) {
591 final List<FooterLine> src = getFooterLines();
592 if (src.isEmpty())
593 return Collections.emptyList();
594 final ArrayList<String> r = new ArrayList<String>(src.size());
595 for (final FooterLine f : src) {
596 if (f.matches(keyName))
597 r.add(f.getValue());
598 }
599 return r;
600 }
601
602 /**
603 * Reset this commit to allow another RevWalk with the same instances.
604 * <p>
605 * Subclasses <b>must</b> call <code>super.reset()</code> to ensure the
606 * basic information can be correctly cleared out.
607 */
608 public void reset() {
609 inDegree = 0;
610 }
611
612 /**
613 * Discard the message buffer to reduce memory usage.
614 * <p>
615 * After discarding the memory usage of the {@code RevCommit} is reduced to
616 * only the {@link #getTree()} and {@link #getParents()} pointers and the
617 * time in {@link #getCommitTime()}. Accessing other properties such as
618 * {@link #getAuthorIdent()}, {@link #getCommitterIdent()} or either message
619 * function requires reloading the buffer by invoking
620 * {@link RevWalk#parseBody(RevObject)}.
621 *
622 * @since 4.0
623 */
624 public final void disposeBody() {
625 buffer = null;
626 }
627
628 @Override
629 public String toString() {
630 final StringBuilder s = new StringBuilder();
631 s.append(Constants.typeString(getType()));
632 s.append(' ');
633 s.append(name());
634 s.append(' ');
635 s.append(commitTime);
636 s.append(' ');
637 appendCoreFlags(s);
638 return s.toString();
639 }
640 }