1   /*
2    * Copyright (C) 2016, Google Inc.
3    * and other copyright owners as documented in the project's IP log.
4    *
5    * This program and the accompanying materials are made available
6    * under the terms of the Eclipse Distribution License v1.0 which
7    * accompanies this distribution, is reproduced below, and is
8    * available at http://www.eclipse.org/org/documents/edl-v10.php
9    *
10   * All rights reserved.
11   *
12   * Redistribution and use in source and binary forms, with or
13   * without modification, are permitted provided that the following
14   * conditions are met:
15   *
16   * - Redistributions of source code must retain the above copyright
17   *   notice, this list of conditions and the following disclaimer.
18   *
19   * - Redistributions in binary form must reproduce the above
20   *   copyright notice, this list of conditions and the following
21   *   disclaimer in the documentation and/or other materials provided
22   *   with the distribution.
23   *
24   * - Neither the name of the Eclipse Foundation, Inc. nor the
25   *   names of its contributors may be used to endorse or promote
26   *   products derived from this software without specific prior
27   *   written permission.
28   *
29   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42   */
43  
44  package org.eclipse.jgit.internal.ketch;
45  
46  import static org.eclipse.jgit.lib.FileMode.TYPE_GITLINK;
47  
48  import java.io.IOException;
49  import java.util.ArrayList;
50  import java.util.HashSet;
51  import java.util.List;
52  import java.util.Set;
53  
54  import org.eclipse.jgit.annotations.Nullable;
55  import org.eclipse.jgit.lib.AnyObjectId;
56  import org.eclipse.jgit.lib.CommitBuilder;
57  import org.eclipse.jgit.lib.ObjectId;
58  import org.eclipse.jgit.lib.ObjectInserter;
59  import org.eclipse.jgit.lib.PersonIdent;
60  import org.eclipse.jgit.lib.Repository;
61  import org.eclipse.jgit.revwalk.RevCommit;
62  import org.eclipse.jgit.revwalk.RevObject;
63  import org.eclipse.jgit.revwalk.RevWalk;
64  import org.eclipse.jgit.transport.ReceiveCommand;
65  import org.eclipse.jgit.treewalk.EmptyTreeIterator;
66  import org.eclipse.jgit.treewalk.TreeWalk;
67  import org.eclipse.jgit.treewalk.filter.TreeFilter;
68  
69  /** Constructs a set of commands to stage content during a proposal. */
70  public class StageBuilder {
71  	/**
72  	 * Acceptable number of references to send in a single stage transaction.
73  	 * <p>
74  	 * If the number of unique objects exceeds this amount the builder will
75  	 * attempt to decrease the reference count by chaining commits..
76  	 */
77  	private static final int SMALL_BATCH_SIZE = 5;
78  
79  	/**
80  	 * Acceptable number of commits to chain together using parent pointers.
81  	 * <p>
82  	 * When staging many unique commits the {@link StageBuilder} batches
83  	 * together unrelated commits as parents of a temporary commit. After the
84  	 * proposal completes the temporary commit is discarded and can be garbage
85  	 * collected by all replicas.
86  	 */
87  	private static final int TEMP_PARENT_BATCH_SIZE = 128;
88  
89  	private static final byte[] PEEL = { ' ', '^' };
90  
91  	private final String txnStage;
92  	private final String txnId;
93  
94  	/**
95  	 * Construct a stage builder for a transaction.
96  	 *
97  	 * @param txnStageNamespace
98  	 *            namespace for transaction references to build
99  	 *            {@code "txnStageNamespace/txnId.n"} style names.
100 	 * @param txnId
101 	 *            identifier used to name temporary staging refs.
102 	 */
103 	public StageBuilder(String txnStageNamespace, ObjectId txnId) {
104 		this.txnStage = txnStageNamespace;
105 		this.txnId = txnId.name();
106 	}
107 
108 	/**
109 	 * Compare two RefTrees and return commands to stage new objects.
110 	 * <p>
111 	 * This method ignores the lineage between the two RefTrees and does a
112 	 * straight diff on the two trees. New objects will be staged. The diff
113 	 * strategy is useful to catch-up a lagging replica, without sending every
114 	 * intermediate step. This may mean the replica does not have the same
115 	 * object set as other replicas if there are rewinds or branch deletes.
116 	 *
117 	 * @param git
118 	 *            source repository to read {@code oldTree} and {@code newTree}
119 	 *            from.
120 	 * @param oldTree
121 	 *            accepted RefTree on the replica ({@code refs/txn/accepted}).
122 	 *            Use {@link ObjectId#zeroId()} if the remote does not have any
123 	 *            ref tree, e.g. a new replica catching up.
124 	 * @param newTree
125 	 *            RefTree being sent to the replica. The trees will be compared.
126 	 * @return list of commands to create {@code "refs/txn/stage/..."}
127 	 *         references on replicas anchoring new objects into the repository
128 	 *         while a transaction gains consensus.
129 	 * @throws IOException
130 	 *             {@code git} cannot be accessed to compare {@code oldTree} and
131 	 *             {@code newTree} to build the object set.
132 	 */
133 	public List<ReceiveCommand> makeStageList(Repository git, ObjectId oldTree,
134 			ObjectId newTree) throws IOException {
135 		try (RevWalk rw = new RevWalk(git);
136 				TreeWalk tw = new TreeWalk(rw.getObjectReader());
137 				ObjectInserter ins = git.newObjectInserter()) {
138 			if (AnyObjectId.equals(oldTree, ObjectId.zeroId())) {
139 				tw.addTree(new EmptyTreeIterator());
140 			} else {
141 				tw.addTree(rw.parseTree(oldTree));
142 			}
143 			tw.addTree(rw.parseTree(newTree));
144 			tw.setFilter(TreeFilter.ANY_DIFF);
145 			tw.setRecursive(true);
146 
147 			Set<ObjectId> newObjs = new HashSet<>();
148 			while (tw.next()) {
149 				if (tw.getRawMode(1) == TYPE_GITLINK
150 						&& !tw.isPathSuffix(PEEL, 2)) {
151 					newObjs.add(tw.getObjectId(1));
152 				}
153 			}
154 
155 			List<ReceiveCommand> cmds = makeStageList(newObjs, git, ins);
156 			ins.flush();
157 			return cmds;
158 		}
159 	}
160 
161 	/**
162 	 * Construct a set of commands to stage objects on a replica.
163 	 *
164 	 * @param newObjs
165 	 *            objects to send to a replica.
166 	 * @param git
167 	 *            local repository to read source objects from. Required to
168 	 *            perform minification of {@code newObjs}.
169 	 * @param inserter
170 	 *            inserter to write temporary commit objects during minification
171 	 *            if many new branches are created by {@code newObjs}.
172 	 * @return list of commands to create {@code "refs/txn/stage/..."}
173 	 *         references on replicas anchoring {@code newObjs} into the
174 	 *         repository while a transaction gains consensus.
175 	 * @throws IOException
176 	 *             {@code git} cannot be accessed to perform minification of
177 	 *             {@code newObjs}.
178 	 */
179 	public List<ReceiveCommand> makeStageList(Set<ObjectId> newObjs,
180 			@Nullable Repository git, @Nullable ObjectInserter inserter)
181 					throws IOException {
182 		if (git == null || newObjs.size() <= SMALL_BATCH_SIZE) {
183 			// Without a source repository can only construct unique set.
184 			List<ReceiveCommand> cmds = new ArrayList<>(newObjs.size());
185 			for (ObjectId id : newObjs) {
186 				stage(cmds, id);
187 			}
188 			return cmds;
189 		}
190 
191 		List<ReceiveCommand> cmds = new ArrayList<>();
192 		List<RevCommit> commits = new ArrayList<>();
193 		reduceObjects(cmds, commits, git, newObjs);
194 
195 		if (inserter == null || commits.size() <= 1
196 				|| (cmds.size() + commits.size()) <= SMALL_BATCH_SIZE) {
197 			// Without an inserter to aggregate commits, or for a small set of
198 			// commits just send one stage ref per commit.
199 			for (RevCommit c : commits) {
200 				stage(cmds, c.copy());
201 			}
202 			return cmds;
203 		}
204 
205 		// 'commits' is sorted most recent to least recent commit.
206 		// Group batches of commits and build a chain.
207 		// TODO(sop) Cluster by restricted graphs to support filtering.
208 		ObjectId tip = null;
209 		for (int end = commits.size(); end > 0;) {
210 			int start = Math.max(0, end - TEMP_PARENT_BATCH_SIZE);
211 			List<RevCommit> batch = commits.subList(start, end);
212 			List<ObjectId> parents = new ArrayList<>(1 + batch.size());
213 			if (tip != null) {
214 				parents.add(tip);
215 			}
216 			parents.addAll(batch);
217 
218 			CommitBuilder b = new CommitBuilder();
219 			b.setTreeId(batch.get(0).getTree());
220 			b.setParentIds(parents);
221 			b.setAuthor(tmpAuthor(batch));
222 			b.setCommitter(b.getAuthor());
223 			tip = inserter.insert(b);
224 			end = start;
225 		}
226 		stage(cmds, tip);
227 		return cmds;
228 	}
229 
230 	private static PersonIdent tmpAuthor(List<RevCommit> commits) {
231 		// Construct a predictable author using most recent commit time.
232 		int t = 0;
233 		for (int i = 0; i < commits.size();) {
234 			t = Math.max(t, commits.get(i).getCommitTime());
235 		}
236 		String name = "Ketch Stage"; //$NON-NLS-1$
237 		String email = "tmp@tmp"; //$NON-NLS-1$
238 		return new PersonIdent(name, email, t * 1000L, 0);
239 	}
240 
241 	private void reduceObjects(List<ReceiveCommand> cmds,
242 			List<RevCommit> commits, Repository git,
243 			Set<ObjectId> newObjs) throws IOException {
244 		try (RevWalk rw = new RevWalk(git)) {
245 			rw.setRetainBody(false);
246 
247 			for (ObjectId id : newObjs) {
248 				RevObject obj = rw.parseAny(id);
249 				if (obj instanceof RevCommit) {
250 					rw.markStart((RevCommit) obj);
251 				} else {
252 					stage(cmds, id);
253 				}
254 			}
255 
256 			for (RevCommit c; (c = rw.next()) != null;) {
257 				commits.add(c);
258 				rw.markUninteresting(c);
259 			}
260 		}
261 	}
262 
263 	private void stage(List<ReceiveCommand> cmds, ObjectId id) {
264 		int estLen = txnStage.length() + txnId.length() + 5;
265 		StringBuilder n = new StringBuilder(estLen);
266 		n.append(txnStage).append(txnId).append('.');
267 		n.append(Integer.toHexString(cmds.size()));
268 		cmds.add(new ReceiveCommand(ObjectId.zeroId(), id, n.toString()));
269 	}
270 }