View Javadoc
1   /*
2    * Copyright (C) 2016, Google Inc.
3    * and other copyright owners as documented in the project's IP log.
4    *
5    * This program and the accompanying materials are made available
6    * under the terms of the Eclipse Distribution License v1.0 which
7    * accompanies this distribution, is reproduced below, and is
8    * available at http://www.eclipse.org/org/documents/edl-v10.php
9    *
10   * All rights reserved.
11   *
12   * Redistribution and use in source and binary forms, with or
13   * without modification, are permitted provided that the following
14   * conditions are met:
15   *
16   * - Redistributions of source code must retain the above copyright
17   *   notice, this list of conditions and the following disclaimer.
18   *
19   * - Redistributions in binary form must reproduce the above
20   *   copyright notice, this list of conditions and the following
21   *   disclaimer in the documentation and/or other materials provided
22   *   with the distribution.
23   *
24   * - Neither the name of the Eclipse Foundation, Inc. nor the
25   *   names of its contributors may be used to endorse or promote
26   *   products derived from this software without specific prior
27   *   written permission.
28   *
29   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42   */
43  
44  package org.eclipse.jgit.internal.ketch;
45  
46  import static org.eclipse.jgit.lib.FileMode.TYPE_GITLINK;
47  
48  import java.io.IOException;
49  import java.util.ArrayList;
50  import java.util.HashSet;
51  import java.util.List;
52  import java.util.Set;
53  
54  import org.eclipse.jgit.annotations.Nullable;
55  import org.eclipse.jgit.lib.AnyObjectId;
56  import org.eclipse.jgit.lib.CommitBuilder;
57  import org.eclipse.jgit.lib.ObjectId;
58  import org.eclipse.jgit.lib.ObjectInserter;
59  import org.eclipse.jgit.lib.PersonIdent;
60  import org.eclipse.jgit.lib.Repository;
61  import org.eclipse.jgit.revwalk.RevCommit;
62  import org.eclipse.jgit.revwalk.RevObject;
63  import org.eclipse.jgit.revwalk.RevWalk;
64  import org.eclipse.jgit.transport.ReceiveCommand;
65  import org.eclipse.jgit.treewalk.EmptyTreeIterator;
66  import org.eclipse.jgit.treewalk.TreeWalk;
67  import org.eclipse.jgit.treewalk.filter.TreeFilter;
68  
69  /**
70   * Constructs a set of commands to stage content during a proposal.
71   */
72  public class StageBuilder {
73  	/**
74  	 * Acceptable number of references to send in a single stage transaction.
75  	 * <p>
76  	 * If the number of unique objects exceeds this amount the builder will
77  	 * attempt to decrease the reference count by chaining commits..
78  	 */
79  	private static final int SMALL_BATCH_SIZE = 5;
80  
81  	/**
82  	 * Acceptable number of commits to chain together using parent pointers.
83  	 * <p>
84  	 * When staging many unique commits the {@link StageBuilder} batches
85  	 * together unrelated commits as parents of a temporary commit. After the
86  	 * proposal completes the temporary commit is discarded and can be garbage
87  	 * collected by all replicas.
88  	 */
89  	private static final int TEMP_PARENT_BATCH_SIZE = 128;
90  
91  	private static final byte[] PEEL = { ' ', '^' };
92  
93  	private final String txnStage;
94  	private final String txnId;
95  
96  	/**
97  	 * Construct a stage builder for a transaction.
98  	 *
99  	 * @param txnStageNamespace
100 	 *            namespace for transaction references to build
101 	 *            {@code "txnStageNamespace/txnId.n"} style names.
102 	 * @param txnId
103 	 *            identifier used to name temporary staging refs.
104 	 */
105 	public StageBuilder(String txnStageNamespace, ObjectId txnId) {
106 		this.txnStage = txnStageNamespace;
107 		this.txnId = txnId.name();
108 	}
109 
110 	/**
111 	 * Compare two RefTrees and return commands to stage new objects.
112 	 * <p>
113 	 * This method ignores the lineage between the two RefTrees and does a
114 	 * straight diff on the two trees. New objects will be staged. The diff
115 	 * strategy is useful to catch-up a lagging replica, without sending every
116 	 * intermediate step. This may mean the replica does not have the same
117 	 * object set as other replicas if there are rewinds or branch deletes.
118 	 *
119 	 * @param git
120 	 *            source repository to read {@code oldTree} and {@code newTree}
121 	 *            from.
122 	 * @param oldTree
123 	 *            accepted RefTree on the replica ({@code refs/txn/accepted}).
124 	 *            Use {@link org.eclipse.jgit.lib.ObjectId#zeroId()} if the
125 	 *            remote does not have any ref tree, e.g. a new replica catching
126 	 *            up.
127 	 * @param newTree
128 	 *            RefTree being sent to the replica. The trees will be compared.
129 	 * @return list of commands to create {@code "refs/txn/stage/..."}
130 	 *         references on replicas anchoring new objects into the repository
131 	 *         while a transaction gains consensus.
132 	 * @throws java.io.IOException
133 	 *             {@code git} cannot be accessed to compare {@code oldTree} and
134 	 *             {@code newTree} to build the object set.
135 	 */
136 	public List<ReceiveCommand> makeStageList(Repository git, ObjectId oldTree,
137 			ObjectId newTree) throws IOException {
138 		try (RevWalklk/RevWalk.html#RevWalk">RevWalk rw = new RevWalk(git);
139 				TreeWalk tw = new TreeWalk(rw.getObjectReader());
140 				ObjectInserter ins = git.newObjectInserter()) {
141 			if (AnyObjectId.equals(oldTree, ObjectId.zeroId())) {
142 				tw.addTree(new EmptyTreeIterator());
143 			} else {
144 				tw.addTree(rw.parseTree(oldTree));
145 			}
146 			tw.addTree(rw.parseTree(newTree));
147 			tw.setFilter(TreeFilter.ANY_DIFF);
148 			tw.setRecursive(true);
149 
150 			Set<ObjectId> newObjs = new HashSet<>();
151 			while (tw.next()) {
152 				if (tw.getRawMode(1) == TYPE_GITLINK
153 						&& !tw.isPathSuffix(PEEL, 2)) {
154 					newObjs.add(tw.getObjectId(1));
155 				}
156 			}
157 
158 			List<ReceiveCommand> cmds = makeStageList(newObjs, git, ins);
159 			ins.flush();
160 			return cmds;
161 		}
162 	}
163 
164 	/**
165 	 * Construct a set of commands to stage objects on a replica.
166 	 *
167 	 * @param newObjs
168 	 *            objects to send to a replica.
169 	 * @param git
170 	 *            local repository to read source objects from. Required to
171 	 *            perform minification of {@code newObjs}.
172 	 * @param inserter
173 	 *            inserter to write temporary commit objects during minification
174 	 *            if many new branches are created by {@code newObjs}.
175 	 * @return list of commands to create {@code "refs/txn/stage/..."}
176 	 *         references on replicas anchoring {@code newObjs} into the
177 	 *         repository while a transaction gains consensus.
178 	 * @throws java.io.IOException
179 	 *             {@code git} cannot be accessed to perform minification of
180 	 *             {@code newObjs}.
181 	 */
182 	public List<ReceiveCommand> makeStageList(Set<ObjectId> newObjs,
183 			@Nullable Repository git, @Nullable ObjectInserter inserter)
184 					throws IOException {
185 		if (git == null || newObjs.size() <= SMALL_BATCH_SIZE) {
186 			// Without a source repository can only construct unique set.
187 			List<ReceiveCommand> cmds = new ArrayList<>(newObjs.size());
188 			for (ObjectId id : newObjs) {
189 				stage(cmds, id);
190 			}
191 			return cmds;
192 		}
193 
194 		List<ReceiveCommand> cmds = new ArrayList<>();
195 		List<RevCommit> commits = new ArrayList<>();
196 		reduceObjects(cmds, commits, git, newObjs);
197 
198 		if (inserter == null || commits.size() <= 1
199 				|| (cmds.size() + commits.size()) <= SMALL_BATCH_SIZE) {
200 			// Without an inserter to aggregate commits, or for a small set of
201 			// commits just send one stage ref per commit.
202 			for (RevCommit c : commits) {
203 				stage(cmds, c.copy());
204 			}
205 			return cmds;
206 		}
207 
208 		// 'commits' is sorted most recent to least recent commit.
209 		// Group batches of commits and build a chain.
210 		// TODO(sop) Cluster by restricted graphs to support filtering.
211 		ObjectId tip = null;
212 		for (int end = commits.size(); end > 0;) {
213 			int start = Math.max(0, end - TEMP_PARENT_BATCH_SIZE);
214 			List<RevCommit> batch = commits.subList(start, end);
215 			List<ObjectId> parents = new ArrayList<>(1 + batch.size());
216 			if (tip != null) {
217 				parents.add(tip);
218 			}
219 			parents.addAll(batch);
220 
221 			CommitBuilder b = new CommitBuilder();
222 			b.setTreeId(batch.get(0).getTree());
223 			b.setParentIds(parents);
224 			b.setAuthor(tmpAuthor(batch));
225 			b.setCommitter(b.getAuthor());
226 			tip = inserter.insert(b);
227 			end = start;
228 		}
229 		stage(cmds, tip);
230 		return cmds;
231 	}
232 
233 	private static PersonIdent tmpAuthor(List<RevCommit> commits) {
234 		// Construct a predictable author using most recent commit time.
235 		int t = 0;
236 		for (int i = 0; i < commits.size();) {
237 			t = Math.max(t, commits.get(i).getCommitTime());
238 		}
239 		String name = "Ketch Stage"; //$NON-NLS-1$
240 		String email = "tmp@tmp"; //$NON-NLS-1$
241 		return new PersonIdent(name, email, t * 1000L, 0);
242 	}
243 
244 	private void reduceObjects(List<ReceiveCommand> cmds,
245 			List<RevCommit> commits, Repository git,
246 			Set<ObjectId> newObjs) throws IOException {
247 		try (RevWalklk/RevWalk.html#RevWalk">RevWalk rw = new RevWalk(git)) {
248 			rw.setRetainBody(false);
249 
250 			for (ObjectId id : newObjs) {
251 				RevObject obj = rw.parseAny(id);
252 				if (obj instanceof RevCommit) {
253 					rw.markStart((RevCommit) obj);
254 				} else {
255 					stage(cmds, id);
256 				}
257 			}
258 
259 			for (RevCommit c; (c = rw.next()) != null;) {
260 				commits.add(c);
261 				rw.markUninteresting(c);
262 			}
263 		}
264 	}
265 
266 	private void stage(List<ReceiveCommand> cmds, ObjectId id) {
267 		int estLen = txnStage.length() + txnId.length() + 5;
268 		StringBuilder n = new StringBuilder(estLen);
269 		n.append(txnStage).append(txnId).append('.');
270 		n.append(Integer.toHexString(cmds.size()));
271 		cmds.add(new ReceiveCommand(ObjectId.zeroId(), id, n.toString()));
272 	}
273 }