1 /*
2 * Copyright (C) 2016, Google Inc.
3 * and other copyright owners as documented in the project's IP log.
4 *
5 * This program and the accompanying materials are made available
6 * under the terms of the Eclipse Distribution License v1.0 which
7 * accompanies this distribution, is reproduced below, and is
8 * available at http://www.eclipse.org/org/documents/edl-v10.php
9 *
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials provided
22 * with the distribution.
23 *
24 * - Neither the name of the Eclipse Foundation, Inc. nor the
25 * names of its contributors may be used to endorse or promote
26 * products derived from this software without specific prior
27 * written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 */
43
44 package org.eclipse.jgit.internal.ketch;
45
46 import static org.eclipse.jgit.lib.FileMode.TYPE_GITLINK;
47
48 import java.io.IOException;
49 import java.util.ArrayList;
50 import java.util.HashSet;
51 import java.util.List;
52 import java.util.Set;
53
54 import org.eclipse.jgit.annotations.Nullable;
55 import org.eclipse.jgit.lib.AnyObjectId;
56 import org.eclipse.jgit.lib.CommitBuilder;
57 import org.eclipse.jgit.lib.ObjectId;
58 import org.eclipse.jgit.lib.ObjectInserter;
59 import org.eclipse.jgit.lib.PersonIdent;
60 import org.eclipse.jgit.lib.Repository;
61 import org.eclipse.jgit.revwalk.RevCommit;
62 import org.eclipse.jgit.revwalk.RevObject;
63 import org.eclipse.jgit.revwalk.RevWalk;
64 import org.eclipse.jgit.transport.ReceiveCommand;
65 import org.eclipse.jgit.treewalk.EmptyTreeIterator;
66 import org.eclipse.jgit.treewalk.TreeWalk;
67 import org.eclipse.jgit.treewalk.filter.TreeFilter;
68
69 /**
70 * Constructs a set of commands to stage content during a proposal.
71 */
72 public class StageBuilder {
73 /**
74 * Acceptable number of references to send in a single stage transaction.
75 * <p>
76 * If the number of unique objects exceeds this amount the builder will
77 * attempt to decrease the reference count by chaining commits..
78 */
79 private static final int SMALL_BATCH_SIZE = 5;
80
81 /**
82 * Acceptable number of commits to chain together using parent pointers.
83 * <p>
84 * When staging many unique commits the {@link StageBuilder} batches
85 * together unrelated commits as parents of a temporary commit. After the
86 * proposal completes the temporary commit is discarded and can be garbage
87 * collected by all replicas.
88 */
89 private static final int TEMP_PARENT_BATCH_SIZE = 128;
90
91 private static final byte[] PEEL = { ' ', '^' };
92
93 private final String txnStage;
94 private final String txnId;
95
96 /**
97 * Construct a stage builder for a transaction.
98 *
99 * @param txnStageNamespace
100 * namespace for transaction references to build
101 * {@code "txnStageNamespace/txnId.n"} style names.
102 * @param txnId
103 * identifier used to name temporary staging refs.
104 */
105 public StageBuilder(String txnStageNamespace, ObjectId txnId) {
106 this.txnStage = txnStageNamespace;
107 this.txnId = txnId.name();
108 }
109
110 /**
111 * Compare two RefTrees and return commands to stage new objects.
112 * <p>
113 * This method ignores the lineage between the two RefTrees and does a
114 * straight diff on the two trees. New objects will be staged. The diff
115 * strategy is useful to catch-up a lagging replica, without sending every
116 * intermediate step. This may mean the replica does not have the same
117 * object set as other replicas if there are rewinds or branch deletes.
118 *
119 * @param git
120 * source repository to read {@code oldTree} and {@code newTree}
121 * from.
122 * @param oldTree
123 * accepted RefTree on the replica ({@code refs/txn/accepted}).
124 * Use {@link org.eclipse.jgit.lib.ObjectId#zeroId()} if the
125 * remote does not have any ref tree, e.g. a new replica catching
126 * up.
127 * @param newTree
128 * RefTree being sent to the replica. The trees will be compared.
129 * @return list of commands to create {@code "refs/txn/stage/..."}
130 * references on replicas anchoring new objects into the repository
131 * while a transaction gains consensus.
132 * @throws java.io.IOException
133 * {@code git} cannot be accessed to compare {@code oldTree} and
134 * {@code newTree} to build the object set.
135 */
136 public List<ReceiveCommand> makeStageList(Repository git, ObjectId oldTree,
137 ObjectId newTree) throws IOException {
138 try (RevWalklk/RevWalk.html#RevWalk">RevWalk rw = new RevWalk(git);
139 TreeWalk tw = new TreeWalk(rw.getObjectReader());
140 ObjectInserter ins = git.newObjectInserter()) {
141 if (AnyObjectId.equals(oldTree, ObjectId.zeroId())) {
142 tw.addTree(new EmptyTreeIterator());
143 } else {
144 tw.addTree(rw.parseTree(oldTree));
145 }
146 tw.addTree(rw.parseTree(newTree));
147 tw.setFilter(TreeFilter.ANY_DIFF);
148 tw.setRecursive(true);
149
150 Set<ObjectId> newObjs = new HashSet<>();
151 while (tw.next()) {
152 if (tw.getRawMode(1) == TYPE_GITLINK
153 && !tw.isPathSuffix(PEEL, 2)) {
154 newObjs.add(tw.getObjectId(1));
155 }
156 }
157
158 List<ReceiveCommand> cmds = makeStageList(newObjs, git, ins);
159 ins.flush();
160 return cmds;
161 }
162 }
163
164 /**
165 * Construct a set of commands to stage objects on a replica.
166 *
167 * @param newObjs
168 * objects to send to a replica.
169 * @param git
170 * local repository to read source objects from. Required to
171 * perform minification of {@code newObjs}.
172 * @param inserter
173 * inserter to write temporary commit objects during minification
174 * if many new branches are created by {@code newObjs}.
175 * @return list of commands to create {@code "refs/txn/stage/..."}
176 * references on replicas anchoring {@code newObjs} into the
177 * repository while a transaction gains consensus.
178 * @throws java.io.IOException
179 * {@code git} cannot be accessed to perform minification of
180 * {@code newObjs}.
181 */
182 public List<ReceiveCommand> makeStageList(Set<ObjectId> newObjs,
183 @Nullable Repository git, @Nullable ObjectInserter inserter)
184 throws IOException {
185 if (git == null || newObjs.size() <= SMALL_BATCH_SIZE) {
186 // Without a source repository can only construct unique set.
187 List<ReceiveCommand> cmds = new ArrayList<>(newObjs.size());
188 for (ObjectId id : newObjs) {
189 stage(cmds, id);
190 }
191 return cmds;
192 }
193
194 List<ReceiveCommand> cmds = new ArrayList<>();
195 List<RevCommit> commits = new ArrayList<>();
196 reduceObjects(cmds, commits, git, newObjs);
197
198 if (inserter == null || commits.size() <= 1
199 || (cmds.size() + commits.size()) <= SMALL_BATCH_SIZE) {
200 // Without an inserter to aggregate commits, or for a small set of
201 // commits just send one stage ref per commit.
202 for (RevCommit c : commits) {
203 stage(cmds, c.copy());
204 }
205 return cmds;
206 }
207
208 // 'commits' is sorted most recent to least recent commit.
209 // Group batches of commits and build a chain.
210 // TODO(sop) Cluster by restricted graphs to support filtering.
211 ObjectId tip = null;
212 for (int end = commits.size(); end > 0;) {
213 int start = Math.max(0, end - TEMP_PARENT_BATCH_SIZE);
214 List<RevCommit> batch = commits.subList(start, end);
215 List<ObjectId> parents = new ArrayList<>(1 + batch.size());
216 if (tip != null) {
217 parents.add(tip);
218 }
219 parents.addAll(batch);
220
221 CommitBuilder b = new CommitBuilder();
222 b.setTreeId(batch.get(0).getTree());
223 b.setParentIds(parents);
224 b.setAuthor(tmpAuthor(batch));
225 b.setCommitter(b.getAuthor());
226 tip = inserter.insert(b);
227 end = start;
228 }
229 stage(cmds, tip);
230 return cmds;
231 }
232
233 private static PersonIdent tmpAuthor(List<RevCommit> commits) {
234 // Construct a predictable author using most recent commit time.
235 int t = 0;
236 for (int i = 0; i < commits.size();) {
237 t = Math.max(t, commits.get(i).getCommitTime());
238 }
239 String name = "Ketch Stage"; //$NON-NLS-1$
240 String email = "tmp@tmp"; //$NON-NLS-1$
241 return new PersonIdent(name, email, t * 1000L, 0);
242 }
243
244 private void reduceObjects(List<ReceiveCommand> cmds,
245 List<RevCommit> commits, Repository git,
246 Set<ObjectId> newObjs) throws IOException {
247 try (RevWalklk/RevWalk.html#RevWalk">RevWalk rw = new RevWalk(git)) {
248 rw.setRetainBody(false);
249
250 for (ObjectId id : newObjs) {
251 RevObject obj = rw.parseAny(id);
252 if (obj instanceof RevCommit) {
253 rw.markStart((RevCommit) obj);
254 } else {
255 stage(cmds, id);
256 }
257 }
258
259 for (RevCommit c; (c = rw.next()) != null;) {
260 commits.add(c);
261 rw.markUninteresting(c);
262 }
263 }
264 }
265
266 private void stage(List<ReceiveCommand> cmds, ObjectId id) {
267 int estLen = txnStage.length() + txnId.length() + 5;
268 StringBuilder n = new StringBuilder(estLen);
269 n.append(txnStage).append(txnId).append('.');
270 n.append(Integer.toHexString(cmds.size()));
271 cmds.add(new ReceiveCommand(ObjectId.zeroId(), id, n.toString()));
272 }
273 }