1 /*
2 * Copyright (C) 2016, Google Inc.
3 * and other copyright owners as documented in the project's IP log.
4 *
5 * This program and the accompanying materials are made available
6 * under the terms of the Eclipse Distribution License v1.0 which
7 * accompanies this distribution, is reproduced below, and is
8 * available at http://www.eclipse.org/org/documents/edl-v10.php
9 *
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials provided
22 * with the distribution.
23 *
24 * - Neither the name of the Eclipse Foundation, Inc. nor the
25 * names of its contributors may be used to endorse or promote
26 * products derived from this software without specific prior
27 * written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 */
43
44 package org.eclipse.jgit.internal.ketch;
45
46 import static org.eclipse.jgit.lib.FileMode.TYPE_GITLINK;
47
48 import java.io.IOException;
49 import java.util.ArrayList;
50 import java.util.HashSet;
51 import java.util.List;
52 import java.util.Set;
53
54 import org.eclipse.jgit.annotations.Nullable;
55 import org.eclipse.jgit.lib.AnyObjectId;
56 import org.eclipse.jgit.lib.CommitBuilder;
57 import org.eclipse.jgit.lib.ObjectId;
58 import org.eclipse.jgit.lib.ObjectInserter;
59 import org.eclipse.jgit.lib.PersonIdent;
60 import org.eclipse.jgit.lib.Repository;
61 import org.eclipse.jgit.revwalk.RevCommit;
62 import org.eclipse.jgit.revwalk.RevObject;
63 import org.eclipse.jgit.revwalk.RevWalk;
64 import org.eclipse.jgit.transport.ReceiveCommand;
65 import org.eclipse.jgit.treewalk.EmptyTreeIterator;
66 import org.eclipse.jgit.treewalk.TreeWalk;
67 import org.eclipse.jgit.treewalk.filter.TreeFilter;
68
69 /** Constructs a set of commands to stage content during a proposal. */
70 public class StageBuilder {
71 /**
72 * Acceptable number of references to send in a single stage transaction.
73 * <p>
74 * If the number of unique objects exceeds this amount the builder will
75 * attempt to decrease the reference count by chaining commits..
76 */
77 private static final int SMALL_BATCH_SIZE = 5;
78
79 /**
80 * Acceptable number of commits to chain together using parent pointers.
81 * <p>
82 * When staging many unique commits the {@link StageBuilder} batches
83 * together unrelated commits as parents of a temporary commit. After the
84 * proposal completes the temporary commit is discarded and can be garbage
85 * collected by all replicas.
86 */
87 private static final int TEMP_PARENT_BATCH_SIZE = 128;
88
89 private static final byte[] PEEL = { ' ', '^' };
90
91 private final String txnStage;
92 private final String txnId;
93
94 /**
95 * Construct a stage builder for a transaction.
96 *
97 * @param txnStageNamespace
98 * namespace for transaction references to build
99 * {@code "txnStageNamespace/txnId.n"} style names.
100 * @param txnId
101 * identifier used to name temporary staging refs.
102 */
103 public StageBuilder(String txnStageNamespace, ObjectId txnId) {
104 this.txnStage = txnStageNamespace;
105 this.txnId = txnId.name();
106 }
107
108 /**
109 * Compare two RefTrees and return commands to stage new objects.
110 * <p>
111 * This method ignores the lineage between the two RefTrees and does a
112 * straight diff on the two trees. New objects will be staged. The diff
113 * strategy is useful to catch-up a lagging replica, without sending every
114 * intermediate step. This may mean the replica does not have the same
115 * object set as other replicas if there are rewinds or branch deletes.
116 *
117 * @param git
118 * source repository to read {@code oldTree} and {@code newTree}
119 * from.
120 * @param oldTree
121 * accepted RefTree on the replica ({@code refs/txn/accepted}).
122 * Use {@link ObjectId#zeroId()} if the remote does not have any
123 * ref tree, e.g. a new replica catching up.
124 * @param newTree
125 * RefTree being sent to the replica. The trees will be compared.
126 * @return list of commands to create {@code "refs/txn/stage/..."}
127 * references on replicas anchoring new objects into the repository
128 * while a transaction gains consensus.
129 * @throws IOException
130 * {@code git} cannot be accessed to compare {@code oldTree} and
131 * {@code newTree} to build the object set.
132 */
133 public List<ReceiveCommand> makeStageList(Repository git, ObjectId oldTree,
134 ObjectId newTree) throws IOException {
135 try (RevWalk rw = new RevWalk(git);
136 TreeWalk tw = new TreeWalk(rw.getObjectReader());
137 ObjectInserter ins = git.newObjectInserter()) {
138 if (AnyObjectId.equals(oldTree, ObjectId.zeroId())) {
139 tw.addTree(new EmptyTreeIterator());
140 } else {
141 tw.addTree(rw.parseTree(oldTree));
142 }
143 tw.addTree(rw.parseTree(newTree));
144 tw.setFilter(TreeFilter.ANY_DIFF);
145 tw.setRecursive(true);
146
147 Set<ObjectId> newObjs = new HashSet<>();
148 while (tw.next()) {
149 if (tw.getRawMode(1) == TYPE_GITLINK
150 && !tw.isPathSuffix(PEEL, 2)) {
151 newObjs.add(tw.getObjectId(1));
152 }
153 }
154
155 List<ReceiveCommand> cmds = makeStageList(newObjs, git, ins);
156 ins.flush();
157 return cmds;
158 }
159 }
160
161 /**
162 * Construct a set of commands to stage objects on a replica.
163 *
164 * @param newObjs
165 * objects to send to a replica.
166 * @param git
167 * local repository to read source objects from. Required to
168 * perform minification of {@code newObjs}.
169 * @param inserter
170 * inserter to write temporary commit objects during minification
171 * if many new branches are created by {@code newObjs}.
172 * @return list of commands to create {@code "refs/txn/stage/..."}
173 * references on replicas anchoring {@code newObjs} into the
174 * repository while a transaction gains consensus.
175 * @throws IOException
176 * {@code git} cannot be accessed to perform minification of
177 * {@code newObjs}.
178 */
179 public List<ReceiveCommand> makeStageList(Set<ObjectId> newObjs,
180 @Nullable Repository git, @Nullable ObjectInserter inserter)
181 throws IOException {
182 if (git == null || newObjs.size() <= SMALL_BATCH_SIZE) {
183 // Without a source repository can only construct unique set.
184 List<ReceiveCommand> cmds = new ArrayList<>(newObjs.size());
185 for (ObjectId id : newObjs) {
186 stage(cmds, id);
187 }
188 return cmds;
189 }
190
191 List<ReceiveCommand> cmds = new ArrayList<>();
192 List<RevCommit> commits = new ArrayList<>();
193 reduceObjects(cmds, commits, git, newObjs);
194
195 if (inserter == null || commits.size() <= 1
196 || (cmds.size() + commits.size()) <= SMALL_BATCH_SIZE) {
197 // Without an inserter to aggregate commits, or for a small set of
198 // commits just send one stage ref per commit.
199 for (RevCommit c : commits) {
200 stage(cmds, c.copy());
201 }
202 return cmds;
203 }
204
205 // 'commits' is sorted most recent to least recent commit.
206 // Group batches of commits and build a chain.
207 // TODO(sop) Cluster by restricted graphs to support filtering.
208 ObjectId tip = null;
209 for (int end = commits.size(); end > 0;) {
210 int start = Math.max(0, end - TEMP_PARENT_BATCH_SIZE);
211 List<RevCommit> batch = commits.subList(start, end);
212 List<ObjectId> parents = new ArrayList<>(1 + batch.size());
213 if (tip != null) {
214 parents.add(tip);
215 }
216 parents.addAll(batch);
217
218 CommitBuilder b = new CommitBuilder();
219 b.setTreeId(batch.get(0).getTree());
220 b.setParentIds(parents);
221 b.setAuthor(tmpAuthor(batch));
222 b.setCommitter(b.getAuthor());
223 tip = inserter.insert(b);
224 end = start;
225 }
226 stage(cmds, tip);
227 return cmds;
228 }
229
230 private static PersonIdent tmpAuthor(List<RevCommit> commits) {
231 // Construct a predictable author using most recent commit time.
232 int t = 0;
233 for (int i = 0; i < commits.size();) {
234 t = Math.max(t, commits.get(i).getCommitTime());
235 }
236 String name = "Ketch Stage"; //$NON-NLS-1$
237 String email = "tmp@tmp"; //$NON-NLS-1$
238 return new PersonIdent(name, email, t * 1000L, 0);
239 }
240
241 private void reduceObjects(List<ReceiveCommand> cmds,
242 List<RevCommit> commits, Repository git,
243 Set<ObjectId> newObjs) throws IOException {
244 try (RevWalk rw = new RevWalk(git)) {
245 rw.setRetainBody(false);
246
247 for (ObjectId id : newObjs) {
248 RevObject obj = rw.parseAny(id);
249 if (obj instanceof RevCommit) {
250 rw.markStart((RevCommit) obj);
251 } else {
252 stage(cmds, id);
253 }
254 }
255
256 for (RevCommit c; (c = rw.next()) != null;) {
257 commits.add(c);
258 rw.markUninteresting(c);
259 }
260 }
261 }
262
263 private void stage(List<ReceiveCommand> cmds, ObjectId id) {
264 int estLen = txnStage.length() + txnId.length() + 5;
265 StringBuilder n = new StringBuilder(estLen);
266 n.append(txnStage).append(txnId).append('.');
267 n.append(Integer.toHexString(cmds.size()));
268 cmds.add(new ReceiveCommand(ObjectId.zeroId(), id, n.toString()));
269 }
270 }