PackWriterBitmapPreparer.java

/*
 * Copyright (C) 2012, Google Inc. and others
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Distribution License v. 1.0 which is available at
 * https://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

package org.eclipse.jgit.internal.storage.pack;

import static org.eclipse.jgit.internal.storage.file.PackBitmapIndex.FLAG_REUSE;
import static org.eclipse.jgit.revwalk.RevFlag.SEEN;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.revwalk.AddUnseenToBitmapFilter;
import org.eclipse.jgit.internal.storage.file.BitmapIndexImpl;
import org.eclipse.jgit.internal.storage.file.BitmapIndexImpl.CompressedBitmap;
import org.eclipse.jgit.internal.storage.file.PackBitmapIndex;
import org.eclipse.jgit.internal.storage.file.PackBitmapIndexBuilder;
import org.eclipse.jgit.internal.storage.file.PackBitmapIndexRemapper;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.BitmapIndex.BitmapBuilder;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.revwalk.BitmapWalker;
import org.eclipse.jgit.revwalk.ObjectWalk;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.revwalk.filter.RevFilter;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.util.BlockList;
import org.eclipse.jgit.util.SystemReader;

import com.googlecode.javaewah.EWAHCompressedBitmap;

/**
 * Helper class for the {@link PackWriter} to select commits for which to build
 * pack index bitmaps.
 */
class PackWriterBitmapPreparer {

	private static final int DAY_IN_SECONDS = 24 * 60 * 60;

	private static final int DISTANCE_THRESHOLD = 2000;

	private static final Comparator<RevCommit> ORDER_BY_REVERSE_TIMESTAMP = (
			RevCommit a, RevCommit b) -> Integer
					.signum(b.getCommitTime() - a.getCommitTime());

	private final ObjectReader reader;
	private final ProgressMonitor pm;
	private final Set<? extends ObjectId> want;
	private final PackBitmapIndexBuilder writeBitmaps;
	private final BitmapIndexImpl commitBitmapIndex;
	private final PackBitmapIndexRemapper bitmapRemapper;
	private final BitmapIndexImpl bitmapIndex;

	private final int contiguousCommitCount;
	private final int recentCommitCount;
	private final int recentCommitSpan;
	private final int distantCommitSpan;
	private final int excessiveBranchCount;
	private final long inactiveBranchTimestamp;

	PackWriterBitmapPreparer(ObjectReader reader,
			PackBitmapIndexBuilder writeBitmaps, ProgressMonitor pm,
			Set<? extends ObjectId> want, PackConfig config)
					throws IOException {
		this.reader = reader;
		this.writeBitmaps = writeBitmaps;
		this.pm = pm;
		this.want = want;
		this.commitBitmapIndex = new BitmapIndexImpl(writeBitmaps);
		this.bitmapRemapper = PackBitmapIndexRemapper.newPackBitmapIndex(
				reader.getBitmapIndex(), writeBitmaps);
		this.bitmapIndex = new BitmapIndexImpl(bitmapRemapper);
		this.contiguousCommitCount = config.getBitmapContiguousCommitCount();
		this.recentCommitCount = config.getBitmapRecentCommitCount();
		this.recentCommitSpan = config.getBitmapRecentCommitSpan();
		this.distantCommitSpan = config.getBitmapDistantCommitSpan();
		this.excessiveBranchCount = config.getBitmapExcessiveBranchCount();
		long now = SystemReader.getInstance().getCurrentTime();
		long ageInSeconds = config.getBitmapInactiveBranchAgeInDays()
				* DAY_IN_SECONDS;
		this.inactiveBranchTimestamp = (now / 1000) - ageInSeconds;
	}

	/**
	 * Returns the commit objects for which bitmap indices should be built.
	 *
	 * @param expectedCommitCount
	 *            count of commits in the pack
	 * @param excludeFromBitmapSelection
	 *            commits that should be excluded from bitmap selection
	 * @return commit objects for which bitmap indices should be built
	 * @throws IncorrectObjectTypeException
	 *             if any of the processed objects is not a commit
	 * @throws IOException
	 *             on errors reading pack or index files
	 * @throws MissingObjectException
	 *             if an expected object is missing
	 */
	Collection<BitmapCommit> selectCommits(int expectedCommitCount,
			Set<? extends ObjectId> excludeFromBitmapSelection)
			throws IncorrectObjectTypeException, IOException,
			MissingObjectException {
		/*
		 * Thinking of bitmap indices as a cache, if we find bitmaps at or at a
		 * close ancestor to 'old' and 'new' when calculating old..new, then all
		 * objects can be calculated with minimal graph walking. A distribution
		 * that favors creating bitmaps for the most recent commits maximizes
		 * the cache hits for clients that are close to HEAD, which is the
		 * majority of calculations performed.
		 */
		try (RevWalk rw = new RevWalk(reader);
				RevWalk rw2 = new RevWalk(reader)) {
			pm.beginTask(JGitText.get().selectingCommits,
					ProgressMonitor.UNKNOWN);
			rw.setRetainBody(false);
			CommitSelectionHelper selectionHelper = captureOldAndNewCommits(rw,
					expectedCommitCount, excludeFromBitmapSelection);
			pm.endTask();

			// Add reused bitmaps from the previous GC pack's bitmap indices.
			// Currently they are always fully reused, even if their spans don't
			// match this run's PackConfig values.
			int newCommits = selectionHelper.getCommitCount();
			BlockList<BitmapCommit> selections = new BlockList<>(
					selectionHelper.reusedCommits.size()
							+ newCommits / recentCommitSpan + 1);
			for (BitmapCommit reuse : selectionHelper.reusedCommits) {
				selections.add(reuse);
			}

			if (newCommits == 0) {
				for (AnyObjectId id : selectionHelper.newWants) {
					selections.add(new BitmapCommit(id, false, 0));
				}
				return selections;
			}

			pm.beginTask(JGitText.get().selectingCommits, newCommits);
			int totalWants = want.size();
			BitmapBuilder seen = commitBitmapIndex.newBitmapBuilder();
			seen.or(selectionHelper.reusedCommitsBitmap);
			rw2.setRetainBody(false);
			rw2.setRevFilter(new NotInBitmapFilter(seen));

			// For each branch, do a revwalk to enumerate its commits. Exclude
			// both reused commits and any commits seen in a previous branch.
			// Then iterate through all new commits from oldest to newest,
			// selecting well-spaced commits in this branch.
			for (RevCommit rc : selectionHelper.newWantsByNewest) {
				BitmapBuilder tipBitmap = commitBitmapIndex.newBitmapBuilder();
				rw2.markStart((RevCommit) rw2.peel(rw2.parseAny(rc)));
				RevCommit rc2;
				while ((rc2 = rw2.next()) != null) {
					tipBitmap.addObject(rc2, Constants.OBJ_COMMIT);
				}
				int cardinality = tipBitmap.cardinality();
				seen.or(tipBitmap);

				// Within this branch, keep ordered lists of commits
				// representing chains in its history, where each chain is a
				// "sub-branch". Ordering commits by these chains makes for
				// fewer differences between consecutive selected commits, which
				// in turn provides better compression/on the run-length
				// encoding of the XORs between them.
				List<List<BitmapCommit>> chains = new ArrayList<>();

				// Mark the current branch as inactive if its tip commit isn't
				// recent and there are an excessive number of branches, to
				// prevent memory bloat of computing too many bitmaps for stale
				// branches.
				boolean isActiveBranch = true;
				if (totalWants > excessiveBranchCount && !isRecentCommit(rc)) {
					isActiveBranch = false;
				}

				// Insert bitmaps at the offsets suggested by the
				// nextSelectionDistance() heuristic. Only reuse bitmaps created
				// for more distant commits.
				int index = -1;
				int nextIn = nextSpan(cardinality);
				int nextFlg = nextIn == distantCommitSpan
						? PackBitmapIndex.FLAG_REUSE
						: 0;

				// For the current branch, iterate through all commits from
				// oldest to newest.
				for (RevCommit c : selectionHelper) {
					// Optimization: if we have found all the commits for this
					// branch, stop searching
					int distanceFromTip = cardinality - index - 1;
					if (distanceFromTip == 0) {
						break;
					}

					// Ignore commits that are not in this branch
					if (!tipBitmap.contains(c)) {
						continue;
					}

					index++;
					nextIn--;
					pm.update(1);

					// Always pick the items in wants, prefer merge commits.
					if (selectionHelper.newWants.remove(c)) {
						if (nextIn > 0) {
							nextFlg = 0;
						}
					} else {
						boolean stillInSpan = nextIn >= 0;
						boolean isMergeCommit = c.getParentCount() > 1;
						// Force selection if:
						// a) we have exhausted the window looking for merges
						// b) we are in the top commits of an active branch
						// c) we are at a branch tip
						boolean mustPick = (nextIn <= -recentCommitSpan)
								|| (isActiveBranch
										&& (distanceFromTip <= contiguousCommitCount))
								|| (distanceFromTip == 1); // most recent commit
						if (!mustPick && (stillInSpan || !isMergeCommit)) {
							continue;
						}
					}

					// This commit is selected.
					// Calculate where to look for the next one.
					int flags = nextFlg;
					int currDist = distanceFromTip;
					nextIn = nextSpan(distanceFromTip);
					nextFlg = nextIn == distantCommitSpan
							? PackBitmapIndex.FLAG_REUSE
							: 0;

					// Create the commit bitmap for the current commit
					BitmapBuilder bitmap = commitBitmapIndex.newBitmapBuilder();
					rw.reset();
					rw.markStart(c);
					rw.setRevFilter(new AddUnseenToBitmapFilter(
							selectionHelper.reusedCommitsBitmap, bitmap));
					while (rw.next() != null) {
						// The filter adds the reachable commits to bitmap.
					}

					// Sort the commits by independent chains in this branch's
					// history, yielding better compression when building
					// bitmaps.
					List<BitmapCommit> longestAncestorChain = null;
					for (List<BitmapCommit> chain : chains) {
						BitmapCommit mostRecentCommit = chain
								.get(chain.size() - 1);
						if (bitmap.contains(mostRecentCommit)) {
							if (longestAncestorChain == null
									|| longestAncestorChain.size() < chain
											.size()) {
								longestAncestorChain = chain;
							}
						}
					}

					if (longestAncestorChain == null) {
						longestAncestorChain = new ArrayList<>();
						chains.add(longestAncestorChain);
					}

					// The commit bc should reuse bitmap walker from its close
					// ancestor. And the bitmap of bc should only be added to
					// PackBitmapIndexBuilder when it's an old enough
					// commit,i.e. distance from tip should be greater than
					// DISTANCE_THRESHOLD, to save memory.
					BitmapCommit bc = BitmapCommit.newBuilder(c).setFlags(flags)
							.setAddToIndex(currDist >= DISTANCE_THRESHOLD)
							.setReuseWalker(!longestAncestorChain.isEmpty())
							.build();
					longestAncestorChain.add(bc);
					writeBitmaps.addBitmap(c, bitmap, 0);
				}

				for (List<BitmapCommit> chain : chains) {
					selections.addAll(chain);
				}
			}

			// Add the remaining peeledWant
			for (AnyObjectId remainingWant : selectionHelper.newWants) {
				selections.add(new BitmapCommit(remainingWant, false, 0));
			}
			writeBitmaps.resetBitmaps(selections.size()); // Remove the temporary commit bitmaps.

			pm.endTask();
			return selections;
		}
	}

	private boolean isRecentCommit(RevCommit revCommit) {
		return revCommit.getCommitTime() > inactiveBranchTimestamp;
	}

	/**
	 * A RevFilter that excludes the commits named in a bitmap from the walk.
	 * <p>
	 * If a commit is in {@code bitmap} then that commit is not emitted by the
	 * walk and its parents are marked as SEEN so the walk can skip them.  The
	 * bitmaps passed in have the property that the parents of any commit in
	 * {@code bitmap} are also in {@code bitmap}, so marking the parents as
	 * SEEN speeds up the RevWalk by saving it from walking down blind alleys
	 * and does not change the commits emitted.
	 */
	private static class NotInBitmapFilter extends RevFilter {
		private final BitmapBuilder bitmap;

		NotInBitmapFilter(BitmapBuilder bitmap) {
			this.bitmap = bitmap;
		}

		@Override
		public final boolean include(RevWalk rw, RevCommit c) {
			if (!bitmap.contains(c)) {
				return true;
			}
			for (RevCommit p : c.getParents()) {
				p.add(SEEN);
			}
			return false;
		}

		@Override
		public final NotInBitmapFilter clone() {
			throw new UnsupportedOperationException();
		}

		@Override
		public final boolean requiresCommitBody() {
			return false;
		}
	}

	/**
	 * Records which of the {@code wants} can be found in the previous GC pack's
	 * bitmap indices and which are new.
	 *
	 * @param rw
	 *            a {@link RevWalk} to find reachable objects in this repository
	 * @param expectedCommitCount
	 *            expected count of commits. The actual count may be less due to
	 *            unreachable garbage.
	 * @param excludeFromBitmapSelection
	 *            commits that should be excluded from bitmap selection
	 * @return a {@link CommitSelectionHelper} capturing which commits are
	 *         covered by a previous pack's bitmaps and which new commits need
	 *         bitmap coverage
	 * @throws IncorrectObjectTypeException
	 *             if any of the processed objects is not a commit
	 * @throws IOException
	 *             on errors reading pack or index files
	 * @throws MissingObjectException
	 *             if an expected object is missing
	 */
	private CommitSelectionHelper captureOldAndNewCommits(RevWalk rw,
			int expectedCommitCount,
			Set<? extends ObjectId> excludeFromBitmapSelection)
			throws IncorrectObjectTypeException, IOException,
			MissingObjectException {
		// Track bitmaps and commits from the previous GC pack bitmap indices.
		BitmapBuilder reuse = commitBitmapIndex.newBitmapBuilder();
		List<BitmapCommit> reuseCommits = new ArrayList<>();
		for (PackBitmapIndexRemapper.Entry entry : bitmapRemapper) {
			// More recent commits did not have the reuse flag set, so skip them
			if ((entry.getFlags() & FLAG_REUSE) != FLAG_REUSE) {
				continue;
			}
			RevObject ro = rw.peel(rw.parseAny(entry));
			if (!(ro instanceof RevCommit)) {
				continue;
			}

			RevCommit rc = (RevCommit) ro;
			reuseCommits.add(new BitmapCommit(rc, false, entry.getFlags()));
			if (!reuse.contains(rc)) {
				EWAHCompressedBitmap bitmap = bitmapRemapper.ofObjectType(
						bitmapRemapper.getBitmap(rc), Constants.OBJ_COMMIT);
				reuse.or(new CompressedBitmap(bitmap, commitBitmapIndex));
			}
		}

		// Add branch tips that are not represented in a previous pack's bitmap
		// indices. Set up a RevWalk to find new commits not in the old packs.
		List<RevCommit> newWantsByNewest = new ArrayList<>(want.size());
		Set<RevCommit> newWants = new HashSet<>(want.size());
		for (AnyObjectId objectId : want) {
			RevObject ro = rw.peel(rw.parseAny(objectId));
			if (!(ro instanceof RevCommit) || reuse.contains(ro)
					|| excludeFromBitmapSelection.contains(ro)) {
				continue;
			}

			RevCommit rc = (RevCommit) ro;
			rw.markStart(rc);
			newWants.add(rc);
			newWantsByNewest.add(rc);
		}

		// Create a list of commits in reverse order (older to newer) that are
		// not in the previous bitmap indices and are reachable.
		rw.setRevFilter(new NotInBitmapFilter(reuse));
		RevCommit[] commits = new RevCommit[expectedCommitCount];
		int pos = commits.length;
		RevCommit rc;
		while ((rc = rw.next()) != null && pos > 0) {
			commits[--pos] = rc;
			pm.update(1);
		}

		// Sort the new wants by reverse commit time.
		Collections.sort(newWantsByNewest, ORDER_BY_REVERSE_TIMESTAMP);
		return new CommitSelectionHelper(newWants, commits, pos,
				newWantsByNewest, reuse, reuseCommits);
	}

	/*-
	 * Returns the desired distance to the next bitmap based on the distance
	 * from the tip commit. Only differentiates recent from distant spans,
	 * selectCommits() handles the contiguous commits at the tip for active
	 * or inactive branches.
	 *
	 * A graph of this function looks like this, where
	 * the X axis is the distance from the tip commit and the Y axis is the
	 * bitmap selection distance.
	 *
	 * 5000                ____...
	 *                    /
	 *                  /
	 *                /
	 *              /
	 *  100  _____/
	 *       0  20100  25000
	 *
	 * Linear scaling between 20100 and 25000 prevents spans >100 for distances
	 * <20000 (otherwise, a span of 5000 would be returned for a distance of
	 * 21000, and the range 16000-20000 would have no selections).
	 */
	int nextSpan(int distanceFromTip) {
		if (distanceFromTip < 0) {
			throw new IllegalArgumentException();
		}

		// Commits more toward the start will have more bitmaps.
		if (distanceFromTip <= recentCommitCount) {
			return recentCommitSpan;
		}

		int next = Math.min(distanceFromTip - recentCommitCount,
				distantCommitSpan);
		return Math.max(next, recentCommitSpan);
	}

	BitmapWalker newBitmapWalker() {
		return new BitmapWalker(
				new ObjectWalk(reader), bitmapIndex, null);
	}

	/**
	 * Container for state used in the first phase of selecting commits, which
	 * walks all of the reachable commits via the branch tips that are not
	 * covered by a previous pack's bitmaps ({@code newWants}) and stores them
	 * in {@code newCommitsByOldest}. {@code newCommitsByOldest} is initialized
	 * with an expected size of all commits, but may be smaller if some commits
	 * are unreachable and/or some commits are covered by a previous pack's
	 * bitmaps. {@code commitStartPos} will contain a positive offset to either
	 * the root commit or the oldest commit not covered by previous bitmaps.
	 */
	private static final class CommitSelectionHelper implements Iterable<RevCommit> {
		final Set<? extends ObjectId> newWants;

		final List<RevCommit> newWantsByNewest;
		final BitmapBuilder reusedCommitsBitmap;

		final List<BitmapCommit> reusedCommits;

		final RevCommit[] newCommitsByOldest;

		final int newCommitStartPos;

		CommitSelectionHelper(Set<? extends ObjectId> newWants,
				RevCommit[] commitsByOldest, int commitStartPos,
				List<RevCommit> newWantsByNewest,
				BitmapBuilder reusedCommitsBitmap,
				List<BitmapCommit> reuse) {
			this.newWants = newWants;
			this.newCommitsByOldest = commitsByOldest;
			this.newCommitStartPos = commitStartPos;
			this.newWantsByNewest = newWantsByNewest;
			this.reusedCommitsBitmap = reusedCommitsBitmap;
			this.reusedCommits = reuse;
		}

		@Override
		public Iterator<RevCommit> iterator() {
			// Member variables referenced by this iterator will have synthetic
			// accessors generated for them if they are made private.
			return new Iterator<>() {
				int pos = newCommitStartPos;

				@Override
				public boolean hasNext() {
					return pos < newCommitsByOldest.length;
				}

				@Override
				public RevCommit next() {
					return newCommitsByOldest[pos++];
				}

				@Override
				public void remove() {
					throw new UnsupportedOperationException();
				}
			};
		}

		int getCommitCount() {
			return newCommitsByOldest.length - newCommitStartPos;
		}
	}
}