RawTextComparator.java

/*
 * Copyright (C) 2009-2010, Google Inc.
 * Copyright (C) 2008-2009, Johannes E. Schindelin <johannes.schindelin@gmx.de> and others
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Distribution License v. 1.0 which is available at
 * https://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

package org.eclipse.jgit.diff;

import static org.eclipse.jgit.util.RawCharUtil.isWhitespace;
import static org.eclipse.jgit.util.RawCharUtil.trimLeadingWhitespace;
import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace;

import org.eclipse.jgit.util.IntList;

/**
 * Equivalence function for {@link org.eclipse.jgit.diff.RawText}.
 */
public abstract class RawTextComparator extends SequenceComparator<RawText> {
	/** No special treatment. */
	public static final RawTextComparator DEFAULT = new RawTextComparator() {
		@Override
		public boolean equals(RawText a, int ai, RawText b, int bi) {
			ai++;
			bi++;

			int as = a.lines.get(ai);
			int bs = b.lines.get(bi);
			final int ae = a.lines.get(ai + 1);
			final int be = b.lines.get(bi + 1);

			if (ae - as != be - bs)
				return false;

			while (as < ae) {
				if (a.content[as++] != b.content[bs++])
					return false;
			}
			return true;
		}

		@Override
		protected int hashRegion(byte[] raw, int ptr, int end) {
			int hash = 5381;
			for (; ptr < end; ptr++)
				hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
			return hash;
		}
	};

	/** Ignores all whitespace. */
	public static final RawTextComparator WS_IGNORE_ALL = new RawTextComparator() {
		@Override
		public boolean equals(RawText a, int ai, RawText b, int bi) {
			ai++;
			bi++;

			int as = a.lines.get(ai);
			int bs = b.lines.get(bi);
			int ae = a.lines.get(ai + 1);
			int be = b.lines.get(bi + 1);

			ae = trimTrailingWhitespace(a.content, as, ae);
			be = trimTrailingWhitespace(b.content, bs, be);

			while (as < ae && bs < be) {
				byte ac = a.content[as];
				byte bc = b.content[bs];

				while (as < ae - 1 && isWhitespace(ac)) {
					as++;
					ac = a.content[as];
				}

				while (bs < be - 1 && isWhitespace(bc)) {
					bs++;
					bc = b.content[bs];
				}

				if (ac != bc)
					return false;

				as++;
				bs++;
			}

			return as == ae && bs == be;
		}

		@Override
		protected int hashRegion(byte[] raw, int ptr, int end) {
			int hash = 5381;
			for (; ptr < end; ptr++) {
				byte c = raw[ptr];
				if (!isWhitespace(c))
					hash = ((hash << 5) + hash) + (c & 0xff);
			}
			return hash;
		}
	};

	/**
	 * Ignore leading whitespace.
	 **/
	public static final RawTextComparator WS_IGNORE_LEADING = new RawTextComparator() {
		@Override
		public boolean equals(RawText a, int ai, RawText b, int bi) {
			ai++;
			bi++;

			int as = a.lines.get(ai);
			int bs = b.lines.get(bi);
			int ae = a.lines.get(ai + 1);
			int be = b.lines.get(bi + 1);

			as = trimLeadingWhitespace(a.content, as, ae);
			bs = trimLeadingWhitespace(b.content, bs, be);

			if (ae - as != be - bs)
				return false;

			while (as < ae) {
				if (a.content[as++] != b.content[bs++])
					return false;
			}
			return true;
		}

		@Override
		protected int hashRegion(byte[] raw, int ptr, int end) {
			int hash = 5381;
			ptr = trimLeadingWhitespace(raw, ptr, end);
			for (; ptr < end; ptr++)
				hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
			return hash;
		}
	};

	/** Ignores trailing whitespace. */
	public static final RawTextComparator WS_IGNORE_TRAILING = new RawTextComparator() {
		@Override
		public boolean equals(RawText a, int ai, RawText b, int bi) {
			ai++;
			bi++;

			int as = a.lines.get(ai);
			int bs = b.lines.get(bi);
			int ae = a.lines.get(ai + 1);
			int be = b.lines.get(bi + 1);

			ae = trimTrailingWhitespace(a.content, as, ae);
			be = trimTrailingWhitespace(b.content, bs, be);

			if (ae - as != be - bs)
				return false;

			while (as < ae) {
				if (a.content[as++] != b.content[bs++])
					return false;
			}
			return true;
		}

		@Override
		protected int hashRegion(byte[] raw, int ptr, int end) {
			int hash = 5381;
			end = trimTrailingWhitespace(raw, ptr, end);
			for (; ptr < end; ptr++)
				hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
			return hash;
		}
	};

	/** Ignores whitespace occurring between non-whitespace characters. */
	public static final RawTextComparator WS_IGNORE_CHANGE = new RawTextComparator() {
		@Override
		public boolean equals(RawText a, int ai, RawText b, int bi) {
			ai++;
			bi++;

			int as = a.lines.get(ai);
			int bs = b.lines.get(bi);
			int ae = a.lines.get(ai + 1);
			int be = b.lines.get(bi + 1);

			ae = trimTrailingWhitespace(a.content, as, ae);
			be = trimTrailingWhitespace(b.content, bs, be);

			while (as < ae && bs < be) {
				byte ac = a.content[as++];
				byte bc = b.content[bs++];

				if (isWhitespace(ac) && isWhitespace(bc)) {
					as = trimLeadingWhitespace(a.content, as, ae);
					bs = trimLeadingWhitespace(b.content, bs, be);
				} else if (ac != bc) {
					return false;
				}
			}
			return as == ae && bs == be;
		}

		@Override
		protected int hashRegion(byte[] raw, int ptr, int end) {
			int hash = 5381;
			end = trimTrailingWhitespace(raw, ptr, end);
			while (ptr < end) {
				byte c = raw[ptr++];
				if (isWhitespace(c)) {
					ptr = trimLeadingWhitespace(raw, ptr, end);
					c = ' ';
				}
				hash = ((hash << 5) + hash) + (c & 0xff);
			}
			return hash;
		}
	};

	@Override
	public int hash(RawText seq, int lno) {
		final int begin = seq.lines.get(lno + 1);
		final int end = seq.lines.get(lno + 2);
		return hashRegion(seq.content, begin, end);
	}

	/** {@inheritDoc} */
	@Override
	public Edit reduceCommonStartEnd(RawText a, RawText b, Edit e) {
		// This is a faster exact match based form that tries to improve
		// performance for the common case of the header and trailer of
		// a text file not changing at all. After this fast path we use
		// the slower path based on the super class' using equals() to
		// allow for whitespace ignore modes to still work.

		if (e.beginA == e.endA || e.beginB == e.endB)
			return e;

		byte[] aRaw = a.content;
		byte[] bRaw = b.content;

		int aPtr = a.lines.get(e.beginA + 1);
		int bPtr = a.lines.get(e.beginB + 1);

		int aEnd = a.lines.get(e.endA + 1);
		int bEnd = b.lines.get(e.endB + 1);

		// This can never happen, but the JIT doesn't know that. If we
		// define this assertion before the tight while loops below it
		// should be able to skip the array bound checks on access.
		//
		if (aPtr < 0 || bPtr < 0 || aEnd > aRaw.length || bEnd > bRaw.length)
			throw new ArrayIndexOutOfBoundsException();

		while (aPtr < aEnd && bPtr < bEnd && aRaw[aPtr] == bRaw[bPtr]) {
			aPtr++;
			bPtr++;
		}

		while (aPtr < aEnd && bPtr < bEnd && aRaw[aEnd - 1] == bRaw[bEnd - 1]) {
			aEnd--;
			bEnd--;
		}

		e.beginA = findForwardLine(a.lines, e.beginA, aPtr);
		e.beginB = findForwardLine(b.lines, e.beginB, bPtr);

		e.endA = findReverseLine(a.lines, e.endA, aEnd);

		final boolean partialA = aEnd < a.lines.get(e.endA + 1);
		if (partialA)
			bEnd += a.lines.get(e.endA + 1) - aEnd;

		e.endB = findReverseLine(b.lines, e.endB, bEnd);

		if (!partialA && bEnd < b.lines.get(e.endB + 1))
			e.endA++;

		return super.reduceCommonStartEnd(a, b, e);
	}

	private static int findForwardLine(IntList lines, int idx, int ptr) {
		final int end = lines.size() - 2;
		while (idx < end && lines.get(idx + 2) < ptr)
			idx++;
		return idx;
	}

	private static int findReverseLine(IntList lines, int idx, int ptr) {
		while (0 < idx && ptr <= lines.get(idx))
			idx--;
		return idx;
	}

	/**
	 * Compute a hash code for a region.
	 *
	 * @param raw
	 *            the raw file content.
	 * @param ptr
	 *            first byte of the region to hash.
	 * @param end
	 *            1 past the last byte of the region.
	 * @return hash code for the region <code>[ptr, end)</code> of raw.
	 */
	protected abstract int hashRegion(byte[] raw, int ptr, int end);
}