BinaryDeltaInputStream.java

/*
 * Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Distribution License v. 1.0 which is available at
 * https://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */
package org.eclipse.jgit.util.io;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.StreamCorruptedException;
import java.text.MessageFormat;

import org.eclipse.jgit.internal.JGitText;

/**
 * An {@link InputStream} that applies a binary delta to a base on the fly.
 * <p>
 * Delta application to a base needs random access to the base data. The delta
 * is expressed as a sequence of copy and insert instructions. A copy
 * instruction has the form "COPY fromOffset length" and says "copy length bytes
 * from the base, starting at offset fromOffset, to the result". An insert
 * instruction has the form "INSERT length" followed by length bytes and says
 * "copy the next length bytes from the delta to the result".
 * </p>
 * <p>
 * These instructions are generated using a content-defined chunking algorithm
 * (currently C git uses the standard Rabin variant; but there are others that
 * could be used) that identifies equal chunks. It is entirely possible that a
 * later copy instruction has a fromOffset that is before the fromOffset of an
 * earlier copy instruction.
 * </p>
 * <p>
 * This makes it impossible to stream the base.
 * </p>
 * <p>
 * JGit is limited to 2GB maximum size for the base since array indices are
 * signed 32bit values.
 *
 * @since 5.12
 */
public class BinaryDeltaInputStream extends InputStream {

	private final byte[] base;

	private final InputStream delta;

	private long resultLength;

	private long toDeliver = -1;

	private int fromBase;

	private int fromDelta;

	private int baseOffset = -1;

	/**
	 * Creates a new {@link BinaryDeltaInputStream} that applies {@code delta}
	 * to {@code base}.
	 *
	 * @param base
	 *            data to apply the delta to
	 * @param delta
	 *            {@link InputStream} delivering the delta to apply
	 */
	public BinaryDeltaInputStream(byte[] base, InputStream delta) {
		this.base = base;
		this.delta = delta;
	}

	@Override
	public int read() throws IOException {
		int b = readNext();
		if (b >= 0) {
			toDeliver--;
		}
		return b;
	}

	@Override
	public int read(byte[] b, int off, int len) throws IOException {
		return super.read(b, off, len);
	}

	private void initialize() throws IOException {
		long baseSize = readVarInt(delta);
		if (baseSize > Integer.MAX_VALUE || baseSize < 0
				|| (int) baseSize != base.length) {
			throw new IOException(MessageFormat.format(
					JGitText.get().binaryDeltaBaseLengthMismatch,
					Integer.valueOf(base.length), Long.valueOf(baseSize)));
		}
		resultLength = readVarInt(delta);
		if (resultLength < 0) {
			throw new StreamCorruptedException(
					JGitText.get().binaryDeltaInvalidResultLength);
		}
		toDeliver = resultLength;
		baseOffset = 0;
	}

	private int readNext() throws IOException {
		if (baseOffset < 0) {
			initialize();
		}
		if (fromBase > 0) {
			fromBase--;
			return base[baseOffset++] & 0xFF;
		} else if (fromDelta > 0) {
			fromDelta--;
			return delta.read();
		}
		int command = delta.read();
		if (command < 0) {
			return -1;
		}
		if ((command & 0x80) != 0) {
			// Decode offset and length to read from base
			long copyOffset = 0;
			for (int i = 1, shift = 0; i < 0x10; i *= 2, shift += 8) {
				if ((command & i) != 0) {
					copyOffset |= ((long) next(delta)) << shift;
				}
			}
			int copySize = 0;
			for (int i = 0x10, shift = 0; i < 0x80; i *= 2, shift += 8) {
				if ((command & i) != 0) {
					copySize |= next(delta) << shift;
				}
			}
			if (copySize == 0) {
				copySize = 0x10000;
			}
			if (copyOffset > base.length - copySize) {
				throw new StreamCorruptedException(MessageFormat.format(
						JGitText.get().binaryDeltaInvalidOffset,
						Long.valueOf(copyOffset), Integer.valueOf(copySize)));
			}
			baseOffset = (int) copyOffset;
			fromBase = copySize;
			return readNext();
		} else if (command != 0) {
			// The next 'command' bytes come from the delta
			fromDelta = command - 1;
			return delta.read();
		} else {
			// Zero is reserved
			throw new StreamCorruptedException(
					JGitText.get().unsupportedCommand0);
		}
	}

	private int next(InputStream in) throws IOException {
		int b = in.read();
		if (b < 0) {
			throw new EOFException();
		}
		return b;
	}

	private long readVarInt(InputStream in) throws IOException {
		long val = 0;
		int shift = 0;
		int b;
		do {
			b = next(in);
			val |= ((long) (b & 0x7f)) << shift;
			shift += 7;
		} while ((b & 0x80) != 0);
		return val;
	}

	/**
	 * Tells the expected size of the final result.
	 *
	 * @return the size
	 * @throws IOException
	 *             if the size cannot be determined from {@code delta}
	 */
	public long getExpectedResultSize() throws IOException {
		if (baseOffset < 0) {
			initialize();
		}
		return resultLength;
	}

	/**
	 * Tells whether the delta has been fully consumed, and the expected number
	 * of bytes for the combined result have been read from this
	 * {@link BinaryDeltaInputStream}.
	 *
	 * @return whether delta application was successful
	 */
	public boolean isFullyConsumed() {
		try {
			return toDeliver == 0 && delta.read() < 0;
		} catch (IOException e) {
			return toDeliver == 0;
		}
	}

	@Override
	public void close() throws IOException {
		delta.close();
	}
}