/*
 * Decompiled with CFR 0.152.
 */
package org.apache.iceberg.mr.hive;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionStateUtil;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.JobContextImpl;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.OutputCommitter;
import org.apache.hadoop.mapred.TaskAttemptContext;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.iceberg.AppendFiles;
import org.apache.iceberg.ContentFile;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DeleteFile;
import org.apache.iceberg.DeleteFiles;
import org.apache.iceberg.OverwriteFiles;
import org.apache.iceberg.ReplacePartitions;
import org.apache.iceberg.RewriteFiles;
import org.apache.iceberg.RowDelta;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.SnapshotUpdate;
import org.apache.iceberg.Table;
import org.apache.iceberg.exceptions.NotFoundException;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.hadoop.Util;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.mr.Catalogs;
import org.apache.iceberg.mr.hive.FilesForCommit;
import org.apache.iceberg.mr.hive.HiveIcebergInputFormat;
import org.apache.iceberg.mr.hive.HiveTableUtil;
import org.apache.iceberg.mr.hive.IcebergTableUtil;
import org.apache.iceberg.mr.hive.TezUtil;
import org.apache.iceberg.mr.hive.compaction.IcebergCompactionUtil;
import org.apache.iceberg.mr.hive.writer.HiveIcebergWriter;
import org.apache.iceberg.mr.hive.writer.WriterRegistry;
import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.base.Splitter;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.ListMultimap;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.relocated.com.google.common.collect.Multimap;
import org.apache.iceberg.relocated.com.google.common.collect.Multimaps;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.relocated.com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.iceberg.util.Tasks;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class HiveIcebergOutputCommitter
extends OutputCommitter {
    /** SLF4J logger shared by all methods of this committer. */
    private static final Logger LOG = LoggerFactory.getLogger(HiveIcebergOutputCommitter.class);
    /**
     * Splits a string on the literal ".." separator.
     * NOTE(review): the call sites are not visible in this part of the file - presumably the
     * configured list of output table names; confirm.
     */
    private static final Splitter TABLE_NAME_SPLITTER = Splitter.on("..");
    /** File name suffix of the per-task commit files written by commitTask and listed on abort. */
    private static final String FOR_COMMIT_EXTENSION = ".forCommit";
    /** Debug log template used by commitWrite when a conflict detection filter is applied. */
    private static final String CONFLICT_DETECTION_FILTER = "Conflict detection Filter Expression: {}";
    /** Optional executor passed to snapshot updates for manifest scanning; may be null (checked before use). */
    private ExecutorService workerPool;

    /**
     * No-op: this committer does nothing at job setup.
     *
     * @param jobContext the job being set up (unused)
     */
    public void setupJob(JobContext jobContext) {
    }

    /**
     * No-op: this committer does nothing at task setup.
     *
     * @param taskAttemptContext the task attempt being set up (unused)
     */
    public void setupTask(TaskAttemptContext taskAttemptContext) {
    }

    /**
     * Decides whether {@code commitTask} has to run for the given task attempt.
     *
     * @param context the task attempt being evaluated
     * @return {@code true} for reduce tasks, and for any task of a job configured with zero reducers
     */
    public boolean needsTaskCommit(TaskAttemptContext context) {
        TaskType taskType = context.getTaskAttemptID().getTaskID().getTaskType();
        if (TaskType.REDUCE.equals(taskType)) {
            return true;
        }
        // Not a reducer: a commit is only needed when the job has no reduce phase at all.
        return context.getJobConf().getNumReduceTasks() == 0;
    }

    /**
     * Gathers the files produced by the writers of this task attempt and stores them, one file per
     * output table, in a {@code .forCommit} file under the job location of that table.
     *
     * <p>For every output table name the serialized table is read from the job configuration. Tables
     * that cannot be deserialized are skipped with a log message. When the attempt has no writer for
     * a table an empty commit file is still written. The writers of the attempt are removed from the
     * {@link WriterRegistry} only after all commit files were written successfully.
     *
     * @param originalContext the task attempt context supplied by the execution engine
     * @throws IOException if writing a commit file fails
     */
    public void commitTask(TaskAttemptContext originalContext) throws IOException {
        TaskAttemptContext context = TezUtil.enrichContextWithAttemptWrapper(originalContext);
        TaskAttemptID attemptID = context.getTaskAttemptID();
        JobConf jobConf = context.getJobConf();
        Set<Path> mergedPaths = HiveIcebergOutputCommitter.getCombinedLocations(jobConf);
        Set<String> outputs = HiveIcebergOutputCommitter.outputTables((Configuration) jobConf);
        // Fully typed (instead of a raw Map) so the per-table writer lists are type-checked below.
        Map<String, List<HiveIcebergWriter>> writers = Optional.ofNullable(WriterRegistry.writers(attemptID)).orElseGet(() -> {
            LOG.info("CommitTask found no writers for output tables: {}, attemptID: {}", outputs, attemptID);
            return ImmutableMap.of();
        });
        // May be null when at most one table is written; the null check in finally covers that.
        ExecutorService tableExecutor = HiveIcebergOutputCommitter.tableExecutor(jobConf, outputs.size());
        try {
            Tasks.foreach(outputs).retry(3).stopOnFailure().throwFailureWhenFinished().executeWith(tableExecutor).run(output -> {
                Table table = HiveTableUtil.deserializeTable((Configuration) jobConf, output);
                if (table == null) {
                    LOG.info("CommitTask found no serialized table in config for table: {}.", output);
                    return;
                }
                String fileForCommitLocation = HiveIcebergOutputCommitter.generateFileForCommitLocation(table.location(), jobConf, attemptID.getJobID(), attemptID.getTaskID().getId());
                List<HiveIcebergWriter> tableWriters = writers.get(output);
                if (tableWriters == null) {
                    LOG.info("CommitTask found no writer for specific table: {}, attemptID: {}", output, attemptID);
                    HiveIcebergOutputCommitter.createFileForCommit(FilesForCommit.empty(), fileForCommitLocation, table.io());
                    return;
                }
                // Merge the results of all writers of this table into a single commit file.
                List<DataFile> dataFiles = Lists.newArrayList();
                List<DeleteFile> deleteFiles = Lists.newArrayList();
                List<DataFile> replacedDataFiles = Lists.newArrayList();
                List<DeleteFile> rewrittenDeleteFiles = Lists.newArrayList();
                Set<CharSequence> referencedDataFiles = Sets.newHashSet();
                for (HiveIcebergWriter writer : tableWriters) {
                    FilesForCommit files = writer.files();
                    dataFiles.addAll(files.dataFiles());
                    deleteFiles.addAll(files.deleteFiles());
                    replacedDataFiles.addAll(files.replacedDataFiles());
                    referencedDataFiles.addAll(files.referencedDataFiles());
                    rewrittenDeleteFiles.addAll(files.rewrittenDeleteFiles());
                }
                HiveIcebergOutputCommitter.createFileForCommit(new FilesForCommit(dataFiles, deleteFiles, replacedDataFiles, referencedDataFiles, rewrittenDeleteFiles, mergedPaths), fileForCommitLocation, table.io());
            }, IOException.class);
        } finally {
            if (tableExecutor != null) {
                tableExecutor.shutdown();
            }
        }
        // Reached only on success: on failure the writers stay registered for abortTask.
        WriterRegistry.removeWriters(attemptID);
    }

    /**
     * Aborts a task attempt: removes its writers from the {@link WriterRegistry} and closes each of
     * them with the abort flag set.
     *
     * <p>Every writer is closed even if closing an earlier one fails, so a single failure does not
     * leave the remaining writers un-aborted. The first failure is rethrown after the loop with any
     * later failures attached as suppressed exceptions.
     *
     * @param originalContext the task attempt context supplied by the execution engine
     * @throws IOException if closing at least one writer failed
     */
    public void abortTask(TaskAttemptContext originalContext) throws IOException {
        TaskAttemptContext context = TezUtil.enrichContextWithAttemptWrapper(originalContext);
        Map<String, List<HiveIcebergWriter>> writerMap = WriterRegistry.removeWriters(context.getTaskAttemptID());
        if (writerMap == null) {
            return;
        }
        IOException firstFailure = null;
        for (List<HiveIcebergWriter> writerList : writerMap.values()) {
            for (HiveIcebergWriter writer : writerList) {
                try {
                    writer.close(true);
                } catch (IOException e) {
                    // Keep going so the other writers are still aborted.
                    if (firstFailure == null) {
                        firstFailure = e;
                    } else {
                        firstFailure.addSuppressed(e);
                    }
                }
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }

    /**
     * Commits a single job by delegating to {@code commitJobs} with {@code Operation.OTHER}.
     *
     * @param originalContext the job to commit
     * @throws IOException if the delegated commit fails
     */
    public void commitJob(JobContext originalContext) throws IOException {
        List<JobContext> singleJob = Collections.singletonList(originalContext);
        commitJobs(singleJob, Context.Operation.OTHER);
    }

    /**
     * Sets the executor that {@code commit} passes to snapshot updates for manifest scanning.
     *
     * @param workerPool the executor to use; stored as-is
     */
    public void setWorkerPool(ExecutorService workerPool) {
        this.workerPool = workerPool;
    }

    /**
     * Commits the output of one or more jobs.
     *
     * <p>The job contexts are grouped by output table, each table is committed via
     * {@code commitTable} (tables are processed through the table executor, files through the file
     * executor), merge task input files are cleaned, and finally the temporary job locations are
     * removed for every job.
     *
     * @param originalContextList the jobs to commit; the configuration of the first one is used for
     *        the executors and job locations
     * @param operation the Hive operation that produced the output
     * @throws IOException if cleaning up fails
     */
    public void commitJobs(List<JobContext> originalContextList, Context.Operation operation) throws IOException {
        List<JobContext> enrichedContexts = originalContextList.stream()
            .map(TezUtil::enrichContextWithVertexId)
            .collect(Collectors.toList());
        Multimap<OutputTable, JobContext> contextsByTable = HiveIcebergOutputCommitter.collectOutputs(enrichedContexts);
        JobConf jobConf = enrichedContexts.getFirst().getJobConf();
        long commitStart = System.currentTimeMillis();
        String jobIds = enrichedContexts.stream()
            .map(ctx -> ctx.getJobID().toString())
            .collect(Collectors.joining(","));
        LOG.info("Committing job(s) {} has started", jobIds);
        // Filled concurrently by the per-table tasks below.
        ConcurrentLinkedQueue<String> jobLocations = new ConcurrentLinkedQueue<>();
        try (ExecutorService fileExecutor = HiveIcebergOutputCommitter.fileExecutor(jobConf);
             ExecutorService tableExecutor = HiveIcebergOutputCommitter.tableExecutor(jobConf, contextsByTable.keySet().size())) {
            Tasks.foreach(contextsByTable.keySet())
                .throwFailureWhenFinished()
                .stopOnFailure()
                .executeWith(tableExecutor)
                .run(output -> {
                    Collection<JobContext> tableContexts = contextsByTable.get(output);
                    Table table = output.table;
                    for (JobContext ctx : tableContexts) {
                        jobLocations.add(HiveIcebergOutputCommitter.generateJobLocation(table.location(), jobConf, ctx.getJobID()));
                    }
                    this.commitTable(table.io(), fileExecutor, output, tableContexts, operation);
                });
            this.cleanMergeTaskInputFiles(enrichedContexts, fileExecutor);
        }
        LOG.info("Commit took {} ms for job(s) {}", System.currentTimeMillis() - commitStart, jobIds);
        for (JobContext committed : enrichedContexts) {
            this.cleanup(committed, jobLocations);
        }
    }

    /**
     * Groups the given job contexts by the output tables they write to.
     *
     * <p>For each output table name of a job the table object is taken from the session resources
     * when one of type {@link Table} is registered there, otherwise it is deserialized from the job
     * configuration. Names for which no table can be obtained are logged and skipped.
     *
     * @param jobContextList the jobs to inspect
     * @return a multimap from output table to the job contexts writing to it
     */
    private static Multimap<OutputTable, JobContext> collectOutputs(List<JobContext> jobContextList) {
        ListMultimap<OutputTable, JobContext> grouped = Multimaps.newListMultimap(Maps.newHashMap(), Lists::newArrayList);
        for (JobContext jobContext : jobContextList) {
            JobConf conf = jobContext.getJobConf();
            for (String tableName : HiveIcebergOutputCommitter.outputTables((Configuration) conf)) {
                Optional<Table> sessionTable = SessionStateUtil.getResource((Configuration) conf, (String) tableName)
                    .filter(Table.class::isInstance)
                    .map(Table.class::cast);
                Table table = sessionTable.orElseGet(() -> HiveTableUtil.deserializeTable((Configuration) conf, tableName));
                if (table == null) {
                    LOG.info("Found no table object in QueryState or conf for: {}. Skipping job commit.", tableName);
                    continue;
                }
                String catalog = HiveIcebergOutputCommitter.catalogName((Configuration) conf, tableName);
                grouped.put(new OutputTable(catalog, tableName, table), jobContext);
            }
        }
        return grouped;
    }

    /**
     * Aborts a single job by delegating to {@code abortJobs}.
     *
     * @param originalContext the job to abort
     * @param status the job status reported by the framework; not used here
     * @throws IOException if the delegated abort fails
     */
    public void abortJob(JobContext originalContext, int status) throws IOException {
        List<JobContext> singleJob = Collections.singletonList(originalContext);
        abortJobs(singleJob);
    }

    /**
     * Aborts one or more jobs: deletes the files recorded in the task commit files of every output
     * table and then removes the temporary job locations.
     *
     * <p>Failures while cleaning a table or deleting a file are logged and suppressed, so the abort
     * continues with the remaining tables and files.
     *
     * @param originalContextList the jobs to abort; the configuration of the first one is used for
     *        the executors and job locations
     * @throws IOException if removing the job locations fails
     */
    public void abortJobs(List<JobContext> originalContextList) throws IOException {
        List<JobContext> enrichedContexts = originalContextList.stream()
            .map(TezUtil::enrichContextWithVertexId)
            .collect(Collectors.toList());
        Multimap<OutputTable, JobContext> contextsByTable = HiveIcebergOutputCommitter.collectOutputs(enrichedContexts);
        JobConf jobConf = enrichedContexts.getFirst().getJobConf();
        String jobIds = enrichedContexts.stream()
            .map(ctx -> ctx.getJobID().toString())
            .collect(Collectors.joining(","));
        LOG.info("Job(s) {} are aborted. Data file cleaning started", jobIds);
        ExecutorService fileExecutor = HiveIcebergOutputCommitter.fileExecutor(jobConf);
        // May be null when at most one table is involved; see the null check in finally.
        ExecutorService tableExecutor = HiveIcebergOutputCommitter.tableExecutor(jobConf, contextsByTable.keySet().size());
        ConcurrentLinkedQueue<String> jobLocations = new ConcurrentLinkedQueue<>();
        try {
            Tasks.foreach(contextsByTable.keySet())
                .suppressFailureWhenFinished()
                .executeWith(tableExecutor)
                .onFailure((output, ex) -> LOG.warn("Failed cleanup table {} on abort job", output, ex))
                .run(output -> {
                    Table table = output.table;
                    for (JobContext ctx : contextsByTable.get(output)) {
                        LOG.info("Cleaning job for jobID: {}, table: {}", ctx.getJobID(), output);
                        String jobLocation = HiveIcebergOutputCommitter.generateJobLocation(table.location(), jobConf, ctx.getJobID());
                        jobLocations.add(jobLocation);
                        // The number of commit files found on disk stands in for the task count.
                        int numTasks = HiveIcebergOutputCommitter.listForCommits(jobConf, jobLocation).size();
                        FilesForCommit results = HiveIcebergOutputCommitter.collectResults(numTasks, fileExecutor, table.location(), ctx, table.io(), false);
                        Tasks.foreach(results.allFiles())
                            .retry(3)
                            .suppressFailureWhenFinished()
                            .executeWith(fileExecutor)
                            .onFailure((file, ex) -> LOG.warn("Failed to remove data file {} on abort job", file.location(), ex))
                            .run(file -> table.io().deleteFile(file.location()));
                    }
                }, IOException.class);
        } finally {
            fileExecutor.shutdown();
            if (tableExecutor != null) {
                tableExecutor.shutdown();
            }
        }
        LOG.info("Job(s) {} are aborted. Data file cleaning finished", jobIds);
        for (JobContext aborted : enrichedContexts) {
            this.cleanup(aborted, jobLocations);
        }
    }

    /**
     * Lists the task commit files found directly under a job location.
     *
     * @param jobConf configuration used to resolve the file system of the job location
     * @param jobLocation the directory to list
     * @return the non-directory children whose name ends with {@code .forCommit}
     * @throws IOException if the directory can not be listed
     */
    private static Set<FileStatus> listForCommits(JobConf jobConf, String jobLocation) throws IOException {
        Path path = new Path(jobLocation);
        LOG.debug("Listing job location to get commitTask manifest files for abort: {}", jobLocation);
        // Typed as FileStatus[] (not Object[]) so the filter below can call FileStatus methods.
        FileStatus[] children = path.getFileSystem(jobConf).listStatus(path);
        if (LOG.isDebugEnabled()) {
            // Guarded so the listing is only rendered to a string when it is actually logged.
            LOG.debug("Listing the job location: {} yielded these files: {}", jobLocation, Arrays.toString(children));
        }
        return Arrays.stream(children)
            .filter(child -> !child.isDirectory() && child.getPath().getName().endsWith(FOR_COMMIT_EXTENSION))
            .collect(Collectors.toSet());
    }

    /**
     * Collects the task commit files of all jobs that wrote to one table and creates the matching
     * Iceberg commit (write, compaction or overwrite, depending on the operation and rewrite policy).
     *
     * @param io file IO used to read the task commit files
     * @param executor executor used to read the task commit files in parallel
     * @param outputTable the target table together with its name and catalog name
     * @param jobContexts the jobs that produced output for the table
     * @param operation the Hive operation; {@code IOW} selects the overwrite/compaction path
     */
    private void commitTable(FileIO io, ExecutorService executor, OutputTable outputTable, Collection<JobContext> jobContexts, Context.Operation operation) {
        String name = outputTable.tableName;
        // Properties used to load the table through Catalogs below.
        Properties catalogProperties = new Properties();
        catalogProperties.put("name", name);
        catalogProperties.put("location", outputTable.table.location());
        if (outputTable.catalogName != null) {
            catalogProperties.put("iceberg.catalog", outputTable.catalogName);
        }
        // Accumulators for the results of all jobs writing to this table.
        ArrayList<DataFile> dataFiles = Lists.newArrayList();
        ArrayList<DeleteFile> deleteFiles = Lists.newArrayList();
        ArrayList<DataFile> replacedDataFiles = Lists.newArrayList();
        ArrayList<DeleteFile> rewrittenDeleteFiles = Lists.newArrayList();
        HashSet<CharSequence> referencedDataFiles = Sets.newHashSet();
        HashSet<Path> mergedAndDeletedFiles = Sets.newHashSet();
        Table table = null;
        String branchName = null;
        Long snapshotId = null;
        Expression filterExpr = null;
        for (JobContext jobContext : jobContexts) {
            JobConf conf = jobContext.getJobConf();
            // The table is loaded once, with the configuration of the first job context.
            table = Optional.ofNullable(table).orElseGet(() -> Catalogs.loadTable((Configuration)conf, catalogProperties));
            // branchName and snapshotId are reassigned on every iteration, so the last job context wins.
            branchName = conf.get("iceberg.mr.output.table.snapshot.ref");
            snapshotId = this.getSnapshotId(outputTable.table, branchName);
            // The conflict detection filter is looked up until one job context provides it.
            if (filterExpr == null) {
                filterExpr = SessionStateUtil.getConflictDetectionFilter((Configuration)conf, (Object)catalogProperties.get("name")).map(expr -> HiveIcebergInputFormat.getFilterExpr((Configuration)conf, expr)).orElse(null);
            }
            LOG.info("Committing job has started for table: {}, using location: {}", (Object)table, (Object)HiveIcebergOutputCommitter.generateJobLocation(outputTable.table.location(), (Configuration)conf, jobContext.getJobID()));
            // Task count comes from the session commit info; otherwise from the reducer count, or the
            // mapper count when there are no reducers.
            int numTasks = SessionStateUtil.getCommitInfo((Configuration)conf, (String)name).map(info -> (SessionStateUtil.CommitInfo)info.get(jobContext.getJobID().toString())).map(SessionStateUtil.CommitInfo::getTaskNum).orElseGet(() -> {
                LOG.info("Number of tasks not available in session state for jobID: {}, table: {}. Falling back to jobConf numReduceTasks/numMapTasks", (Object)jobContext.getJobID(), (Object)name);
                return conf.getNumReduceTasks() > 0 ? conf.getNumReduceTasks() : conf.getNumMapTasks();
            });
            // throwOnFailure = true: an unreadable task commit file fails the table commit.
            FilesForCommit writeResults = HiveIcebergOutputCommitter.collectResults(numTasks, executor, outputTable.table.location(), jobContext, io, true);
            dataFiles.addAll(writeResults.dataFiles());
            deleteFiles.addAll(writeResults.deleteFiles());
            replacedDataFiles.addAll(writeResults.replacedDataFiles());
            referencedDataFiles.addAll(writeResults.referencedDataFiles());
            rewrittenDeleteFiles.addAll(writeResults.rewrittenDeleteFiles());
            mergedAndDeletedFiles.addAll(writeResults.mergedAndDeletedFiles());
        }
        // Files listed as merged-and-deleted must not be committed.
        dataFiles.removeIf(dataFile -> mergedAndDeletedFiles.contains(new Path(dataFile.location())));
        deleteFiles.removeIf(deleteFile -> mergedAndDeletedFiles.contains(new Path(deleteFile.location())));
        FilesForCommit filesForCommit = new FilesForCommit(dataFiles, deleteFiles, replacedDataFiles, referencedDataFiles, rewrittenDeleteFiles, Collections.emptySet());
        long startTime = System.currentTimeMillis();
        if (Context.Operation.IOW != operation) {
            // Regular write: nothing to commit when no files were produced.
            if (filesForCommit.isEmpty()) {
                LOG.info("Not creating a new commit for table: {}, jobIDs: {}, since there were no new files to add", (Object)table, (Object)jobContexts.stream().map(org.apache.hadoop.mapreduce.JobContext::getJobID).map(String::valueOf).collect(Collectors.joining(",")));
            } else {
                this.commitWrite(table, branchName, snapshotId, startTime, filesForCommit, operation, filterExpr);
            }
        } else {
            // IOW: a non-default rewrite policy selects the compaction commit, otherwise a plain overwrite.
            Context.RewritePolicy rewritePolicy = Context.RewritePolicy.fromString((String)jobContexts.stream().findAny().map(x -> x.getJobConf().get(HiveConf.ConfVars.REWRITE_POLICY.varname)).orElse(Context.RewritePolicy.DEFAULT.name()));
            if (rewritePolicy != Context.RewritePolicy.DEFAULT) {
                String partitionPath = jobContexts.stream().findAny().map(x -> x.getJobConf().get("compaction_partition_path")).orElse(null);
                // -1 means no file size threshold is configured.
                long fileSizeThreshold = jobContexts.stream().findAny().map(x -> x.getJobConf().get("compactor.threshold.file.size.threshold")).map(Long::parseLong).orElse(-1L);
                this.commitCompaction(table, snapshotId, startTime, filesForCommit, partitionPath, fileSizeThreshold);
            } else {
                this.commitOverwrite(table, branchName, snapshotId, startTime, filesForCommit);
            }
        }
    }

    /**
     * Looks up the snapshot of a table for the given branch name.
     *
     * @param table the table to inspect
     * @param branchName the branch name passed on to {@code IcebergTableUtil.getTableSnapshot}
     * @return the snapshot id, or {@code null} when no snapshot was found
     */
    private Long getSnapshotId(Table table, String branchName) {
        Snapshot snapshot = IcebergTableUtil.getTableSnapshot(table, branchName);
        if (snapshot == null) {
            return null;
        }
        return snapshot.snapshotId();
    }

    /**
     * Commits the collected files of a non-overwrite operation, choosing the Iceberg update type from
     * the content of {@code results}:
     * replaced data files present -> {@code OverwriteFiles};
     * no delete files and not a MERGE -> {@code AppendFiles};
     * otherwise -> {@code RowDelta}.
     *
     * @param table the table to commit to
     * @param branchName target branch; applied only when not empty
     * @param snapshotId snapshot to validate from; validation start is not set when {@code null}
     * @param startTime start of the commit in milliseconds, used for the duration log
     * @param results the files to commit
     * @param operation the Hive operation that produced the files
     * @param filterExpr conflict detection filter; not applied when {@code null}
     */
    private void commitWrite(Table table, String branchName, Long snapshotId, long startTime, FilesForCommit results, Context.Operation operation, Expression filterExpr) {
        if (!results.replacedDataFiles().isEmpty()) {
            // Some existing data files were replaced: delete them and add the new ones in one overwrite.
            OverwriteFiles write = table.newOverwrite();
            results.replacedDataFiles().forEach(write::deleteFile);
            results.dataFiles().forEach(write::addFile);
            if (StringUtils.isNotEmpty((CharSequence)branchName)) {
                write.toBranch(HiveUtils.getTableSnapshotRef((String)branchName));
            }
            if (snapshotId != null) {
                write.validateFromSnapshot(snapshotId);
            }
            if (filterExpr != null) {
                LOG.debug(CONFLICT_DETECTION_FILTER, (Object)filterExpr);
                write.conflictDetectionFilter(filterExpr);
            }
            write.validateNoConflictingData();
            write.validateNoConflictingDeletes();
            this.commit(write);
            // Note: this path returns before the timing/debug logs at the end of the method.
            return;
        }
        if (results.deleteFiles().isEmpty() && Context.Operation.MERGE != operation) {
            // Pure insert: a plain append without conflict validation.
            AppendFiles write = table.newAppend();
            results.dataFiles().forEach(write::appendFile);
            if (StringUtils.isNotEmpty((CharSequence)branchName)) {
                write.toBranch(HiveUtils.getTableSnapshotRef((String)branchName));
            }
            this.commit(write);
        } else {
            // Deletes are present or the operation is a MERGE: commit as a row delta.
            RowDelta write = table.newRowDelta();
            results.dataFiles().forEach(write::addRows);
            results.deleteFiles().forEach(write::addDeletes);
            results.rewrittenDeleteFiles().forEach(write::removeDeletes);
            if (StringUtils.isNotEmpty((CharSequence)branchName)) {
                write.toBranch(HiveUtils.getTableSnapshotRef((String)branchName));
            }
            if (snapshotId != null) {
                write.validateFromSnapshot(snapshotId);
            }
            if (filterExpr != null) {
                LOG.debug(CONFLICT_DETECTION_FILTER, (Object)filterExpr);
                write.conflictDetectionFilter(filterExpr);
            }
            // These two validations are only requested when new data files are part of the delta.
            if (!results.dataFiles().isEmpty()) {
                write.validateDeletedFiles();
                write.validateNoConflictingDeleteFiles();
            }
            write.validateDataFilesExist(results.referencedDataFiles());
            write.validateNoConflictingDataFiles();
            this.commit(write);
        }
        LOG.info("Write commit took {} ms for table: {} with {} data and {} delete file(s)", new Object[]{System.currentTimeMillis() - startTime, table, results.dataFiles().size(), results.deleteFiles().size()});
        LOG.debug("Added files {}", (Object)results);
    }

    /**
     * Commits a snapshot update, first handing it the configured worker pool (if any) for manifest
     * scanning.
     *
     * @param update the pending snapshot update
     */
    private void commit(SnapshotUpdate<?> update) {
        ExecutorService pool = this.workerPool;
        if (pool != null) {
            update.scanManifestsWith(pool);
        }
        update.commit();
    }

    /**
     * Commits a compaction as a single rewrite: the existing data files (and, when no file size
     * threshold is set, the existing delete files) selected by {@code IcebergCompactionUtil} are
     * removed and the newly written data files are added.
     *
     * @param table the table being compacted
     * @param snapshotId snapshot to validate from; validation start is not set when {@code null}
     * @param startTime start of the commit in milliseconds, used for the duration log
     * @param results the files written by the compaction job
     * @param partitionPath the compacted partition path; logged as "N/A" when {@code null}
     * @param fileSizeThreshold file size threshold; {@code -1} means none
     */
    private void commitCompaction(Table table, Long snapshotId, long startTime, FilesForCommit results, String partitionPath, long fileSizeThreshold) {
        List<DataFile> dataFilesToRemove = IcebergCompactionUtil.getDataFiles(table, partitionPath, fileSizeThreshold);
        List<DeleteFile> deleteFilesToRemove;
        if (fileSizeThreshold == -1L) {
            deleteFilesToRemove = IcebergCompactionUtil.getDeleteFiles(table, partitionPath);
        } else {
            // With a size threshold the existing delete files are left untouched.
            deleteFilesToRemove = Collections.emptyList();
        }
        RewriteFiles rewrite = table.newRewrite();
        for (DataFile dataFile : dataFilesToRemove) {
            rewrite.deleteFile(dataFile);
        }
        for (DeleteFile deleteFile : deleteFilesToRemove) {
            rewrite.deleteFile(deleteFile);
        }
        results.dataFiles().forEach(rewrite::addFile);
        if (snapshotId != null) {
            rewrite.validateFromSnapshot(snapshotId);
        }
        // NOTE(review): committed directly, not through commit(SnapshotUpdate), so the configured
        // workerPool is not applied on this path - confirm this is intended.
        rewrite.commit();
        long elapsedMs = System.currentTimeMillis() - startTime;
        LOG.info("Compaction commit took {} ms for table: {} partition: {} with {} file(s)", elapsedMs, table, StringUtils.defaultString((String) partitionPath, (String) "N/A"), results.dataFiles().size());
    }

    /**
     * Commits an insert-overwrite.
     *
     * <p>With new data files the affected partitions are replaced. Without new data files an
     * unpartitioned table is emptied, while a partitioned table is left unchanged.
     *
     * @param table the table to overwrite
     * @param branchName target branch; applied only when not empty
     * @param snapshotId snapshot to validate from; validation start is not set when {@code null}
     * @param startTime start of the commit in milliseconds, used for the duration log
     * @param results the files to commit; must not contain delete files
     * @throws IllegalArgumentException if {@code results} contains delete files
     */
    private void commitOverwrite(Table table, String branchName, Long snapshotId, long startTime, FilesForCommit results) {
        Preconditions.checkArgument(results.deleteFiles().isEmpty(), "Can not handle deletes with overwrite");
        boolean targetsBranch = StringUtils.isNotEmpty((CharSequence) branchName);
        boolean hasNewData = !results.dataFiles().isEmpty();
        if (hasNewData) {
            ReplacePartitions replace = table.newReplacePartitions();
            results.dataFiles().forEach(replace::addFile);
            if (targetsBranch) {
                replace.toBranch(HiveUtils.getTableSnapshotRef((String) branchName));
            }
            if (snapshotId != null) {
                replace.validateFromSnapshot(snapshotId);
            }
            replace.validateNoConflictingDeletes();
            replace.validateNoConflictingData();
            this.commit(replace);
            long elapsedMs = System.currentTimeMillis() - startTime;
            LOG.info("Overwrite commit took {} ms for table: {} with {} file(s)", elapsedMs, table, results.dataFiles().size());
        } else if (table.spec().isUnpartitioned()) {
            // Empty overwrite of an unpartitioned table: delete every row.
            DeleteFiles clearAll = table.newDelete();
            clearAll.deleteFromRowFilter(Expressions.alwaysTrue());
            if (targetsBranch) {
                clearAll.toBranch(HiveUtils.getTableSnapshotRef((String) branchName));
            }
            this.commit(clearAll);
            long elapsedMs = System.currentTimeMillis() - startTime;
            LOG.info("Cleared table contents as part of empty overwrite for unpartitioned table. Commit took {} ms for table: {}", elapsedMs, table);
        }
        LOG.debug("Overwrote partitions with files {}", results);
    }

    /**
     * Recursively deletes the given temporary job locations.
     *
     * <p>Each deletion is retried; failures are logged at debug level and suppressed.
     *
     * @param jobContext the job whose configuration is used to resolve the file systems
     * @param jobLocations the directories to delete
     * @throws IOException declared for the delete task; failures are suppressed by the task runner
     */
    private void cleanup(JobContext jobContext, Collection<String> jobLocations) throws IOException {
        JobConf jobConf = jobContext.getJobConf();
        LOG.info("Cleaning for job {} started", jobContext.getJobID());
        Tasks.foreach(jobLocations)
            .retry(3)
            .suppressFailureWhenFinished()
            .onFailure((location, failure) -> LOG.debug("Failed to remove directory {} on job cleanup", location, failure))
            .run(location -> {
                LOG.info("Cleaning location: {}", location);
                Path directory = new Path(location);
                FileSystem fileSystem = Util.getFs(directory, (Configuration) jobConf);
                fileSystem.delete(directory, true);
            }, IOException.class);
        LOG.info("Cleaning for job {} finished", jobContext.getJobID());
    }

    /**
     * Creates the fixed-size daemon thread pool used for per-file work during commit and abort.
     *
     * @param conf configuration providing {@code iceberg.mr.commit.file.thread.pool.size} (default 10)
     * @return a new executor; the caller is responsible for shutting it down
     */
    private static ExecutorService fileExecutor(Configuration conf) {
        int poolSize = conf.getInt("iceberg.mr.commit.file.thread.pool.size", 10);
        ThreadFactoryBuilder threads = new ThreadFactoryBuilder()
            .setDaemon(true)
            .setPriority(Thread.NORM_PRIORITY)
            .setNameFormat("iceberg-commit-file-pool-%d");
        return Executors.newFixedThreadPool(poolSize, threads.build());
    }

    /**
     * Creates the fixed-size daemon thread pool used to process several tables in parallel.
     *
     * @param conf configuration providing {@code iceberg.mr.commit.table.thread.pool.size} (default 10)
     * @param maxThreadNum upper bound for the pool size, typically the number of tables
     * @return a new executor, or {@code null} when the effective pool size is 1 or less
     */
    private static ExecutorService tableExecutor(Configuration conf, int maxThreadNum) {
        int configuredSize = conf.getInt("iceberg.mr.commit.table.thread.pool.size", 10);
        int poolSize = Math.min(maxThreadNum, configuredSize);
        if (poolSize <= 1) {
            // No pool is created for a single thread; callers handle the null.
            return null;
        }
        ThreadFactoryBuilder threads = new ThreadFactoryBuilder()
            .setDaemon(true)
            .setPriority(Thread.NORM_PRIORITY)
            .setNameFormat("iceberg-commit-table-pool-%d");
        return Executors.newFixedThreadPool(poolSize, threads.build());
    }

    /**
     * Reads the task commit files {@code task-0 .. task-(numTasks-1)} of a job and merges their
     * content into a single {@link FilesForCommit}.
     *
     * @param numTasks number of task commit files to read
     * @param executor executor used to read the files in parallel
     * @param location the table location the job location is derived from
     * @param jobContext the job whose commit files are read
     * @param io file IO used for reading
     * @param throwOnFailure whether a failed read should be rethrown once all tasks finished
     * @return the merged content of all commit files that could be read
     */
    private static FilesForCommit collectResults(int numTasks, ExecutorService executor, String location, JobContext jobContext, FileIO io, boolean throwOnFailure) {
        JobConf conf = jobContext.getJobConf();
        // Concurrent collections: the reader tasks below add to them from several threads.
        ConcurrentLinkedQueue<DataFile> added = new ConcurrentLinkedQueue<>();
        ConcurrentLinkedQueue<DeleteFile> deletes = new ConcurrentLinkedQueue<>();
        ConcurrentLinkedQueue<DataFile> replaced = new ConcurrentLinkedQueue<>();
        ConcurrentLinkedQueue<DeleteFile> rewrittenDeletes = new ConcurrentLinkedQueue<>();
        ConcurrentLinkedQueue<CharSequence> referenced = new ConcurrentLinkedQueue<>();
        ConcurrentLinkedQueue<Path> mergedAndDeleted = new ConcurrentLinkedQueue<>();
        Tasks.range(numTasks)
            .throwFailureWhenFinished(throwOnFailure)
            .executeWith(executor)
            .retry(3)
            .run(taskId -> {
                String commitFile = HiveIcebergOutputCommitter.generateFileForCommitLocation(location, conf, jobContext.getJobID(), taskId);
                FilesForCommit taskFiles = HiveIcebergOutputCommitter.readFileForCommit(commitFile, io);
                LOG.debug("Found Iceberg commitTask manifest file: {}\n{}", commitFile, taskFiles);
                added.addAll(taskFiles.dataFiles());
                deletes.addAll(taskFiles.deleteFiles());
                replaced.addAll(taskFiles.replacedDataFiles());
                rewrittenDeletes.addAll(taskFiles.rewrittenDeleteFiles());
                referenced.addAll(taskFiles.referencedDataFiles());
                mergedAndDeleted.addAll(taskFiles.mergedAndDeletedFiles());
            });
        return new FilesForCommit(added, deletes, replaced, referenced, rewrittenDeletes, mergedAndDeleted);
    }

    /**
     * Builds the temporary directory of a job below a table location:
     * {@code <location>/temp/<queryId>-<jobId>}.
     *
     * @param location the table location
     * @param conf configuration providing the Hive query id
     * @param jobId the job id
     * @return the job location
     */
    @VisibleForTesting
    static String generateJobLocation(String location, Configuration conf, org.apache.hadoop.mapreduce.JobID jobId) {
        String queryId = conf.get(HiveConf.ConfVars.HIVE_QUERY_ID.varname);
        String directoryName = queryId + "-" + jobId;
        return location + "/temp/" + directoryName;
    }

    /**
     * Builds the path of the commit file of one task:
     * {@code <jobLocation>/task-<taskId>.forCommit}.
     *
     * @param location the table location
     * @param conf configuration providing the Hive query id
     * @param jobId the job id
     * @param taskId the numeric task id
     * @return the location of the task commit file
     */
    private static String generateFileForCommitLocation(String location, Configuration conf, org.apache.hadoop.mapreduce.JobID jobId, int taskId) {
        String jobLocation = HiveIcebergOutputCommitter.generateJobLocation(location, conf, jobId);
        String fileName = "task-" + taskId + FOR_COMMIT_EXTENSION;
        return jobLocation + "/" + fileName;
    }

    /**
     * Serializes a {@link FilesForCommit} with Java serialization into the given location,
     * overwriting an existing file.
     *
     * @param writeResult the content to persist
     * @param location the target file location
     * @param io file IO used to create the file
     * @throws IOException if the file can not be written
     */
    private static void createFileForCommit(FilesForCommit writeResult, String location, FileIO io) throws IOException {
        OutputFile target = io.newOutputFile(location);
        try (ObjectOutputStream stream = new ObjectOutputStream(target.createOrOverwrite())) {
            stream.writeObject(writeResult);
        }
        LOG.debug("Created Iceberg commitTask manifest file: {}\n{}", location, writeResult);
    }

    /**
     * Reads a {@link FilesForCommit} previously written by {@code createFileForCommit}.
     *
     * <p>The stream is opened inside try-with-resources, so a failure while opening, reading or
     * closing it is translated the same way and the stream is always closed.
     *
     * @param fileForCommitLocation location of the task commit file
     * @param io file IO used to open the file
     * @return the deserialized content
     * @throws NotFoundException if the file can not be read or deserialized; the original failure is
     *         attached as the cause
     */
    private static FilesForCommit readFileForCommit(String fileForCommitLocation, FileIO io) {
        try (ObjectInputStream ois = new ObjectInputStream(io.newInputFile(fileForCommitLocation).newStream())) {
            return (FilesForCommit) ois.readObject();
        } catch (IOException | ClassNotFoundException e) {
            // Keep the cause so the underlying I/O or class loading problem stays visible.
            throw new NotFoundException(e, "Can not read or parse commitTask manifest file: %s", fileForCommitLocation);
        }
    }

    /**
     * Returns the {@link FileStatus} of every data file and delete file recorded in the
     * commit files of the given jobs.
     *
     * <p>Tables are processed with the table executor and the commit files are read with the
     * file executor. A failure for a single table is logged and suppressed until all tables
     * were attempted ({@code suppressFailureWhenFinished}).
     *
     * @param jobContexts the job contexts whose outputs should be listed; the configuration of
     *     the first context is used for all file system and executor lookups
     * @return a list with the statuses of all data files followed by all delete files; empty
     *     when {@code jobContexts} is empty
     * @throws IOException if a file system operation fails
     */
    public static List<FileStatus> getOutputFiles(List<JobContext> jobContexts) throws IOException {
        Multimap<OutputTable, JobContext> outputs = HiveIcebergOutputCommitter.collectOutputs(jobContexts);
        if (jobContexts.isEmpty()) {
            // Without a job context there is no configuration to work with and nothing to list.
            return Lists.newArrayList();
        }
        JobConf jobConf = jobContexts.getFirst().getJobConf();
        // NOTE(review): the per-directory lists are plain lists; this relies on a directory not
        // being filled from two table tasks at the same time - confirm.
        ConcurrentMap<Path, List<FileStatus>> parentDirToDataFile = Maps.newConcurrentMap();
        ConcurrentMap<Path, List<FileStatus>> parentDirToDeleteFile = Maps.newConcurrentMap();
        try (ExecutorService fileExecutor = HiveIcebergOutputCommitter.fileExecutor(jobConf);
             ExecutorService tableExecutor = HiveIcebergOutputCommitter.tableExecutor(jobConf, outputs.keySet().size())) {
            Tasks.foreach(outputs.keySet()).suppressFailureWhenFinished().executeWith(tableExecutor).onFailure((output, ex) -> LOG.warn("Failed to retrieve merge input file for the table {}", output, ex)).run(output -> {
                // The table and its file system do not depend on the job, so resolve them once.
                Table table = output.table;
                FileSystem fileSystem = new Path(table.location()).getFileSystem(jobConf);
                for (JobContext jobContext : outputs.get(output)) {
                    String jobLocation = HiveIcebergOutputCommitter.generateJobLocation(table.location(), jobConf, jobContext.getJobID());
                    // One commit file is expected per task, so their count is the number of tasks.
                    int numTasks = HiveIcebergOutputCommitter.listForCommits(jobConf, jobLocation).size();
                    FilesForCommit results = HiveIcebergOutputCommitter.collectResults(numTasks, fileExecutor, table.location(), jobContext, table.io(), false);
                    for (DataFile dataFile : results.dataFiles()) {
                        Path filePath = new Path(dataFile.location());
                        parentDirToDataFile.computeIfAbsent(filePath.getParent(), k -> Lists.newArrayList()).add(fileSystem.getFileStatus(filePath));
                    }
                    for (DeleteFile deleteFile : results.deleteFiles()) {
                        Path filePath = new Path(deleteFile.location());
                        parentDirToDeleteFile.computeIfAbsent(filePath.getParent(), k -> Lists.newArrayList()).add(fileSystem.getFileStatus(filePath));
                    }
                }
            }, IOException.class);
        }
        return Stream.of(parentDirToDataFile, parentDirToDeleteFile).flatMap(files -> files.values().stream().flatMap(Collection::stream)).collect(Collectors.toList());
    }

    /**
     * Returns every data file and delete file recorded in the commit files of the given jobs.
     *
     * <p>Tables are processed with the table executor and the commit files are read with the
     * file executor. A failure for a single table is logged and suppressed until all tables
     * were attempted ({@code suppressFailureWhenFinished}).
     *
     * @param jobContexts the job contexts whose outputs should be collected; the configuration
     *     of the first context is used for all lookups
     * @return a new list holding the collected content files; empty when {@code jobContexts}
     *     is empty
     * @throws IOException if reading the job output fails
     */
    public static List<ContentFile<?>> getOutputContentFiles(List<JobContext> jobContexts) throws IOException {
        Multimap<OutputTable, JobContext> outputs = HiveIcebergOutputCommitter.collectOutputs(jobContexts);
        if (jobContexts.isEmpty()) {
            // Without a job context there is no configuration to work with and nothing to collect.
            return Lists.newArrayList();
        }
        JobConf jobConf = jobContexts.getFirst().getJobConf();
        // Filled concurrently by the table tasks, hence a concurrent collection.
        Collection<ContentFile<?>> files = new ConcurrentLinkedQueue<>();
        try (ExecutorService fileExecutor = HiveIcebergOutputCommitter.fileExecutor(jobConf);
             ExecutorService tableExecutor = HiveIcebergOutputCommitter.tableExecutor(jobConf, outputs.keySet().size())) {
            Tasks.foreach(outputs.keySet()).suppressFailureWhenFinished().executeWith(tableExecutor).onFailure((output, ex) -> LOG.warn("Failed to retrieve merge input file for the table {}", output, ex)).run(output -> {
                Table table = output.table;
                for (JobContext jobContext : outputs.get(output)) {
                    String jobLocation = HiveIcebergOutputCommitter.generateJobLocation(table.location(), jobConf, jobContext.getJobID());
                    // One commit file is expected per task, so their count is the number of tasks.
                    int numTasks = HiveIcebergOutputCommitter.listForCommits(jobConf, jobLocation).size();
                    FilesForCommit results = HiveIcebergOutputCommitter.collectResults(numTasks, fileExecutor, table.location(), jobContext, table.io(), false);
                    files.addAll(results.dataFiles());
                    files.addAll(results.deleteFiles());
                }
            }, IOException.class);
        }
        return Lists.newArrayList(files);
    }

    /**
     * Recursively deletes the input paths of the map work of every job whose input format
     * passes the {@link CombineHiveInputFormat} check.
     *
     * <p>Each deletion is attempted with up to three retries on the given executor.
     *
     * @param jobContexts the job contexts to inspect; the configuration of the first context is
     *     used to resolve the file system of every path
     * @param fileExecutor executor used to run the deletions
     * @throws IOException if a path can not be checked or deleted
     */
    private void cleanMergeTaskInputFiles(List<JobContext> jobContexts, ExecutorService fileExecutor) throws IOException {
        Stream<Path> mergedPaths = jobContexts.stream()
            .map(JobContext::getJobConf)
            .filter(jobConf -> jobConf.getInputFormat().getClass().isAssignableFrom(CombineHiveInputFormat.class))
            .map(Utilities::getMapWork)
            .filter(Objects::nonNull)
            .map(MapWork::getInputPaths)
            .filter(Objects::nonNull)
            .flatMap(Collection::stream);
        Tasks.foreach(mergedPaths).retry(3).executeWith(fileExecutor).run(path -> {
            // Only reached when at least one path exists, so the context list is not empty here.
            FileSystem fs = path.getFileSystem(jobContexts.getFirst().getJobConf());
            if (fs.exists(path)) {
                fs.delete(path, true);
            }
        }, IOException.class);
    }

    /**
     * Creates the job contexts for the table described by the given properties, using the
     * configuration of the current session.
     *
     * <p>The table name is read from the {@code name} property and the snapshot reference from
     * the {@code snapshot_ref} property. Every generated context is passed through
     * {@code TezUtil.enrichContextWithVertexId}.
     *
     * @param properties properties holding the table name and the optional snapshot reference
     * @return an unmodifiable list of the enriched job contexts
     */
    static List<JobContext> getJobContexts(Properties properties) {
        Configuration sessionConf = SessionState.getSessionConf();
        List<JobContext> contexts = HiveIcebergOutputCommitter.generateJobContexts(
            sessionConf, properties.getProperty("name"), properties.getProperty("snapshot_ref"));
        return contexts.stream()
            .map(TezUtil::enrichContextWithVertexId)
            .toList();
    }

    /**
     * Creates one {@link JobContext} per commit info stored in the session state for the given
     * table.
     *
     * <p>NOTE(review): all returned contexts wrap the same {@link JobConf} instance. The
     * properties of every commit info are applied to that single instance, so for equal keys
     * the value applied last is the one every context sees - confirm this is intended.
     *
     * @param configuration base configuration that is copied into the job configuration
     * @param tableName name of the table; also written to {@code iceberg.mr.output.tables}
     * @param branchName snapshot reference written to
     *     {@code iceberg.mr.output.table.snapshot.ref}; ignored when {@code null}
     * @return the generated job contexts, or an empty list when no commit info is found
     */
    static List<JobContext> generateJobContexts(Configuration configuration, String tableName, String branchName) {
        JobConf jobConf = new JobConf(configuration);
        Optional<Map<String, SessionStateUtil.CommitInfo>> commitInfoMap = SessionStateUtil.getCommitInfo(jobConf, tableName);
        if (commitInfoMap.isEmpty()) {
            LOG.debug("Unable to find commit information in query state for table: {}", tableName);
            return Collections.emptyList();
        }
        List<JobContext> jobContextList = Lists.newLinkedList();
        for (SessionStateUtil.CommitInfo commitInfo : commitInfoMap.get().values()) {
            JobID jobID = JobID.forName(commitInfo.getJobIdStr());
            commitInfo.getProps().forEach(jobConf::set);
            // Restrict the output to the requested table only.
            jobConf.set("iceberg.mr.output.tables", tableName);
            if (branchName != null) {
                jobConf.set("iceberg.mr.output.table.snapshot.ref", branchName);
            }
            jobContextList.add(new JobContextImpl(jobConf, jobID, null));
        }
        return jobContextList;
    }

    /**
     * Returns the input paths of the job's map work when the class of the configured input
     * format is {@link CombineHiveInputFormat} or one of its supertypes.
     *
     * @param jobConf the job configuration to inspect
     * @return a new mutable set with the input paths; empty when the input format check fails,
     *     no map work is available, or the map work has no input paths
     */
    private static Set<Path> getCombinedLocations(JobConf jobConf) {
        Set<Path> combined = Sets.newHashSet();
        boolean combineInput = jobConf.getInputFormat().getClass().isAssignableFrom(CombineHiveInputFormat.class);
        if (!combineInput) {
            return combined;
        }
        MapWork mapWork = Utilities.getMapWork((Configuration)jobConf);
        if (mapWork == null || mapWork.getInputPaths() == null) {
            return combined;
        }
        combined.addAll(mapWork.getInputPaths());
        return combined;
    }

    /**
     * Reads the {@code iceberg.mr.output.tables} setting and splits it with
     * {@code TABLE_NAME_SPLITTER}.
     *
     * @param config the configuration to read from
     * @return a new set with the split values
     */
    private static Set<String> outputTables(Configuration config) {
        String configuredTables = config.get("iceberg.mr.output.tables");
        return Sets.newHashSet(TABLE_NAME_SPLITTER.split(configuredTables));
    }

    /**
     * Looks up the configuration value stored under {@code iceberg.mr.table.catalog.<name>}.
     *
     * @param config the configuration to read from
     * @param name the suffix appended to the {@code iceberg.mr.table.catalog.} key prefix
     * @return the value returned by the configuration for that key
     */
    private static String catalogName(Configuration config, String name) {
        String catalogKey = "iceberg.mr.table.catalog." + name;
        return config.get(catalogKey);
    }

    /**
     * An output table together with its catalog name and table name.
     *
     * <p>Equality and hash code are based on {@code tableName} only, so two instances with the
     * same table name are treated as the same key regardless of the other components.
     */
    private record OutputTable(String catalogName, String tableName, Table table) {
        @Override
        public boolean equals(Object o) {
            // Records are final, so an instanceof test matches exactly the same objects as a
            // comparison of the runtime classes.
            return o instanceof OutputTable other && Objects.equals(tableName, other.tableName);
        }

        @Override
        public int hashCode() {
            return Objects.hash(tableName);
        }
    }
}

