#!/usr/bin/perl

# Copyright © 2020-2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

use v5.20;
use warnings;
use utf8;

use Const::Fast;
use DBI;
use File::Find::Rule;
use Email::Address::XS;
use JSON::MaybeXS;
use List::SomeUtils qw(uniq natatime);
use Path::Tiny;
use Time::Duration;
use Time::Moment;
use Try::Tiny;
use Unicode::UTF8 qw(encode_utf8 decode_utf8);
use YAML::XS qw(LoadFile);

const my $EMPTY => q{};
const my $SINGLE_QUOTE => q{'};

const my $RUNS_PER_UPSERT => 500;
const my $DATABASE_SETTLEMENT_PERIOD => 10;

my $scriptdir = path(__FILE__)->parent->stringify;

die encode_utf8("Usage [config-file] ( [JSON-file] | [folder])+\n")
  if @ARGV < 2;

my $configfile = shift @ARGV;
die encode_utf8('No config file')
  unless length $configfile;

my $yaml = LoadFile($configfile);

# get database config
my $dbconnectstring = $yaml->{database};
die encode_utf8('No database connect string')
  unless length $dbconnectstring;

my @files;
for my $candidate (@ARGV) {
    if (-d $candidate) {
        push(@files,File::Find::Rule->file->name('*.json.xz')->in($candidate));
        next;
    }

    unless (-e _) {
        warn encode_utf8("File does not exist: $candidate");
        next;
    }

    if (path($candidate) !~ / [.] json [.] xz $/isx) {
        warn encode_utf8("Filename different from Lintian hints: $candidate");
        next;
    }

    push(@files, $candidate);
}

my $iterator = natatime($RUNS_PER_UPSERT, sort @files);
while (my @partial = $iterator->()) {

    my $postgres = DBI->connect('dbi:Pg:' . $dbconnectstring,
        $EMPTY, $EMPTY,{AutoCommit => 0, pg_enable_utf8 => 0});

    insert_together($postgres, sort @partial);
    say encode_utf8('Elapsed time: ' . duration(time - $^T));

    delete_old_runs($postgres);
    delete_source_only_runs($postgres);

    $postgres->disconnect;

    sleep $DATABASE_SETTLEMENT_PERIOD;
}

my $postgres = DBI->connect('dbi:Pg:' . $dbconnectstring,
    $EMPTY, $EMPTY,{AutoCommit => 0, pg_enable_utf8 => 0});

prune_lintian_versions($postgres);

$postgres->disconnect;

exit;

sub insert_together {
    my ($database, @resultspaths) = @_;

    # needed to find hints by date in Perl later
    $database->do('SET timezone TO ' . $SINGLE_QUOTE . 'UTC' . $SINGLE_QUOTE);
    $database->commit;

    my @db_runs;
    my %keyed;

    for my $resultspath (@resultspaths) {

        say encode_utf8("Reading $resultspath");

        # auto deleted when this variable is lost
        my $decompressed = Path::Tiny->tempfile;

        my $decompressedpath = $decompressed->stringify;

        die encode_utf8("Cannot decompress $resultspath.")
          if
          system("xz --decompress --stdout $resultspath > $decompressedpath");

        my $json = path($decompressedpath)->slurp;
        return
          unless length $json;

        my $run;
        try {
            $run = decode_json($json);
        } catch {
            warn encode_utf8("Error reading $resultspath: $_");
        };
        return
          unless defined $run;

        # conserve memory
        undef $json;

        my $tool = $run->{tool} // {};
        next
          unless %{$tool};

        my $product = $run->{product} // {};
        next
          unless %{$product};

        # limit precision; value returned later could be rounded
        # also, javascript knows only about milliseconds
        $product->{processing_start}
          = Time::Moment->from_string($product->{processing_start})
          ->at_utc->strftime('%Y-%m-%dT%H:%M:%S%3f%Z');
        $product->{processing_end}
          = Time::Moment->from_string($product->{processing_end})
          ->at_utc->strftime('%Y-%m-%dT%H:%M:%S%3f%Z');

        my $stdout = $product->{stdout};
        next
          unless ref($stdout) eq 'HASH';

        my @groups = @{ $stdout->{groups} // []};
        for my $group (@groups) {

            if (   $group->{source_name} ne $run->{source_name}
                || $group->{source_version} ne $run->{source_version}) {

                warn encode_utf8(
"group data is different task data in $resultspath; skipping."
                );
                next;
            }

            my %db_run;

            $db_run{$_} = $run->{$_}
              for qw{source_name source_version release port};

            $db_run{invocation_options} = $tool->{options};

            $db_run{start_time} = $product->{processing_start};
            $db_run{end_time} = $product->{processing_end};

            $db_run{messages} = [split(/\n/, $product->{stderr} // $EMPTY)];
            $db_run{exit_status} = $product->{exit_status};

            $db_run{lintian_version} = $stdout->{lintian_version};

            push(@db_runs, \%db_run);

            my @db_inputs;
            my @db_hints;
            my @db_masks;

            # add hints
            my @input_files = @{ $group->{input_files} // []};
            for my $input_file (@input_files) {

                my %db_input;

                $db_input{$_} = $db_run{$_}
                  for
                  qw{source_name source_version release port lintian_version start_time};

                $db_input{pool_path} = $input_file->{path} // $EMPTY;

                push(@db_inputs, \%db_input);

                my $position = 1;

                my @hints = @{$input_file->{hints} // []};
                for my $hint (@hints) {

                    my %db_hint;

                    $db_hint{$_} = $db_input{$_}
                      for
                      qw{source_name source_version release port lintian_version start_time pool_path};

                    $db_hint{tag_name} = $hint->{tag} // $EMPTY;
                    $db_hint{note} = $hint->{note} // $EMPTY;

                    if (defined $hint->{override}) {
                        $db_hint{override} = 'yes';
                        $db_hint{override_justification}
                          = $hint->{override}{justification} // $EMPTY;
                    } else {
                        $db_hint{override} = 'no';
                        $db_hint{override_justification} = $EMPTY;
                    }

                    $db_hint{position} = $position;

                    if (defined $hint->{pointer}) {

                        my $pointer = $hint->{pointer};

                        $db_hint{item_line_position}
                          = $pointer->{line_position} // 0;

                        if (defined $pointer->{item}) {

                            my $item = $pointer->{item};

                            my ($index)
                              = ($item->{index}
                                  =~ m{^ \S+ \s+ [(] (\S+) [)] $}x);

                            $db_hint{item_index} = $index;
                            $db_hint{item_name} = $item->{name};

                        } else {
                            $db_hint{item_index} = 'none';
                            $db_hint{item_name} = $EMPTY;
                        }

                    } else {
                        $db_hint{item_index} = 'none';
                        $db_hint{item_name} = $EMPTY;
                        $db_hint{item_line_position} = 0;
                    }

                    push(@db_hints, \%db_hint);

                    for my $mask (@{$hint->{masks}}) {

                        my %db_mask;

                        $db_mask{screen} = $mask->{screen};
                        $db_mask{excuse} = $mask->{excuse} // $EMPTY;
                        $db_mask{$_} = $db_hint{$_}
                          for
                          qw{source_name source_version lintian_version start_time release port pool_path position};

                        push(@db_masks, \%db_mask);
                    }

                } continue {
                    $position++;
                }
            }

            my $source_name = $run->{source_name};
            my $source_version = $run->{source_version};
            my $release = $run->{release};
            my $port_architecture = $run->{port};

            my $truncated_time = $product->{processing_start};

            my $lintian_version = $stdout->{lintian_version};

            $keyed{$source_name}{$source_version}{$lintian_version}
              {$truncated_time}{$release}{$port_architecture} = {
                inputs =>\@db_inputs,
                hints => \@db_hints,
                masks => \@db_masks
              };
        }
    }

    my @required_versions = uniq map { $_->{lintian_version} } @db_runs;

    if (@required_versions) {
        system("$scriptdir/add-lintian-tags", $configfile, @required_versions)
          == 0
          or die encode_utf8(
            "Cannot add tags from Lintian versions: @required_versions");
    }

    $database->do(
        'SET CONSTRAINTS lintian.runs_released_sources_fkey DEFERRED');

    my $insert_runs_sql =<<~'END_OF_QUERY';
INSERT INTO lintian.runs
SELECT
    *
FROM
    json_populate_recordset(NULL::lintian.runs, $1) AS j
WHERE
    EXISTS (
        SELECT
            TRUE
        FROM
            archive.sources AS s
        WHERE
            j.source_name = s.source_name
            AND j.source_version = s.source_version)
ON CONFLICT (source_name,
    source_version,
    lintian_version,
    start_time,
    release,
    port)
    DO NOTHING
RETURNING
    source_name,
    source_version,
    lintian_version,
    to_json(start_time) #>> '{}' AS start_time,
    release,
    port
END_OF_QUERY

    say encode_utf8('Trying to insert ' . scalar @db_runs . ' runs.');

    my $json_runs = encode_json(\@db_runs);
    undef @db_runs;
    my $insert_runs = $database->prepare($insert_runs_sql);
    $insert_runs->execute($json_runs);
    undef $json_runs;

    say encode_utf8('Inserted ' . $insert_runs->rows . ' runs.');

    my @db_inputs;
    my @db_hints;
    my @db_masks;

    while (my $row = $insert_runs->fetchrow_hashref) {

        my $source_name = decode_utf8($row->{source_name});
        my $source_version = decode_utf8($row->{source_version});
        my $lintian_version = decode_utf8($row->{lintian_version});

        # format from postgres is different; +00:00 instead of Z
        my $truncated_time
          = Time::Moment->from_string(decode_utf8($row->{start_time}))
          ->at_utc->strftime('%Y-%m-%dT%H:%M:%S%3f%Z');
        my $release = decode_utf8($row->{release});
        my $port_architecture = decode_utf8($row->{port});

        my $for_run = $keyed{$source_name}{$source_version}{$lintian_version}
          {$truncated_time}{$release}{$port_architecture};

        push(@db_inputs, @{$for_run->{inputs} // []});
        push(@db_hints, @{$for_run->{hints} // []});
        push(@db_masks, @{$for_run->{masks} // []});
    }

    undef %keyed;

    $insert_runs->finish;

    my $insert_inputs_sql =<<~'END_OF_QUERY';
INSERT INTO lintian.inputs
SELECT
    *
FROM
    json_populate_recordset(NULL::lintian.inputs, $1) AS j
WHERE
    EXISTS (
        SELECT
            TRUE
        FROM
            lintian.runs AS r
        WHERE
            j.source_name = r.source_name
            AND j.source_version = r.source_version
            AND j.lintian_version = r.lintian_version
            AND j.start_time = r.start_time
            AND j.release = r.release
            AND j.port = r.port)
ON CONFLICT (source_name,
    source_version,
    lintian_version,
    start_time,
    release,
    port,
    pool_path)
    DO NOTHING
END_OF_QUERY

    say encode_utf8('Inserting ' . scalar @db_inputs . ' inputs.');

    my $json_inputs = encode_json(\@db_inputs);
    undef @db_inputs;
    my $insert_inputs = $database->prepare($insert_inputs_sql);
    $insert_inputs->execute($json_inputs);
    undef $json_inputs;

    my $insert_hints_sql =<<~'END_OF_QUERY';
INSERT INTO lintian.hints
SELECT
    *
FROM
    json_populate_recordset(NULL::lintian.hints, $1) AS j
WHERE
    EXISTS (
        SELECT
            TRUE
        FROM
            lintian.inputs AS i
        WHERE
            j.source_name = i.source_name
            AND j.source_version = i.source_version
            AND j.lintian_version = i.lintian_version
            AND j.start_time = i.start_time
            AND j.release = i.release
            AND j.port = i.port
            AND j.pool_path = i.pool_path)
ON CONFLICT (source_name,
    source_version,
    lintian_version,
    start_time,
    release,
    port,
    pool_path,
    position)
    DO NOTHING
END_OF_QUERY

    say encode_utf8('Inserting ' . scalar @db_hints . ' hints.');

    my $json_hints = encode_json(\@db_hints);
    undef @db_hints;
    my $insert_hints = $database->prepare($insert_hints_sql);
    $insert_hints->execute($json_hints);
    undef $json_hints;

    my $insert_masks_sql =<<~'END_OF_QUERY';
INSERT INTO lintian.masks
SELECT
    *
FROM
    json_populate_recordset(NULL::lintian.masks, $1) AS j
WHERE
    EXISTS (
        SELECT
            TRUE
        FROM
            lintian.hints AS h
        WHERE
            j.source_name = h.source_name
            AND j.source_version = h.source_version
            AND j.lintian_version = h.lintian_version
            AND j.start_time = h.start_time
            AND j.release = h.release
            AND j.port = h.port
            AND j.pool_path = h.pool_path
            AND j.position = h.position)
ON CONFLICT (source_name,
    source_version,
    lintian_version,
    start_time,
    release,
    port,
    pool_path,
    position,
    screen)
    DO NOTHING
END_OF_QUERY

    say encode_utf8('Inserting ' . scalar @db_masks . ' masks.');

    my $json_masks = encode_json(\@db_masks);
    undef @db_masks;
    my $insert_masks = $database->prepare($insert_masks_sql);
    $insert_masks->execute($json_masks);
    undef $json_masks;

    my $delete_outdated_runs_sql =<<~'END_OF_QUERY';
DELETE FROM lintian.runs AS r
WHERE NOT EXISTS (
        SELECT
            *
        FROM
            archive.released_sources AS rs
        WHERE
            rs.source_name = r.source_name
            AND rs.source_version = r.source_version
            AND rs.release = r.release)
END_OF_QUERY

    $database->do($delete_outdated_runs_sql);
    $database->commit;

    return;
}

sub delete_old_runs {
    my ($database) = @_;

    my $delete_old_runs_sql =<<~'END_OF_QUERY';
DELETE
FROM
    lintian.runs AS r1
WHERE
    (lintian_version, start_time, release, port) IN (
        SELECT
            r2.lintian_version,
            r2.start_time,
            r2.release,
            r2.port
        FROM
            lintian.runs AS r2
        WHERE
            r2.source_name = r1.source_name
            AND r2.source_version = r1.source_version
            AND r2.release = r1.release
            AND r2.port = r1.port
        ORDER BY
            lintian_version DESC,
            start_time DESC
        OFFSET $1)
END_OF_QUERY

    say encode_utf8(
        'Keeping only most recent run for each source/version/release/port.');
    my $delete_old_runs = $database->prepare($delete_old_runs_sql);
    $delete_old_runs->execute(encode_utf8(1));
    $database->commit;

    say encode_utf8('Elapsed time: ' . duration(time - $^T));

    return;
}

sub delete_source_only_runs {
    my ($database) = @_;

    my $delete_source_only_runs_sql =<<~'END_OF_QUERY';
DELETE FROM lintian.runs AS r
WHERE NOT EXISTS (
        SELECT
            *
        FROM
            archive.installables AS i
            JOIN archive.released_installables AS ri ON ri.installable_name = i.installable_name
                AND ri.installable_version = i.installable_version
                AND ri.installable_architecture = i.installable_architecture
        WHERE
            i.source_name = r.source_name
            AND i.source_version = r.source_version
            AND ri.release = r.release
            AND ri.port = r.port)
END_OF_QUERY

    say encode_utf8('Deleting runs without installable packages.');
    my $delete_source_only_runs
      = $database->prepare($delete_source_only_runs_sql);
    $delete_source_only_runs->execute;
    $database->commit;

    say encode_utf8('Elapsed time: ' . duration(time - $^T));

    return;
}

sub prune_lintian_versions {
    my ($database) = @_;

    my $prune_lintian_versions_sql =<<~'END_OF_QUERY';
DELETE FROM lintian.versions AS v
WHERE NOT EXISTS (
    SELECT
        *
    FROM
        lintian.runs AS r
        WHERE
            r.lintian_version = v.lintian_version
)
END_OF_QUERY

    say encode_utf8('Pruning Lintian versions not used in any runs.');
    $database->do($prune_lintian_versions_sql);
    $database->commit;

    say encode_utf8('Elapsed time: ' . duration(time - $^T));

    return;
}

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
