#! /usr/bin/python3

"""
Migrate existing build results for a given project and all of its CoprDirs
from one storage (Copr backend) to another (Pulp).
"""

import os
import sys
import argparse
import logging
from copr.v3 import Client
from copr_common.log import setup_script_logger
from copr_backend.helpers import BackendConfigReader
from copr_backend.constants import PULP_REDIRECT_FILE
from copr_backend.storage import PulpStorage
from copr_backend.frontend import FrontendClient
from copr_backend.exceptions import FrontendClientException


# Storage identifiers accepted as values of the --src and --dst options
STORAGES = ["backend", "pulp"]

# Module-level logger; main() configures it through setup_script_logger()
log = logging.getLogger(__name__)


def get_arg_parser():
    """
    Build the command-line parser for this script.

    `--src` and `--dst` are mandatory and limited to the known storages,
    exactly one of `--project` / `--owner` has to be given, and `--delete`
    is an optional boolean flag.
    """
    cli = argparse.ArgumentParser()

    # Both storage options share everything but the flag and the help text
    storage_options = (
        ("--src", "The source storage"),
        ("--dst", "The destination storage"),
    )
    for flag, description in storage_options:
        cli.add_argument(
            flag,
            required=True,
            choices=STORAGES,
            help=description,
        )

    # What to migrate: a single project, or everything owned by one owner
    scope = cli.add_mutually_exclusive_group(required=True)
    scope.add_argument(
        "--project",
        help="Full name of the project that is to be migrated",
    )
    scope.add_argument(
        "--owner",
        help="Migrate all projects for this owner",
    )

    cli.add_argument(
        "--delete",
        default=False,
        action="store_true",
        help="After migrating the data, remove it from the old storage",
    )
    return cli


def is_valid_build_directory(name):
    """
    Tell whether a directory name looks like a build result directory, i.e.
    an eight-digit number followed by a dash and anything else.

    See the `copr-backend-resultdir-cleaner`. We may want to share the code
    between them.
    """
    # Well-known directories that live next to the build directories
    if name in ("repodata", "devel", "tmp", "cache", "appdata"):
        return False

    # Leftovers and temporary directories of repository metadata
    if name.startswith(("repodata.old", ".repodata.")):
        return False

    # Everything before the first dash has to be the build number
    prefix, dash, _ = name.partition("-")
    if not dash:
        return False

    return len(prefix) == 8 and all(char.isdigit() for char in prefix)


def change_on_frontend(client, owner, project, storage):
    """
    Ask copr-frontend to record a new storage for the given project.

    Sends the owner, the project name and the storage name to the
    "change-storage" endpoint through `client.post()`.
    """
    payload = dict(owner=owner, project=project, storage=storage)
    client.post("change-storage", payload)


def add_redirect(fullname):
    """
    Register a HTTP redirect for this project by appending its full name as
    a new line to the `PULP_REDIRECT_FILE`.
    See https://pagure.io/fedora-infra/ansible/blob/main/f/roles/copr/backend/templates/lighttpd/pulp-redirect.lua.j2
    """
    with open(PULP_REDIRECT_FILE, mode="a", encoding="utf-8") as redirects:
        redirects.write("{0}\n".format(fullname))


def _select_rpms_to_upload(rpms, build_id, uploaded):
    """
    Pick the RPMs of one build that should be uploaded.

    `rpms` are the RPM paths found in the result directory of the build,
    `build_id` is the numeric ID of that build, and `uploaded` holds the
    results of the uploads done so far for the chroot. An RPM is skipped only
    when `uploaded` has an entry under its basename whose "build_id" is
    higher than `build_id`.
    """
    to_upload = []
    for rpm in rpms:
        # Was a package with this NEVRA already uploaded?
        # NOTE(review): `uploaded` is looked up by basename here, which relies
        # on `upload_build_results()` keying its results by basename as well.
        # If the keys are full paths, this check never matches - confirm.
        basename = os.path.basename(rpm)
        if basename in uploaded and build_id < uploaded[basename]["build_id"]:
            # The already uploaded package comes from a newer build, so we
            # don't need to bother with uploading this one
            log.info("Skipping duplicate %s from build %s, "
                     "a newer build already uploaded",
                     basename, build_id)
            continue
        to_upload.append(rpm)
    return to_upload


def _migrate_chroot(storage, subproject, chroot, chrootdir):
    """
    Upload all build results found in `chrootdir` and add them to the
    repository of the `subproject`/`chroot` pair.

    Return False when the chroot cannot be initialized in the destination
    storage, True when everything went fine. When the repository version
    cannot be created, the script exits with status 1.
    """
    # TODO Fault-tolerance and data consistency
    # Errors when creating things in Pulp will likely happen
    # (networking issues, unforeseen Pulp validation, etc). We
    # should figure out how to ensure that all RPMs were
    # successfully uploaded, and if not, we know about it.
    #
    # We also need to make sure that no builds, actions, or cron,
    # are currently writing into the results directory. Otherwise
    # we can end up with inconsistent data in Pulp.

    if not storage.init_project(subproject, chroot):
        log.error("Failed to initialize chroot: %s", chroot)
        return False

    uploaded = {}

    # We need to sort them alphabetically so that we start uploading
    # from the oldest builds and therefore we upload all RPMs, even
    # if there are NVR duplicities.
    builddirs = sorted(os.scandir(chrootdir), key=lambda x: x.name)
    for builddir_entry in builddirs:
        if not builddir_entry.is_dir():
            continue

        builddir = builddir_entry.name
        resultdir = os.path.join(chrootdir, builddir)

        if not is_valid_build_directory(builddir):
            log.info("Skipping: %s", resultdir)
            continue

        # A valid build directory name starts with the build ID
        build_id = int(builddir.split("-")[0])

        rpms = storage.find_build_results(resultdir)
        to_upload = _select_rpms_to_upload(rpms, build_id, uploaded)

        # We cannot check return code here
        results = storage.upload_build_results(
            to_upload,
            chroot,
            build_id=build_id,
        )

        # It is possible that we already uploaded a package with the
        # same NEVRA. In such case we will replace it here, and the
        # previously uploaded package won't get into the repository.
        # That doesn't bother us because Pulp will garbage collect and
        # remove it.
        uploaded.update(results)

    # Add build results to the repository
    all_package_hrefs = [x["pulp_href"] for x in uploaded.values()]
    if not storage.create_repository_version(
            subproject, chroot, all_package_hrefs):
        log.error("Failed to create repository version for chroot: %s", chroot)
        sys.exit(1)

    log.info("OK: %s", chroot)
    return True


def _migrate_coprdir(owner, subproject, coprdir, config):
    """
    Migrate every chroot directory of one CoprDir (`coprdir`).

    Return False as soon as one chroot fails to initialize (the remaining
    chroots of this CoprDir are then not processed), True otherwise.
    """
    # The context manager closes the directory iterator even when we leave
    # the loop early
    with os.scandir(coprdir) as chroot_entries:
        for chroot_entry in chroot_entries:
            chroot = chroot_entry.name
            if chroot == "srpm-builds":
                continue

            if not chroot_entry.is_dir():
                continue

            chrootdir = os.path.join(coprdir, chroot)
            appstream = None
            devel = None
            storage = PulpStorage(
                owner, subproject, appstream, devel, config, log)

            if not _migrate_chroot(storage, subproject, chroot, chrootdir):
                return False
    return True


def change_storage_for_project(fullname, dst, config):
    """
    Migrate one project

    Uploads the build results of the project and of all its CoprDirs (the
    directories named `<project>` or `<project>:<suffix>` in the owner's
    directory under `config.destdir`), then asks the frontend to switch the
    project to the `dst` storage and finally adds the HTTP redirect.

    `fullname` is the project name in the `owner/project` form.

    The script exits with status 1 when any part of the data migration
    fails, or when the frontend refuses the change for any other reason than
    the project not being found. A failure to add the redirect is only
    logged.
    """
    # Split only once, the same way as `main()` does
    owner, project = fullname.split("/", 1)
    ownerdir = os.path.join(config.destdir, owner)
    ok = True

    with os.scandir(ownerdir) as subproject_entries:
        for subproject_entry in subproject_entries:
            subproject = subproject_entry.name
            if not (subproject == project
                    or subproject.startswith(project + ":")):
                continue

            # A stray file with a matching name is not a CoprDir, listing it
            # would fail
            if not subproject_entry.is_dir():
                continue

            coprdir = os.path.join(ownerdir, subproject)
            if not _migrate_coprdir(owner, subproject, coprdir, config):
                # Remember the failure but keep going with the other CoprDirs
                ok = False

    # Not everything was migrated successfully. Play it safe and fail.
    if not ok:
        log.error(
            "Failure during '%s' migration, not switching on frontend",
            fullname,
        )
        sys.exit(1)

    # Change storage in the frontend database
    frontend_client = FrontendClient(config, try_indefinitely=False, logger=log)
    try:
        change_on_frontend(frontend_client, owner, project, dst)
    except FrontendClientException as ex:
        log.error("Failed to change storage on frontend for %s because: %s",
                  fullname, str(ex))
        # If the project was deleted on frontend, we don't mind
        if "404 NOT FOUND" not in str(ex):
            sys.exit(1)

    # At this point all data is successfully migrated and frontend thinks the
    # project is in Pulp, so we can safely add the HTTP redirect
    try:
        add_redirect(fullname)
    except OSError as ex:
        log.error("Failed to add a redirect for %s because: %s",
                  fullname, str(ex))

    log.info("Project %s successfully migrated", fullname)


def query_project(owner, name, config):
    """
    Fetch information about the `owner`/`name` project from the public API
    of the frontend configured in `config.frontend_base_url`.
    """
    api_config = {"copr_url": config.frontend_base_url}
    return Client(api_config).project_proxy.get(owner, name)


def all_projects_for_owner(owner, config):
    """
    Return full names of all projects of the given owner that are not stored
    in Pulp yet.

    The list comes from the frontend API. We cannot simply list all
    directories in `os.path.join(config.destdir, owner)` because we need to
    filter out projects that were already migrated to Pulp.
    """
    api = Client({"copr_url": config.frontend_base_url})
    return [
        candidate.full_name
        for candidate in api.project_proxy.get_list(owner)
        if candidate.storage != "pulp"
    ]


def main():
    """
    The main function

    Parses the command line, rejects requests that are not supported and
    then migrates either a single project (`--project OWNER/PROJECT`) or all
    not yet migrated projects of one owner (`--owner`).

    Returns without doing anything when the source and the destination
    storage are the same. Exits with status 0 when the requested project is
    already in the destination storage, and with status 1 when migrating
    from Pulp or `--delete` is requested, or when the `--project` value is
    not in the OWNER/PROJECT form.
    """
    setup_script_logger(log, "/var/log/copr-backend/change-storage.log")
    parser = get_arg_parser()
    args = parser.parse_args()

    if args.src == args.dst:
        log.info("The source and destination storage is the same, nothing to do.")
        return

    if args.src == "pulp":
        log.error("Migration from pulp to somewhere else is not supported")
        sys.exit(1)

    if args.delete:
        log.error("Data removal is not supported yet")
        sys.exit(1)

    config = BackendConfigReader("/etc/copr/copr-be.conf").read()
    if args.project:
        # Refuse a malformed name with a readable error instead of crashing
        # on unpacking or querying the API with an empty name
        ownername, _, projectname = args.project.partition("/")
        if not ownername or not projectname:
            log.error("Invalid project name '%s', expected OWNER/PROJECT",
                      args.project)
            sys.exit(1)

        project = query_project(ownername, projectname, config)
        if project.storage == args.dst:
            print(
                "The project {0} has already been migrated to {1}"
                .format(args.project, args.dst)
            )
            sys.exit(0)
        change_storage_for_project(args.project, args.dst, config)
    elif args.owner:
        projects = all_projects_for_owner(args.owner, config)
        # Print the progress, one line per project
        for i, fullname in enumerate(projects, start=1):
            print(
                "[{0}/{1}] Migrating {2} to {3}"
                .format(i, len(projects), fullname, args.dst)
            )
            change_storage_for_project(fullname, args.dst, config)
    else:
        # Reachable only when the given --project or --owner value is empty
        log.error("Unexpected choice. This should never happen")
        sys.exit(1)


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Interrupted by the user (Ctrl+C): end quietly, without a traceback
        pass
