""" Functions to return versions of things. """
import functools
import logging
# import pip
import os
import re
import shlex
import sys
from subprocess import DEVNULL, check_output
from git import GitCommandError, InvalidGitRepositoryError, NoSuchPathError, Repo
import biothings
from biothings.utils.dataload import dict_sweep
[docs]
def get_python_version():
    """Return the installed Python packages as listed by ``pip list``.

    ``pip list`` is run as a module of the current interpreter (falling back
    to ``python3`` when ``sys.executable`` is empty). The decoded output is
    split on newlines; the first two elements and the last one are dropped
    and the remaining lines are returned as a list of strings.

    This is a best-effort helper: any failure (pip missing, decoding error,
    non-zero exit status, ...) results in an empty list.
    """
    pip_cmd = [sys.executable or "python3", "-m", "pip", "list"]
    try:
        raw = check_output(pip_cmd, stderr=DEVNULL)
        listing = raw.decode("utf-8").replace("\r", "")
        rows = listing.split("\n")
        # skip the first two lines of the listing and the last split element
        return rows[2:-1]
    except Exception:
        # never let package introspection break the caller
        return []
[docs]
@functools.lru_cache()
def get_biothings_commit():
    """Return commit information about the installed biothings package.

    The first three lines of the ``.git-info`` file located next to
    ``biothings/__init__.py`` are reported as "repository-url",
    "commit-hash" and "master-commits". When the file cannot be read or
    holds fewer than three lines, those three values are empty strings.
    "version" is always ``biothings.__version__``.

    The result is memoized by ``functools.lru_cache``.
    """
    try:
        info_path = os.path.join(os.path.dirname(biothings.__file__), ".git-info")
        with open(info_path, "r", encoding="utf-8") as handle:
            entries = [row.strip("\n") for row in handle.readlines()]
        repo_url, commit_hash, master_commits = entries[0], entries[1], entries[2]
    except Exception:
        # missing or malformed .git-info: report empty values
        repo_url = commit_hash = master_commits = ""
    return {
        "repository-url": repo_url,
        "commit-hash": commit_hash,
        "master-commits": master_commits,
        "version": biothings.__version__,
    }
[docs]
def get_python_exec_version():
    """Return the version of the running Python interpreter.

    The result holds the full ``sys.version`` string under "version" and the
    first three components of ``sys.version_info`` under "version_info"
    (keys "major", "minor" and "micro").
    """
    major, minor, micro = sys.version_info[:3]
    version_info = {"major": major, "minor": minor, "micro": micro}
    return {"version": sys.version, "version_info": version_info}
[docs]
@functools.lru_cache()
def get_software_info(app_dir=None):
    """Return a summary of the software the current application runs on.

    The returned dict has four entries:
      - "python-package-info": result of get_python_version()
      - "codebase": result of get_repository_information(app_dir=app_dir)
      - "biothings": result of get_biothings_commit()
      - "python-info": result of get_python_exec_version()

    Results are memoized per ``app_dir`` value by ``functools.lru_cache``.
    """
    package_info = get_python_version()
    codebase = get_repository_information(app_dir=app_dir)
    biothings_info = get_biothings_commit()
    python_info = get_python_exec_version()
    return {
        "python-package-info": package_info,
        "codebase": codebase,
        "biothings": biothings_info,
        "python-info": python_info,
    }
[docs]
def check_new_version(folder, max_commits=10):
    """
    Given a folder pointing to a Git repo, return a dict containing info
    about remote commits not applied yet to the repo.

    Args:
        folder: path to the local Git working copy to inspect.
        max_commits: maximum number of entries listed under "commits"
            ("total" still counts every new commit found).

    Returns:
        - None when `folder` is not a valid Git repository, when HEAD is
          detached, or when the remote head hash equals the local one
        - an empty dict when the remote head differs but no new commit is
          found once the remotes have been fetched
        - otherwise a dict with keys "latest" (short hash of the first new
          commit), "commits" (list of dicts with "hash", "url", "date" and
          "message") and "total"

    Raises:
        Exception: any error hit while querying or fetching the remote is
            logged as a warning and re-raised unchanged.
    """
    # from https://stackoverflow.com/questions/8290233/gitpython-get-list-of-remote-commits-not-yet-applied
    try:
        repo = Repo(folder)
    except InvalidGitRepositoryError:
        logging.warning("Not a valid git repository for folder '%s', skipped for checking new version.", folder)
        return None
    try:
        # Get URL from actual remote branch name that is being tracked.
        # more details: see comments in get_version
        remote_name = repo.active_branch.tracking_branch().remote_name
        url = repo.remote(remote_name).url
        repo_url = re.sub(r"\.git$", "", url)
    except Exception as err:
        # the URL is only used to build commit links, so carry on without it
        logging.debug("Can't determine repository URL: %s", err)
        repo_url = None
    new_info = {}
    try:
        # we can't directly get the list of new commits without fetching them locally first
        # but we'd like to avoid fetching all the time just to check.
        # what we can do is a ls-remote and check the HEAD hash, if different, then fetch
        # (no pull) and inspect differences.
        try:
            head = repo.head.ref
        except TypeError as err:
            # cannot get the head reference, e.g. when HEAD is detached at certain commit point
            # For example, TypeError: HEAD is a detached symbolic reference as it points to '19ad50463d0cdd3329618789040c4b9012ccca24'
            logging.warning("%s, skipped for checking new version.", err)
            return None
        # NOTE(review): if the branch tracks nothing, the attribute access on
        # `tracking` below fails and the error is logged and re-raised by the
        # outer handler.
        tracking = head.tracking_branch()
        # inspect remote HEAD for that branch
        output = repo.git.ls_remote("--heads", tracking.remote_name, tracking.remote_head)
        remote_head_hexsha = output.split("\t")[0]
        if remote_head_hexsha == head.commit.hexsha:
            # hashes the same, we're up-to-date with the remote
            return None
        logging.info("HEAD on remote is different, new commit(s) available for '%s'", folder)
        logging.info("HEAD(remote): %s, HEAD(local): %s", remote_head_hexsha, head.commit.hexsha)
        # need to fetch new code locally
        # usually one remotes, but just in case...
        for remote in repo.remotes:
            remote.fetch()
        # now identify new commits
        new_commits = list(tracking.commit.iter_items(repo, f"{head.path}..{tracking.path}"))
        if new_commits:
            # URLs always use "/", whatever the platform's path separator is,
            # hence no os.path.join here
            commit_base_url = f"{repo_url.rstrip('/')}/commit" if repo_url else None
            new_info = {
                "latest": new_commits[0].hexsha[:6],
                "commits": [
                    {
                        "hash": c.hexsha[:6],
                        "url": f"{commit_base_url}/{c.hexsha}" if commit_base_url else None,
                        "date": c.committed_datetime.isoformat(),
                        "message": c.message,
                    }
                    # slice first so only the reported commits are formatted
                    for c in new_commits[:max_commits]
                ],
                "total": len(new_commits),
            }
    except Exception as err:
        logging.warning("Can't check for new version: %s", err)
        raise
    return new_info
[docs]
def get_version(folder):
    """Return the revision information of the Git repository in *folder*.

    Returns None (after logging a warning) when *folder* is not a valid Git
    repository. Otherwise returns a dict with:
      - "branch": active branch name, or "HEAD detached" when it cannot be
        determined
      - "commit": first 6 characters of the HEAD commit hash, or "unknown"
      - "date": ISO-formatted commit date of HEAD, or "unknown"
      - "giturl": URL of the remote tracked by the active branch, or None
    """
    try:
        repo = Repo(folder)  # app or lib dir
    except InvalidGitRepositoryError:
        logging.warning("Not a valid git repository for folder '%s', skipped for getting its version.", folder)
        return None

    # Resolve the URL through the remote actually tracked by the active
    # branch: there may be no tracking branch at all, and a remote called
    # "origin" may not exist.
    try:
        tracked = repo.active_branch.tracking_branch()
        url = repo.remote(tracked.remote_name).url
    except Exception:
        # e.g. the active branch is not tracking anything
        url = None

    try:
        head_commit = repo.head.object
        commit = head_commit.hexsha[:6]
        commitdate = head_commit.committed_datetime.isoformat()
    except Exception as err:
        logging.warning("can't determine app commit hash: %s", err)
        commit = "unknown"
        commitdate = "unknown"

    try:
        branch = repo.active_branch.name
    except Exception as err:
        logging.warning("can't determine app version, assuming HEAD detached': %s", err)
        branch = "HEAD detached"

    return {
        "branch": branch,
        "commit": commit,
        "date": commitdate,
        "giturl": url,
    }
[docs]
def set_versions(config, app_folder):
    """
    Propagate versions (git branch name, commit, ...) in config module.

    Sets config.APP_VERSION and config.BIOTHINGS_VERSION from get_version()
    unless they are already defined on `config`, in which case the existing
    value is kept and only logged. When a version is computed, the matching
    folder path is stored as config.app_folder / config.biothings_folder
    (though not exposed as a config param since they are lower-cased,
    see biothings.__init__.py, regex PARAM_PAT).

    Raises FileNotFoundError when `app_folder`, or the folder containing the
    biothings package, doesn't exist.
    """
    if not os.path.exists(app_folder):
        raise FileNotFoundError(f"'{app_folder}' application folder doesn't exist")

    # app_version: version of the API application
    if hasattr(config, "APP_VERSION"):
        logging.info("app_version '%s' forced in configuration file", config.APP_VERSION)
    else:
        config.APP_VERSION = get_version(app_folder)
        config.app_folder = app_folder

    # biothings_version: version of BioThings SDK
    if hasattr(config, "BIOTHINGS_VERSION"):
        logging.info("biothings_version '%s' forced in configuration file", config.BIOTHINGS_VERSION)
    else:
        import biothings

        # biothings.__file__ is .../biothings.api/biothings/__init__.py:
        # the package dir is its parent, the repo folder one level above
        package_dir = os.path.dirname(os.path.realpath(biothings.__file__))
        bt_folder = os.path.dirname(package_dir)
        package_name = os.path.basename(package_dir)
        if not os.path.exists(bt_folder):
            raise FileNotFoundError(f"'{bt_folder}' biothings folder doesn't exist")
        assert package_name == "biothings", "Expectig 'biothings' dir in biothings lib path"
        config.BIOTHINGS_VERSION = get_version(bt_folder)
        config.biothings_folder = bt_folder

    app_version_repr = repr(config.APP_VERSION)
    biothings_version_repr = repr(config.BIOTHINGS_VERSION)
    logging.info(
        "Running app_version=%s with biothings_version=%s", app_version_repr, biothings_version_repr
    )
[docs]
def get_source_code_info(src_file):
    """
    Given a path to a source code, try to find information
    about repository, revision, URL pointing to that file, etc...
    Return None if nothing can be determined.

    On success the returned dict is built from the keys "repo" (first URL
    of the default remote), "commit" (short commit hash), "branch" (active
    branch name) and either "folder" or "file" (path relative to the
    repository working dir), then passed through dict_sweep(). A "url" key
    is added when the repository URL contains "github.com".

    Tricky cases:
      - src_file could refer to another repo, within current repo
        (namely a remote data plugin, cloned within the api's plugins folder)
      - src_file could point to a folder, when for instance a dataplugin is
        analyzed. This is because we can't point to an uploader file since
        it's dynamically generated
    """
    # need to be absolute to build proper github URL
    abs_src_file = os.path.abspath(src_file)
    try:
        # walk up from the given path until a git repository is found
        repo = Repo(abs_src_file, search_parent_directories=True)
    except (InvalidGitRepositoryError, NoSuchPathError):
        logging.exception("Can't find a github repository for file '%s'", src_file)
        return None
    try:
        gcmd = repo.git
        # presumably runs `git rev-list -1 <branch> <path>`, i.e. asks for the
        # latest commit on the active branch touching that path -- TODO confirm
        _hash = gcmd.rev_list(-1, repo.active_branch, abs_src_file)
        # path of the source relative to the repository working dir
        # NOTE(review): str.replace() removes every occurrence of the working
        # dir string, not only a leading one
        rel_src_file = abs_src_file.replace(repo.working_dir, "").strip("/")
        if not _hash:
            # seems to be a repo cloned within a repo, change directory
            # (the current directory is saved and restored in the finally clause)
            curdir = os.path.abspath(os.curdir)
            try:
                if os.path.isdir(abs_src_file):
                    os.chdir(abs_src_file)
                    _hash = gcmd.rev_list(-1, repo.active_branch)
                else:
                    dirname, filename = os.path.split(abs_src_file)
                    os.chdir(dirname)
                    _hash = gcmd.rev_list(-1, repo.active_branch, filename)
                rel_src_file = ""  # will point to folder by commit hash
            finally:
                os.chdir(curdir)
        if _hash:
            short_hash = gcmd.rev_parse(_hash, short=7)
        else:
            logging.warning("Couldn't determine commit hash for file '%s'", src_file)
            _hash = None
            short_hash = None
        # could have more than one URLs for origin, only take first
        repo_url = next(repo.remote().urls)
        info = {
            "repo": repo_url,
            "commit": short_hash,
            "branch": repo.active_branch.name,
        }
        if os.path.isdir(src_file):
            info["folder"] = rel_src_file
        else:
            info["file"] = rel_src_file
        # presumably drops empty values (e.g. a None commit or an empty
        # relative path) -- verify against biothings.utils.dataload.dict_sweep
        info = dict_sweep(info)
        # rebuild URL to that file
        if "github.com" in repo_url:
            # NOTE(review): when _hash is None, os.path.join() is expected to
            # raise TypeError, which is caught below, so None is returned
            info["url"] = os.path.join(re.sub(r"\.git$", "", repo_url), "tree", _hash, rel_src_file)
        return info
    except GitCommandError:
        logging.exception("Error while getting git information for file '%s'", src_file)
        return None
    except TypeError as err:
        # happens with biothings symlink, just ignore
        logging.debug("Can't determine source code info (but that's fine): %s", err)
        return None