# Licensed under a 3-clause BSD style license - see LICENSE.rst
import requests
import warnings
import numpy as np
import sys
from bs4 import BeautifulSoup
import keyring
import time
import smtplib
import re
import os
import threading
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email import encoders
from email.mime.text import MIMEText
# Astropy imports
from astropy.table import Table
import astropy.io.votable as votable
from astroquery import log
# Astroquery imports
from ..query import QueryWithLogin
from . import conf
from ..exceptions import LoginError
conf.max_lines = -1
conf.max_width = -1
__all__ = ['CosmoSim']
class CosmoSimClass(QueryWithLogin):
QUERY_URL = conf.query_url
SCHEMA_URL = conf.schema_url
TIMEOUT = conf.timeout
USERNAME = conf.username
def __init__(self):
super(CosmoSimClass, self).__init__()
def _login(self, username=None, password=None, store_password=False,
reenter_password=False):
"""
Login to the CosmoSim database.
Parameters
----------
username : str, optional
Username to the CosmoSim database. If not given, it should be
specified in the config file.
store_password : bool, optional
Stores the password securely in your keyring. Default is False.
reenter_password : bool, optional
Asks for the password even if it is already stored in the
            keyring. This is the way to overwrite an already stored password
            in the keyring. Default is False.
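        Examples
        --------
        Illustrative only; requires network access and a registered CosmoSim
        account, and the username below is a placeholder.
        >>> from astroquery.cosmosim import CosmoSim  # doctest: +SKIP
        >>> CosmoSim.login(username="my_cosmosim_username")  # doctest: +SKIP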
"""
if username is None:
if self.USERNAME == "":
raise LoginError("If you do not pass a username to login(), "
"you should configure a default one!")
else:
username = self.USERNAME
# login after logging out (interactive)
if not hasattr(self, 'session'):
self.session = requests.session()
# login after login (interactive)
if hasattr(self, 'username'):
log.warning("Attempting to login while another user ({0}) "
"is already logged in.".format(self.username))
self.check_login_status()
return
self.username = username
# Get password from keyring or prompt
self.password, password_from_keyring = self._get_password(
"astroquery:www.cosmosim.org", username, reenter=reenter_password)
# Authenticate
warnings.warn("Authenticating {0} on www.cosmosim.org..."
.format(self.username))
authenticated = self._request('POST', CosmoSim.QUERY_URL,
auth=(self.username, self.password),
cache=False)
if authenticated.status_code == 200:
warnings.warn("Authentication successful!")
elif (authenticated.status_code == 401 or
authenticated.status_code == 403):
warnings.warn("Authentication failed!")
elif authenticated.status_code == 503:
warnings.warn("Service Temporarily Unavailable...")
# Generating dictionary of existing tables
self._existing_tables()
if (authenticated.status_code == 200 and
password_from_keyring is None and store_password):
keyring.set_password("astroquery:www.cosmosim.org",
self.username, self.password)
        # Delete the job to prevent jobs with phase PENDING from piling up
if authenticated.status_code == 200:
soup = BeautifulSoup(authenticated.content, "lxml")
self.delete_job(jobid=str(soup.find("uws:jobref")["id"]),
squash=True)
return authenticated
    def logout(self, deletepw=False):
"""
Public function which allows the user to logout of their cosmosim
credentials.
Parameters
----------
        deletepw : bool
            A hard logout - delete the password for the associated username
            from the keychain. The default is False.
        """
if (hasattr(self, 'username') and hasattr(self, 'password') and
hasattr(self, 'session')):
if deletepw is True:
try:
keyring.delete_password("astroquery:www.cosmosim.org",
self.username)
warnings.warn("Removed password for {0} in the keychain."
.format(self.username))
except keyring.errors.PasswordDeleteError:
warnings.warn("Password for {0} was never stored in the "
"keychain.".format(self.username))
del self.session
del self.username
del self.password
else:
log.error("You must log in before attempting to logout.")
    def check_login_status(self):
"""
Public function which checks the status of a user login attempt.
"""
if (hasattr(self, 'username') and hasattr(self, 'password') and
hasattr(self, 'session')):
authenticated = self._request('POST', CosmoSim.QUERY_URL,
auth=(self.username, self.password),
cache=False)
if authenticated.status_code == 200:
warnings.warn("Status: You are logged in as {0}."
.format(self.username))
soup = BeautifulSoup(authenticated.content, "lxml")
self.delete_job(jobid=str(soup.find("uws:jobref")["id"]),
squash=True)
else:
warnings.warn("Status: The username/password combination "
"for {0} appears to be incorrect."
.format(self.username))
warnings.warn("Please re-attempt to login with your cosmosim "
"credentials.")
else:
warnings.warn("Status: You are not logged in.")
    def run_sql_query(self, query_string, tablename=None, queue=None,
mail=None, text=None, cache=True):
"""
Public function which sends a POST request containing the sql query
string.
Parameters
----------
query_string : string
The sql query to be sent to the CosmoSim.org server.
tablename : string
            The name of the table under which the query results will be
            stored. If left blank or if the name is already taken, one will
            be generated automatically.
queue : string
The short/long queue option. Default is short.
mail : string
The user's email address for receiving job completion alerts.
text : string
The user's cell phone number for receiving job completion alerts.
cache : bool
Whether to cache the query locally
Returns
-------
result : jobid
The jobid of the query
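        Examples
        --------
        Illustrative only; assumes a prior login, and the query string and
        table name are placeholders.
        >>> sql = "SELECT * FROM MDR1.FOF WHERE snapnum=85"  # placeholder
        >>> jobid = CosmoSim.run_sql_query(query_string=sql,
        ...                                tablename="my_table",
        ...                                queue="short")  # doctest: +SKIP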
"""
self._existing_tables()
if not queue:
queue = 'short'
if tablename in self.table_dict.values():
result = self._request('POST',
CosmoSim.QUERY_URL,
auth=(self.username, self.password),
data={'query': query_string, 'phase': 'run',
'queue': queue},
cache=cache)
soup = BeautifulSoup(result.content, "lxml")
phase = soup.find("uws:phase").string
if phase in ['ERROR']:
warnings.warn("No table was generated for job with phase "
"`{0}`".format(phase))
gen_tablename = "{0}".format(phase)
else:
gen_tablename = str(soup.find(id="table").string)
log.warning("Table name {0} is already taken."
.format(tablename))
warnings.warn("Generated table name: {0}".format(gen_tablename))
elif tablename is None:
result = self._request('POST', CosmoSim.QUERY_URL,
auth=(self.username, self.password),
data={'query': query_string, 'phase': 'run',
'queue': queue},
cache=cache)
else:
result = self._request('POST', CosmoSim.QUERY_URL,
auth=(self.username, self.password),
data={'query': query_string,
'table': str(tablename),
'phase': 'run', 'queue': queue},
cache=cache)
self._existing_tables()
soup = BeautifulSoup(result.content, "lxml")
self.current_job = str(soup.find("uws:jobref")["id"])
warnings.warn("Job created: {}".format(self.current_job))
if mail or text:
self._initialize_alerting(self.current_job, mail=mail, text=text)
return self.current_job
def _existing_tables(self):
"""
Internal function which builds a dictionary of the tables already in
use for a given set of user credentials. Keys are jobids and values
are the tables which are stored under those keys.
"""
checkalljobs = self.check_all_jobs()
completed_jobs = [key for key in self.job_dict.keys()
if self.job_dict[key] in ['COMPLETED', 'EXECUTING']]
soup = BeautifulSoup(checkalljobs.content, "lxml")
self.table_dict = {}
for tag in soup.find_all({"uws:jobref"}):
jobid = tag.get('xlink:href').split('/')[-1]
if jobid in completed_jobs:
self.table_dict[jobid] = str(tag.get('id'))
    def check_job_status(self, jobid=None):
"""
A public function which sends an http GET request for a given jobid,
and checks the server status. If no jobid is provided, it uses the most
recent query (if one exists).
Parameters
----------
jobid : string
The jobid of the sql query. If no jobid is given, it attempts to
use the most recent job (if it exists in this session).
Returns
-------
result : content of `~requests.Response` object
The requests response phase
"""
        if jobid is None:
            if hasattr(self, 'current_job'):
                jobid = self.current_job
            else:
                log.error("No jobid was provided and no current job is "
                          "associated with this session.")
                return
response = self._request(
'GET', CosmoSim.QUERY_URL + '/{}'.format(jobid) + '/phase',
auth=(self.username, self.password), data={'print': 'b'},
cache=False)
log.info("Job {}: {}".format(jobid, response.content))
return response.content
    def check_all_jobs(self, phase=None, regex=None, sortby=None):
"""
Public function which builds a dictionary whose keys are each jobid
for a given set of user credentials and whose values are the phase
status (e.g. - EXECUTING,COMPLETED,PENDING,ERROR).
Parameters
----------
phase : list
A list of phase(s) of jobs to be checked on. If nothing
provided, all are checked.
regex : string
A regular expression to match all tablenames to. Matching table
names will be included. Note - Only tables/starttimes are
associated with jobs which have phase COMPLETED.
sortby : string
An option to sort jobs (after phase and regex criteria have been
            taken into account) by either the execution start time
            (``'starttime'``) or by the table name (``'tablename'``).
Returns
-------
checkalljobs : `~requests.Response` object
The requests response for the GET request for finding all
existing jobs.
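        Examples
        --------
        Illustrative only; assumes a prior login.
        >>> CosmoSim.check_all_jobs(phase=['COMPLETED'],
        ...                         sortby='tablename')  # doctest: +SKIP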
"""
checkalljobs = self._request('GET', CosmoSim.QUERY_URL,
auth=(self.username, self.password),
params={'print': 'b'}, cache=False)
self.job_dict = {}
soup = BeautifulSoup(checkalljobs.content, "lxml")
for tag in soup.find_all({"uws:jobref"}):
tag_phase = str(tag.find('uws:phase').string)
if tag_phase in ['COMPLETED', 'EXECUTING', 'ABORTED', 'ERROR']:
self.job_dict['{0}'.format(tag.get('xlink:href')
.split('/')[-1])] = tag_phase
else:
self.job_dict[str(tag.get('id'))] = tag_phase
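        # Filtering by regex and sorting below rely on table names and start
        # times, which only exist for jobs with phase COMPLETED (see the
        # docstring note above).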
if phase:
phase = [phase[i].upper() for i in range(len(phase))]
if regex:
pattern = re.compile("{}".format(regex))
            try:
                table_names = list(self.table_dict.values())
                groups = [pattern.match(name).group()
                          for name in table_names
                          if pattern.match(name) is not None]
                matching_tables = [group for group in groups
                                   if group in table_names]
            except AttributeError:
                warnings.warn('No tables matching the regular expression '
                              '`{0}` were found.'.format(regex))
                matching_tables = list(self.table_dict.values())
if phase:
if "COMPLETED" not in phase:
warnings.warn("No jobs with phase `{0}` matching "
"the regular expression `{1}` were found.\n"
"Matching regular expression `{1}` to all "
"jobs with phase `COMPLETED` instead "
"(unsorted):".format(phase, regex))
else:
matching_tables = [[self.table_dict[i]
for i in self.table_dict.keys()
if self.table_dict[i] == miter and
self.job_dict[i] in phase][0]
for miter in matching_tables]
self._existing_tables() # creates a fresh up-to-date table_dict
self._starttime_dict()
if not sortby:
if regex:
matching = zip(*[[(i, self.job_dict[i], self.starttime_dict[i])
for i in self.table_dict.keys()
if self.table_dict[i] == miter][0]
for miter in matching_tables])
(matching_jobids, matching_phases,
matching_starttimes) = matching
if sortby:
if sortby.upper() == "TABLENAME":
if 'matching_tables' not in locals():
matching_tables = sorted(self.table_dict.values())
else:
matching_tables = sorted(matching_tables)
matching = zip(*[[(i, self.job_dict[i], self.starttime_dict[i])
for i in self.table_dict.keys()
if self.table_dict[i] == miter][0]
for miter in matching_tables])
(matching_jobids, matching_phases,
matching_starttimes) = matching
elif sortby.upper() == 'STARTTIME':
if 'matching_tables' not in locals():
matching_starttimes = sorted(self.starttime_dict.values())
matching = zip(*[[(i, self.job_dict[i], self.table_dict[i])
for i in self.starttime_dict.keys()
if self.starttime_dict[i] == miter][0]
for miter in matching_starttimes])
(matching_jobids, matching_phases,
matching_tables) = matching
else:
matching_starttimes = [[self.starttime_dict[i]
for i in self.table_dict.keys()
if self.table_dict[i] == miter][0]
for miter in matching_tables]
matching = zip(*[[(i, self.job_dict[i], self.table_dict[i])
for i in self.starttime_dict.keys()
if self.starttime_dict[i] == miter][0]
for miter in matching_starttimes])
(matching_jobids, matching_phases,
matching_tables) = matching
frame = sys._getframe(1)
# list of methods which use check_all_jobs() for which I would not
# like job_dict to be printed to the terminal
do_not_print_job_dict = ['completed_job_info', 'general_job_info',
'delete_all_jobs', '_existing_tables',
'delete_job', 'download']
if frame.f_code.co_name in do_not_print_job_dict:
return checkalljobs
else:
if not phase and not regex:
if not sortby:
t = Table()
t['JobID'] = list(self.job_dict.keys())
t['Phase'] = list(self.job_dict.values())
t.pprint()
else:
if sortby.upper() == 'TABLENAME':
t = Table()
t['Tablename'] = matching_tables
t['Starttime'] = matching_starttimes
t['JobID'] = matching_jobids
t['Phase'] = matching_phases
t.pprint()
if sortby.upper() == 'STARTTIME':
t = Table()
t['Starttime'] = matching_starttimes
t['Tablename'] = matching_tables
t['JobID'] = matching_jobids
t['Phase'] = matching_phases
t.pprint()
elif not phase and regex:
t = Table()
if sortby:
if sortby.upper() == 'STARTTIME':
t['Starttime'] = matching_starttimes
t['Tablename'] = matching_tables
if sortby.upper() == 'TABLENAME':
t['Tablename'] = matching_tables
t['Starttime'] = matching_starttimes
if not sortby:
t['Tablename'] = matching_tables
t['Starttime'] = matching_starttimes
t['JobID'] = matching_jobids
t['Phase'] = matching_phases
t.pprint()
if phase and not regex:
if len(phase) == 1 and "COMPLETED" in phase:
if not sortby:
matching_jobids = [key
for key in self.job_dict.keys()
if self.job_dict[key] in phase]
matching = zip(*[[(self.table_dict[i],
self.job_dict[i],
self.starttime_dict[i])
for i in self.table_dict.keys()
if i == miter][0]
for miter in matching_jobids])
(matching_tables, matching_phases,
matching_starttimes) = matching
t = Table()
t['JobID'] = matching_jobids
t['Phase'] = matching_phases
t['Tablename'] = matching_tables
t['Starttime'] = matching_starttimes
if sortby:
if sortby.upper() == 'TABLENAME':
t['Tablename',
'Starttime', 'JobID', 'Phase'].pprint()
if sortby.upper() == 'STARTTIME':
t['Starttime',
'Tablename', 'JobID', 'Phase'].pprint()
else:
t.pprint()
else:
if sortby:
warnings.warn('Sorting can only be applied to jobs '
'with phase `COMPLETED`.')
if not sortby:
matching_jobids = [key
for key in self.job_dict.keys()
if self.job_dict[key] in phase]
matching_phases = [self.job_dict[key]
for key in self.job_dict.keys()
if self.job_dict[key] in phase]
t = Table()
t['JobID'] = matching_jobids
t['Phase'] = matching_phases
t.pprint()
if phase and regex:
t = Table()
t['Tablename'] = matching_tables
t['Starttime'] = matching_starttimes
t['JobID'] = matching_jobids
t['Phase'] = matching_phases
if sortby:
if sortby.upper() == 'TABLENAME':
t.pprint()
if sortby.upper() == 'STARTTIME':
t['Starttime', 'Tablename', 'JobID', 'Phase'].pprint()
else:
t.pprint()
return checkalljobs
    def completed_job_info(self, jobid=None, output=False):
"""
A public function which sends an http GET request for a given
jobid with phase COMPLETED. If output is True, the function prints
        a dictionary to the screen, while always storing the responses in
        the attribute ``response_dict_current``. If no jobid is provided,
        an overview of all responses with phase COMPLETED is generated.
Parameters
----------
jobid : string
The jobid of the sql query.
output : bool
Print output of response(s) to the terminal
"""
self.check_all_jobs()
if jobid is None:
completed_jobids = [key for key in self.job_dict.keys()
if self.job_dict[key] == 'COMPLETED']
response_list = [
self._request(
'GET',
CosmoSim.QUERY_URL + "/{}".format(completed_jobids[i]),
auth=(self.username, self.password), cache=False)
for i in range(len(completed_jobids))]
self.response_dict_current = {}
for i, vals in enumerate(completed_jobids):
self.response_dict_current[vals] = (
self._generate_response_dict(response_list[i]))
else:
if self.job_dict[jobid] == 'COMPLETED':
response_list = [
self._request(
'GET', CosmoSim.QUERY_URL + "/{}".format(jobid),
auth=(self.username, self.password), cache=False)]
self.response_dict_current = {}
self.response_dict_current[jobid] = (
self._generate_response_dict(response_list[0]))
else:
warnings.warn("JobID must refer to a query with a phase "
"of 'COMPLETED'.")
return
if output is True:
            dictkeys = list(self.response_dict_current.keys())
if len(dictkeys) > 1:
keys = [i for i in self.response_dict_current.keys()]
phases = [self.job_dict[key] for key in keys]
t = Table()
t['JobID'] = keys
t['Phase'] = phases
t.pprint()
warnings.warn("Use specific jobid to get more information, or "
"explore `self.response_dict_current`.")
elif len(dictkeys) == 1:
print(self.response_dict_current[dictkeys[0]]['content'])
else:
log.error('No completed jobs found.')
return
else:
return
def _generate_response_dict(self, response):
"""
A private function which takes in a response object and creates a
response dictionary.
Parameters
----------
response : `~requests.Response`
requests response object
Returns
-------
response_dict : dict
A dictionary of some of the response object's methods
"""
R = response
response_dict = {'content': R.content,
'cookies': R.cookies,
'elapsed': R.elapsed,
'encoding': R.encoding,
'headers': R.headers,
'ok': R.ok,
'request': R.request,
'url': R.url}
return response_dict
def _starttime_dict(self):
"""
A private function which generates a dictionary of jobids (must have
phase COMPLETED) linked to starttimes.
"""
completed_ids = [key
for key in self.job_dict.keys()
if self.job_dict[key] == 'COMPLETED']
response_list = [self._request('GET',
CosmoSim.QUERY_URL + "/{}".format(i),
auth=(self.username, self.password),
cache=False)
for i in completed_ids]
soups = [BeautifulSoup(response_list[i].content, "lxml")
for i in range(len(response_list))]
self.starttime_dict = {}
for i in range(len(soups)):
self.starttime_dict[str(completed_ids[i])] = str(
soups[i].find('uws:starttime').string)
    def general_job_info(self, jobid=None, output=False):
"""
A public function which sends an http GET request for a given
jobid in any phase. If no jobid is provided, a summary of all
jobs is generated.
Parameters
----------
jobid : string
The jobid of the sql query.
output : bool
Print output of response(s) to the terminal
"""
self.check_all_jobs()
if jobid is None:
print("Job Summary:\n"
"There are {0} jobs with phase: COMPLETED.\n"
"There are {1} jobs with phase: ERROR.\n"
"There are {2} jobs with phase: ABORTED.\n"
"There are {3} jobs with phase: PENDING.\n"
"There are {4} jobs with phase: EXECUTING.\n"
"There are {5} jobs with phase: QUEUED.\n"
"Try providing a jobid for the job you'd like to "
"know more about.\n To see a list of all jobs, use "
"`check_all_jobs()`."
.format(self.job_dict.values().count('COMPLETED'),
self.job_dict.values().count('ERROR'),
self.job_dict.values().count('ABORTED'),
self.job_dict.values().count('PENDING'),
self.job_dict.values().count('EXECUTING'),
self.job_dict.values().count('QUEUED')))
return
else:
response_list = [self._request(
'GET', CosmoSim.QUERY_URL + "/{}".format(jobid),
auth=(self.username, self.password), cache=False)]
if response_list[0].ok is False:
log.error('Must provide a valid jobid.')
return
else:
self.response_dict_current = {}
self.response_dict_current[jobid] = (
self._generate_response_dict(response_list[0]))
if output is True:
                    dictkeys = list(self.response_dict_current.keys())
                    print(self.response_dict_current[dictkeys[0]]['content'])
return
else:
return
    def delete_job(self, jobid=None, squash=None):
"""
A public function which deletes a stored job from the server in any
phase. If no jobid is given, it attempts to use the most recent job
(if it exists in this session). If jobid is specified, then it
deletes the corresponding job, and if it happens to match the
existing current job, that variable gets deleted.
Parameters
----------
jobid : string
The jobid of the sql query. If no jobid is given, it attempts to
use the most recent job (if it exists in this session).
        squash : bool
            Suppress the confirmation message when the job is deleted.
Returns
-------
        result : `~requests.Response`
            The response from the delete request.
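        Examples
        --------
        Illustrative only; the jobid is a placeholder.
        >>> CosmoSim.delete_job(jobid='359748449665484')  # doctest: +SKIP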
"""
self.check_all_jobs()
if jobid is None:
if hasattr(self, 'current_job'):
jobid = self.current_job
if jobid:
if hasattr(self, 'current_job'):
if jobid == self.current_job:
del self.current_job
if self.job_dict[jobid] in ['COMPLETED', 'ERROR',
'ABORTED', 'PENDING']:
result = self.session.delete(
CosmoSim.QUERY_URL + "/{}".format(jobid),
auth=(self.username, self.password), data={'follow': ''})
else:
warnings.warn("Can only delete a job with phase: "
"'COMPLETED', 'ERROR', 'ABORTED', or 'PENDING'.")
return
if not result.ok:
result.raise_for_status()
if squash is None:
warnings.warn('Deleted job: {}'.format(jobid))
return result
    def abort_job(self, jobid=None):
        """
        Not yet implemented; currently this only refreshes the job list.
        """
self.check_all_jobs()
    def delete_all_jobs(self, phase=None, regex=None):
"""
A public function which deletes any/all jobs from the server in any
phase and/or with its tablename matching any desired regular
expression.
Parameters
----------
phase : list
A list of job phases to be deleted. If nothing provided, all are
deleted.
regex : string
A regular expression to match all tablenames to. Matching table
names will be deleted.
"""
self.check_all_jobs()
if regex:
pattern = re.compile("{}".format(regex))
            table_names = list(self.table_dict.values())
            groups = [pattern.match(name).group()
                      for name in table_names
                      if pattern.match(name) is not None]
            matching_tables = [group for group in groups
                               if group in table_names]
if phase:
phase = [phase[i].upper() for i in range(len(phase))]
if regex:
for key in self.job_dict.keys():
if self.job_dict[key] in phase:
if key in self.table_dict.keys():
if self.table_dict[key] in matching_tables:
result = self.session.delete(
CosmoSim.QUERY_URL + "/{}".format(key),
auth=(self.username, self.password),
data={'follow': ''})
if not result.ok:
result.raise_for_status()
warnings.warn("Deleted job: {0} (Table: {1})"
.format(key,
self.table_dict[key]))
if not regex:
for key in self.job_dict.keys():
if self.job_dict[key] in phase:
result = self.session.delete(
CosmoSim.QUERY_URL + "/{}".format(key),
auth=(self.username, self.password),
data={'follow': ''})
if not result.ok:
result.raise_for_status()
warnings.warn("Deleted job: {}".format(key))
if not phase:
if regex:
for key in self.job_dict.keys():
if key in self.table_dict.keys():
if self.table_dict[key] in matching_tables:
result = self.session.delete(
CosmoSim.QUERY_URL + "/{}".format(key),
auth=(self.username, self.password),
data={'follow': ''})
if not result.ok:
result.raise_for_status()
warnings.warn("Deleted job: {0} (Table: {1})"
.format(key, self.table_dict[key]))
if not regex:
for key in self.job_dict.keys():
result = self.session.delete(
CosmoSim.QUERY_URL + "/{}".format(key),
auth=(self.username, self.password),
data={'follow': ''})
if not result.ok:
result.raise_for_status()
warnings.warn("Deleted job: {}".format(key))
self._existing_tables()
return
def _generate_schema(self):
"""
Internal function which builds a schema of all simulations within
the database (in the form of a dictionary).
"""
response = self._request('GET', CosmoSim.SCHEMA_URL,
auth=(self.username, self.password),
headers={'Accept': 'application/json'},
cache=False)
data = response.json()
self.db_dict = {}
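        # Resulting nested layout:
        #   db_dict[db_name] -> {'id', 'description', 'tables'}
        #   db_dict[db_name]['tables'][table_name] ->
        #       {'id', 'description', 'columns'}
        #   db_dict[db_name]['tables'][table_name]['columns'][col_name] ->
        #       {'id', 'description'}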
for i in range(len(data['databases'])):
self.db_dict[str(data['databases'][i]['name'])] = {}
sstr = str(data['databases'][i]['name'])
sid = str(data['databases'][i]['id'])
self.db_dict[sstr]['id'] = sid
sdesc = str(data['databases'][i]['description'])
self.db_dict[sstr]['description'] = sdesc
self.db_dict[sstr]['tables'] = {}
for j in range(len(data['databases'][i]['tables'])):
sstr2 = str(data['databases'][i]['tables'][j]['name'])
self.db_dict[sstr]['tables'][sstr2] = {}
sdata = data['databases'][i]['tables'][j]['id']
self.db_dict[sstr]['tables'][sstr2]['id'] = sdata
sdesc2 = data['databases'][i]['tables'][j]['description']
self.db_dict[sstr]['tables'][sstr2]['description'] = sdesc2
self.db_dict[sstr]['tables'][sstr2]['columns'] = {}
tmpval = len(data['databases'][i]['tables'][j]['columns'])
for k in range(tmpval):
sdata2 = data['databases'][i]['tables'][j]['columns'][k]
sdata2_id = sdata2['id']
sstr3 = str(sdata2['name'])
sdesc3 = sdata2['description']
self.db_dict[sstr]['tables'][sstr2]['columns'][sstr3] = {
'id': sdata2_id,
'description': sdesc3}
return response
    def explore_db(self, db=None, table=None, col=None):
"""
A public function which allows for the exploration of any simulation
and its tables within the database. This function is meant to aid
the user in constructing sql queries.
Parameters
----------
db : string
The database to explore.
table : string
The table to explore.
col : string
The column to explore.
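        Examples
        --------
        Illustrative only; the database and table names are placeholders.
        >>> CosmoSim.explore_db(db='MDPL2', table='FOF')  # doctest: +SKIP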
"""
try:
self.db_dict
except AttributeError:
self._generate_schema()
projects = np.sort(list(self.db_dict.keys()))
largest = max([len(projects[i]) for i in range(len(projects))])
t = Table()
# db not specified
if not db:
warnings.warn("Must first specify a database.")
proj_list = []
attr_list = []
info_list = []
tmp2_largest = 0
for proj in projects:
size = len((self.db_dict['{0}'.format(proj)].keys()))
proj_list += (['@ {}'.format(proj)] +
['' for i in range(size - 1)] +
['-' * (largest + 2)])
tmp_largest = max([len(str(key))
for key in self.db_dict[proj].keys()])
attr_list += (['@ {}'.format(key)
if isinstance(self.db_dict[proj][key], dict)
else '{}:'.format(key)
for key in self.db_dict[proj].keys()] +
['-' * (tmp_largest + 2)])
tmpinfosize = max([len(self.db_dict[proj][key])
if isinstance(self.db_dict[proj][key], str)
else 0
for key in self.db_dict[proj].keys()])
if tmpinfosize > tmp2_largest:
tmp2_largest = tmpinfosize
for proj in projects:
info_list += ([self.db_dict[proj][key]
if isinstance(self.db_dict[proj][key], str)
else ""
for key in self.db_dict[proj].keys()] +
['-' * tmp2_largest])
t['Projects'] = proj_list
t['Project Items'] = attr_list
t['Information'] = info_list
t.pprint()
# db specified
if db:
try:
size1 = len(list(self.db_dict[str(db)].keys()))
slist = [list(self.db_dict[db][key].keys())
if isinstance(self.db_dict[db][key], dict)
else key
for key in self.db_dict[db].keys()]
size2 = len(max(slist, key=np.size))
except (KeyError, NameError):
log.error("Must first specify a valid database.")
return
# if col is specified, table must be specified, and I need to
# check the max size of any given column in the structure
if table:
try:
size2 = max(size2, len(list(self.db_dict[db]['tables']
[table]['columns'].keys())))
if col:
try:
size2 = max(size2, len(list(self.db_dict[db]['tables']
[table]['columns']
[col].keys())))
except(KeyError, NameError):
log.error("Must first specify a valid column "
"of the `{0}` table within the `{1}`"
" db".format(table, db))
return
except (KeyError, NameError):
log.error("Must first specify a valid table within "
"the `{0}` db.".format(db))
return
t['Projects'] = (['--> @ {}:'.format(db)] +
['' for i in range(size2 - 1)])
t['Project Items'] = (
['--> @ {}:'.format(key)
if (isinstance(self.db_dict[db][key], dict) and
(len(list(self.db_dict[db][key].keys())) ==
len(list(self.db_dict[db]['tables'].keys()))))
else '@ {}'.format(key)
if (isinstance(self.db_dict[db][key], dict) and
(len(list(self.db_dict[db][key].keys())) !=
len(self.db_dict[db]['tables'].keys())))
else str(key)
for key in self.db_dict[db].keys()] +
['' for i in range(size2 - size1)])
# if only db is specified
if not table:
if not col:
reordered = sorted(max(slist, key=np.size), key=len)
t['Tables'] = ['@ {}'.format(i)
if isinstance(self.db_dict[db]['tables'][i],
dict)
else str(i)
for i in reordered]
# if table has been specified
else:
reordered = (
[str(table)] +
sorted([key for key in self.db_dict[db]['tables'].keys()
if key != table], key=len))
t['Tables'] = (
['--> @ {}:'.format(i)
if (i == table and
isinstance(self.db_dict[db]['tables'][i], dict))
else '@ {}'.format(i)
if (i != table and
isinstance(self.db_dict[db]['tables'][i], dict))
else str(i)
for i in reordered] +
['' for j in range(size2 - len(reordered))])
# if column has been specified
if col:
tblcols_dict = self.db_dict[db]['tables'][table].keys()
t['Table Items'] = (
['--> @ columns:'] +
[i for i in tblcols_dict if i != 'columns'] +
['' for j in range(size2 - len(tblcols_dict))])
col_dict = (self.db_dict[db]['tables'][table]
['columns'].keys())
reordered = ([str(col)] +
[i for i in col_dict if i != col])
temp_columns = []
columns = self.db_dict[db]['tables'][table]['columns']
for i in reordered:
c = columns[i]
if isinstance(c, dict) and i == col:
temp_columns.append('--> @ {}:'.format(i))
elif not isinstance(c, dict) and i == col:
temp_columns.append('--> {}:'.format(i))
elif isinstance(c, dict) and i != col:
temp_columns.append('@ {}'.format(i))
else:
temp_columns.append(str(i))
if len(col_dict) < size2:
size_diff = size2 - len(col_dict)
t['Columns'] = (temp_columns +
['' for j in range(size_diff)])
colinfo_dict = col_dict = columns[col]
t['Col. Info'] = (
['{} : {}'.format(i, colinfo_dict[i])
for i in colinfo_dict.keys()] +
['' for j in range(size2 - len(colinfo_dict))])
else:
t['Columns'] = temp_columns
# if column has not been specified
else:
tblcols_dict = self.db_dict[db]['tables'][table].keys()
col_dict = (
self.db_dict[db]['tables'][table]['columns'].keys())
reordered = sorted(col_dict, key=len)
if len(tblcols_dict) < size2:
size_diff = size2 - len(tblcols_dict)
tmp_table = self.db_dict[db]['tables'][table]
t['Table Items'] = (
['@ {}'.format(i) if isinstance(tmp_table[i], dict)
else '{}:'.format(i) for i in tblcols_dict] +
['' for i in range(size_diff)])
t['Table Info'] = (
[str(tmp_table[i])
if not isinstance(tmp_table[i], dict)
else '' for i in tblcols_dict] +
['' for i in range(size_diff)])
if len(col_dict) < size2:
t['Columns'] = (
['@ {}'.format(i)
if isinstance(tmp_table['columns'][i], dict)
else str(i)
for i in reordered] +
['' for i in range(size2 - len(col_dict))])
else:
t['Columns'] = reordered
else:
t['Table Items'] = tblcols_dict
t.pprint()
    def download(self, jobid=None, filename=None, format=None, cache=True):
"""
A public function to download data from a job with COMPLETED phase.
Parameters
----------
        jobid : string
            The jobid of a completed query to be downloaded.
        filename : str
            If left blank, the data is returned (or printed) within the
            session rather than written to disk. If specified, data is
            written out to that file (a directory can be included here).
format : str
The format of the data to be downloaded. Options are ``'csv'``,
``'votable'``, ``'votableB1'``, and ``'votableB2'``.
cache : bool
Whether to cache the data. By default, this is set to True.
Returns
-------
headers, data : list, list
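        Examples
        --------
        Illustrative only; the jobid and filename are placeholders.
        >>> CosmoSim.download(jobid='359748449665484', filename='data.csv',
        ...                   format='csv')  # doctest: +SKIP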
"""
self.check_all_jobs()
if not jobid:
try:
jobid = self.current_job
except AttributeError:
warnings.warn("No current job has been defined for "
"this session.")
return
if self.job_dict[str(jobid)] == 'COMPLETED':
if not format:
warnings.warn("Must specify a format:")
t = Table()
t['Format'] = ['csv', 'votable', 'votableB1', 'votableB2']
t['Description'] = ['Comma-separated values file',
                                    'IVOA VOTable Format',
                                    'IVOA VOTable Format, Binary 1',
                                    'IVOA VOTable Format, Binary 2']
t.pprint()
if format:
results = self._request(
'GET', self.QUERY_URL + "/{}/results".format(jobid),
auth=(self.username, self.password))
soup = BeautifulSoup(results.content, "lxml")
urls = [i.get('xlink:href')
for i in soup.findAll({'uws:result'})]
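                # Each <uws:result> element links to one download format; the
                # last path component of its URL names that format.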
formatlist = [urls[i].split('/')[-1].upper()
for i in range(len(urls))]
if format.upper() in formatlist:
index = formatlist.index(format.upper())
downloadurl = urls[index]
if filename:
self._download_file(downloadurl,
local_filepath=filename,
auth=(self.username, self.password))
elif not filename:
if format.upper() == 'CSV':
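                            # The CSV payload is parsed by hand: the first
                            # line holds the quoted column names, and the
                            # trailing newline leaves an empty final line
                            # (hence the -2 when counting data rows).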
                            raw_table_data = self._request(
                                'GET', downloadurl,
                                auth=(self.username, self.password),
                                cache=cache).text
                            raw_lines = raw_table_data.split('\n')
                            raw_headers = raw_lines[0]
                            num_rows = len(raw_lines) - 2
                            headers = [header.strip('"')
                                       for header in raw_headers.split(',')]
                            raw_data = [raw_lines[i + 1].split(",")
                                        for i in range(num_rows)]
                            data = [list(map(eval, row))
                                    for row in raw_data]
                            return headers, data
elif format.upper() in ['VOTABLEB1', 'VOTABLEB2']:
warnings.warn("Cannot view binary versions of votable "
"within the terminal.\n Try saving them "
"to the disk with the 'filename' option")
return
elif format.upper() == 'VOTABLE':
# for terminal output, get data in csv format
                            tmp_downloadurl = urls[formatlist.index('CSV')]
                            raw_table_data = self._request(
                                'GET', tmp_downloadurl,
                                auth=(self.username, self.password),
                                cache=cache).text
                            raw_lines = raw_table_data.split('\n')
                            raw_headers = raw_lines[0]
                            num_rows = len(raw_lines) - 2
                            headers = [header.strip('"')
                                       for header in raw_headers.split(',')]
                            raw_data = [raw_lines[i + 1].split(",")
                                        for i in range(num_rows)]
                            data = [list(map(eval, row))
                                    for row in raw_data]
                            # store in astropy.Table object
                            tbl = Table(data=list(map(list, zip(*data))),
                                        names=headers)
                            # convert to votable format
                            votbl = votable.from_table(tbl)
                            return votbl
elif format.upper() not in formatlist:
print('Format not recognized. Please see formatting options:')
t = Table()
t['Format'] = ['csv', 'votable', 'votableB1', 'votableB2']
t['Description'] = ['Comma-Separated Values File',
'IVOA VOTable Format',
'IVOA VOTable Format, Binary 1',
'IVOA VOTable Format, Binary 2']
t.pprint()
def _check_phase(self, jobid):
"""
A private function which checks the job phase of a query.
Parameters
----------
jobid : string
The jobid of the sql query.
"""
self._existing_tables()
time.sleep(1)
if jobid not in self.job_dict.keys():
log.error("Job not present in job dictionary.")
return
else:
phase = self.job_dict[str(jobid)]
return phase
def _mail(self, to, subject, text, *attach):
"""
        A private function which sends an email message to a given address.
Parameters
----------
to : string
The email address receiving the job alert.
subject : string
The subject of the job alert.
text : string
The content of the job alert.
"""
msg = MIMEMultipart()
msg['From'] = self._smsaddress
msg['To'] = to
msg['Subject'] = subject
msg.attach(MIMEText(text))
n = len(attach)
for i in range(n):
part = MIMEBase('application', 'octet-stream')
            with open(attach[i], 'rb') as attachment:
                part.set_payload(attachment.read())
            encoders.encode_base64(part)
part.add_header('Content-Disposition', 'attachment; filename="{0}"'
.format(os.path.basename(attach[i])))
msg.attach(part)
mailServer = smtplib.SMTP('smtp.gmail.com', 587)
mailServer.ehlo()
mailServer.starttls()
mailServer.ehlo()
mailServer.login(self._smsaddress, self._smspw)
mailServer.sendmail(self._smsaddress, to, msg.as_string())
mailServer.quit()
def _text(self, fromwhom, number, text):
"""
A private function which sends an SMS message to a cell phone number.
Parameters
----------
fromwhom : string
The email address sending the alert:
"donotreply.astroquery.cosmosim@gmail.com"
number : string
            The user-provided cell phone number receiving the job alert.
text : string
The content of the job alert.
"""
server = smtplib.SMTP("smtp.gmail.com", 587)
server.starttls()
server.login(self._smsaddress, self._smspw)
server.sendmail(str(fromwhom), '{}@vtext.com'.format(number),
str(text))
server.quit()
def _initialize_alerting(self, jobid, mail=None, text=None):
"""
A private function which initializes the email/text alert service
credentials. Also preemptively checks for job phase being
COMPLETED, ABORTED, or ERROR so that users don't simply send alerts
for old jobs.
Parameters
----------
jobid : string
The jobid of the sql query.
mail : string
The user-provided email address receiving the job alert.
text : string
            The user-provided cell phone number receiving the job alert.
"""
self._smsaddress = "donotreply.astroquery.cosmosim@gmail.com"
password_from_keyring = keyring.get_password(
"astroquery:cosmosim.SMSAlert", self._smsaddress)
if password_from_keyring:
self._smspw = password_from_keyring
if not password_from_keyring:
log.warning("CosmoSim SMS alerting has not been initialized.")
warnings.warn("Initializing SMS alerting.")
keyring.set_password("astroquery:cosmosim.SMSAlert",
self._smsaddress, "LambdaCDM")
self.alert_email = mail
self.alert_text = text
# first check to see if the job has errored (or is a job that has
# already completed) before running on a loop
phase = self._check_phase(jobid)
if phase in ['COMPLETED', 'ABORTED', 'ERROR']:
warnings.warn("JobID {0} has finished with status {1}."
.format(jobid, phase))
self.alert_completed = True
elif phase in ['EXECUTING', 'PENDING', 'QUEUED']:
self.alert_completed = False
else:
self.alert_completed = False
class AlertThread:
""" Alert threading class
The _alert() method will be started and it will run in the background
until the application exits.
"""
def __init__(self, jobid, queue='short'):
"""
Parameters
----------
jobid : string
The jobid of the sql query.
queue : string
The short/long queue option. Default is short.
"""
self.jobid = jobid
self.queue = queue
thread = threading.Thread(target=self._alert, args=(self.jobid,
self.queue))
thread.daemon = True # Daemonize thread
thread.start() # Start the execution
def _alert(self, jobid, queue):
"""
A private function which runs checks for job completion every 10
seconds for short-queue jobs and 60 seconds for long-queue
jobs. Once job phase is COMPLETED, ERROR, or ABORTED, emails and/or
texts the results of the query to the user.
Parameters
----------
jobid : string
The jobid of the sql query.
queue : string
The short/long queue option. Default is short.
"""
if queue == 'long':
deltat = 60
else:
deltat = 10
while self.alert_completed is False:
phase = self._check_phase(jobid)
if phase in ['COMPLETED', 'ABORTED', 'ERROR']:
warnings.warn("JobID {0} has finished with status {1}."
.format(jobid, phase))
self.alert_completed = True
time.sleep(1)
self.general_job_info(jobid)
if self.alert_email:
self._mail(
self.alert_email, ("Job {0} Completed with phase {1}."
.format(jobid, phase)),
"{}".format(
self.response_dict_current[jobid]['content']))
if self.alert_text:
self._text(self._smsaddress,
self.alert_text,
("Job {0} Completed with phase {1}."
.format(jobid, phase)))
time.sleep(deltat)
CosmoSim = CosmoSimClass()