Source code for astroquery.cosmosim.core

# Licensed under a 3-clause BSD style license - see LICENSE.rst


import requests
import warnings
import numpy as np
import sys
from bs4 import BeautifulSoup
import keyring
import time
import smtplib
import re
import os
import threading
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase, message
from email.mime.text import MIMEText

# Astropy imports
from astropy.table import Table
import astropy.io.votable as votable
from astroquery import log

# Astroquery imports
from ..query import QueryWithLogin
from . import conf
from ..exceptions import LoginError

conf.max_lines = -1
conf.max_width = -1

__all__ = ['CosmoSim']


[docs]class CosmoSimClass(QueryWithLogin): QUERY_URL = conf.query_url SCHEMA_URL = conf.schema_url TIMEOUT = conf.timeout USERNAME = conf.username def __init__(self): super(CosmoSimClass, self).__init__() def _login(self, username=None, password=None, store_password=False, reenter_password=False): """ Login to the CosmoSim database. Parameters ---------- username : str, optional Username to the CosmoSim database. If not given, it should be specified in the config file. store_password : bool, optional Stores the password securely in your keyring. Default is False. reenter_password : bool, optional Asks for the password even if it is already stored in the keyring. This is the way to overwrite an already stored passwork on the keyring. Default is False. """ if username is None: if self.USERNAME == "": raise LoginError("If you do not pass a username to login(), " "you should configure a default one!") else: username = self.USERNAME # login after logging out (interactive) if not hasattr(self, 'session'): self.session = requests.session() # login after login (interactive) if hasattr(self, 'username'): log.warning("Attempting to login while another user ({0}) " "is already logged in.".format(self.username)) self.check_login_status() return self.username = username # Get password from keyring or prompt self.password, password_from_keyring = self._get_password( "astroquery:www.cosmosim.org", username, reenter=reenter_password) # Authenticate warnings.warn("Authenticating {0} on www.cosmosim.org..." .format(self.username)) authenticated = self._request('POST', CosmoSim.QUERY_URL, auth=(self.username, self.password), cache=False) if authenticated.status_code == 200: warnings.warn("Authentication successful!") elif (authenticated.status_code == 401 or authenticated.status_code == 403): warnings.warn("Authentication failed!") elif authenticated.status_code == 503: warnings.warn("Service Temporarily Unavailable...") # Generating dictionary of existing tables self._existing_tables() if (authenticated.status_code == 200 and password_from_keyring is None and store_password): keyring.set_password("astroquery:www.cosmosim.org", self.username, self.password) # Delete job; prevent them from piling up with phase PENDING if authenticated.status_code == 200: soup = BeautifulSoup(authenticated.content, "lxml") self.delete_job(jobid=str(soup.find("uws:jobref")["id"]), squash=True) return authenticated
[docs] def logout(self, deletepw=False): """ Public function which allows the user to logout of their cosmosim credentials. Parameters ---------- deletepw : bool A hard logout - delete the password to the associated username from the keychain. The default is True. Returns ------- """ if (hasattr(self, 'username') and hasattr(self, 'password') and hasattr(self, 'session')): if deletepw is True: try: keyring.delete_password("astroquery:www.cosmosim.org", self.username) warnings.warn("Removed password for {0} in the keychain." .format(self.username)) except keyring.errors.PasswordDeleteError: warnings.warn("Password for {0} was never stored in the " "keychain.".format(self.username)) del self.session del self.username del self.password else: log.error("You must log in before attempting to logout.")
[docs] def check_login_status(self): """ Public function which checks the status of a user login attempt. """ if (hasattr(self, 'username') and hasattr(self, 'password') and hasattr(self, 'session')): authenticated = self._request('POST', CosmoSim.QUERY_URL, auth=(self.username, self.password), cache=False) if authenticated.status_code == 200: warnings.warn("Status: You are logged in as {0}." .format(self.username)) soup = BeautifulSoup(authenticated.content, "lxml") self.delete_job(jobid=str(soup.find("uws:jobref")["id"]), squash=True) else: warnings.warn("Status: The username/password combination " "for {0} appears to be incorrect." .format(self.username)) warnings.warn("Please re-attempt to login with your cosmosim " "credentials.") else: warnings.warn("Status: You are not logged in.")
[docs] def run_sql_query(self, query_string, tablename=None, queue=None, mail=None, text=None, cache=True): """ Public function which sends a POST request containing the sql query string. Parameters ---------- query_string : string The sql query to be sent to the CosmoSim.org server. tablename : string The name of the table for which the query data will be stored under. If left blank or if it already exists, one will be generated automatically. queue : string The short/long queue option. Default is short. mail : string The user's email address for receiving job completion alerts. text : string The user's cell phone number for receiving job completion alerts. cache : bool Whether to cache the query locally Returns ------- result : jobid The jobid of the query """ self._existing_tables() if not queue: queue = 'short' if tablename in self.table_dict.values(): result = self._request('POST', CosmoSim.QUERY_URL, auth=(self.username, self.password), data={'query': query_string, 'phase': 'run', 'queue': queue}, cache=cache) soup = BeautifulSoup(result.content, "lxml") phase = soup.find("uws:phase").string if phase in ['ERROR']: warnings.warn("No table was generated for job with phase " "`{0}`".format(phase)) gen_tablename = "{0}".format(phase) else: gen_tablename = str(soup.find(id="table").string) log.warning("Table name {0} is already taken." .format(tablename)) warnings.warn("Generated table name: {0}".format(gen_tablename)) elif tablename is None: result = self._request('POST', CosmoSim.QUERY_URL, auth=(self.username, self.password), data={'query': query_string, 'phase': 'run', 'queue': queue}, cache=cache) else: result = self._request('POST', CosmoSim.QUERY_URL, auth=(self.username, self.password), data={'query': query_string, 'table': str(tablename), 'phase': 'run', 'queue': queue}, cache=cache) self._existing_tables() soup = BeautifulSoup(result.content, "lxml") self.current_job = str(soup.find("uws:jobref")["id"]) warnings.warn("Job created: {}".format(self.current_job)) if mail or text: self._initialize_alerting(self.current_job, mail=mail, text=text) return self.current_job
def _existing_tables(self): """ Internal function which builds a dictionary of the tables already in use for a given set of user credentials. Keys are jobids and values are the tables which are stored under those keys. """ checkalljobs = self.check_all_jobs() completed_jobs = [key for key in self.job_dict.keys() if self.job_dict[key] in ['COMPLETED', 'EXECUTING']] soup = BeautifulSoup(checkalljobs.content, "lxml") self.table_dict = {} for tag in soup.find_all({"uws:jobref"}): jobid = tag.get('xlink:href').split('/')[-1] if jobid in completed_jobs: self.table_dict[jobid] = str(tag.get('id'))
[docs] def check_job_status(self, jobid=None): """ A public function which sends an http GET request for a given jobid, and checks the server status. If no jobid is provided, it uses the most recent query (if one exists). Parameters ---------- jobid : string The jobid of the sql query. If no jobid is given, it attempts to use the most recent job (if it exists in this session). Returns ------- result : content of `~requests.Response` object The requests response phase """ if jobid is None: if hasattr(self, 'current_job'): jobid = self.current_job else: jobid = self.current_job response = self._request( 'GET', CosmoSim.QUERY_URL + '/{}'.format(jobid) + '/phase', auth=(self.username, self.password), data={'print': 'b'}, cache=False) log.info("Job {}: {}".format(jobid, response.content)) return response.content
[docs] def check_all_jobs(self, phase=None, regex=None, sortby=None): """ Public function which builds a dictionary whose keys are each jobid for a given set of user credentials and whose values are the phase status (e.g. - EXECUTING,COMPLETED,PENDING,ERROR). Parameters ---------- phase : list A list of phase(s) of jobs to be checked on. If nothing provided, all are checked. regex : string A regular expression to match all tablenames to. Matching table names will be included. Note - Only tables/starttimes are associated with jobs which have phase COMPLETED. sortby : string An option to sort jobs (after phase and regex criteria have been taken into account) by either the execution start time (``starttime``), or by the table name (``'tablename'``). Returns ------- checkalljobs : `~requests.Response` object The requests response for the GET request for finding all existing jobs. """ checkalljobs = self._request('GET', CosmoSim.QUERY_URL, auth=(self.username, self.password), params={'print': 'b'}, cache=False) self.job_dict = {} soup = BeautifulSoup(checkalljobs.content, "lxml") for tag in soup.find_all({"uws:jobref"}): tag_phase = str(tag.find('uws:phase').string) if tag_phase in ['COMPLETED', 'EXECUTING', 'ABORTED', 'ERROR']: self.job_dict['{0}'.format(tag.get('xlink:href') .split('/')[-1])] = tag_phase else: self.job_dict[str(tag.get('id'))] = tag_phase if phase: phase = [phase[i].upper() for i in range(len(phase))] if regex: pattern = re.compile("{}".format(regex)) try: groups = [pattern.match(self.table_dict.values()[i]).group() for i in range(len(self.table_dict.values())) if pattern.match(self.table_dict.values()[i]) is not None] matching_tables = [groups[i] for i in range(len(groups)) if groups[i] in self.table_dict.values()] except AttributeError: warnings.warn('No tables matching the regular expression ' '`{0}` were found.'.format(regex)) matching_tables = self.table_dict.values() if phase: if "COMPLETED" not in phase: warnings.warn("No jobs with phase `{0}` matching " "the regular expression `{1}` were found.\n" "Matching regular expression `{1}` to all " "jobs with phase `COMPLETED` instead " "(unsorted):".format(phase, regex)) else: matching_tables = [[self.table_dict[i] for i in self.table_dict.keys() if self.table_dict[i] == miter and self.job_dict[i] in phase][0] for miter in matching_tables] self._existing_tables() # creates a fresh up-to-date table_dict self._starttime_dict() if not sortby: if regex: matching = zip(*[[(i, self.job_dict[i], self.starttime_dict[i]) for i in self.table_dict.keys() if self.table_dict[i] == miter][0] for miter in matching_tables]) (matching_jobids, matching_phases, matching_starttimes) = matching if sortby: if sortby.upper() == "TABLENAME": if 'matching_tables' not in locals(): matching_tables = sorted(self.table_dict.values()) else: matching_tables = sorted(matching_tables) matching = zip(*[[(i, self.job_dict[i], self.starttime_dict[i]) for i in self.table_dict.keys() if self.table_dict[i] == miter][0] for miter in matching_tables]) (matching_jobids, matching_phases, matching_starttimes) = matching elif sortby.upper() == 'STARTTIME': if 'matching_tables' not in locals(): matching_starttimes = sorted(self.starttime_dict.values()) matching = zip(*[[(i, self.job_dict[i], self.table_dict[i]) for i in self.starttime_dict.keys() if self.starttime_dict[i] == miter][0] for miter in matching_starttimes]) (matching_jobids, matching_phases, matching_tables) = matching else: matching_starttimes = [[self.starttime_dict[i] for i in self.table_dict.keys() if self.table_dict[i] == miter][0] for miter in matching_tables] matching = zip(*[[(i, self.job_dict[i], self.table_dict[i]) for i in self.starttime_dict.keys() if self.starttime_dict[i] == miter][0] for miter in matching_starttimes]) (matching_jobids, matching_phases, matching_tables) = matching frame = sys._getframe(1) # list of methods which use check_all_jobs() for which I would not # like job_dict to be printed to the terminal do_not_print_job_dict = ['completed_job_info', 'general_job_info', 'delete_all_jobs', '_existing_tables', 'delete_job', 'download'] if frame.f_code.co_name in do_not_print_job_dict: return checkalljobs else: if not phase and not regex: if not sortby: t = Table() t['JobID'] = list(self.job_dict.keys()) t['Phase'] = list(self.job_dict.values()) t.pprint() else: if sortby.upper() == 'TABLENAME': t = Table() t['Tablename'] = matching_tables t['Starttime'] = matching_starttimes t['JobID'] = matching_jobids t['Phase'] = matching_phases t.pprint() if sortby.upper() == 'STARTTIME': t = Table() t['Starttime'] = matching_starttimes t['Tablename'] = matching_tables t['JobID'] = matching_jobids t['Phase'] = matching_phases t.pprint() elif not phase and regex: t = Table() if sortby: if sortby.upper() == 'STARTTIME': t['Starttime'] = matching_starttimes t['Tablename'] = matching_tables if sortby.upper() == 'TABLENAME': t['Tablename'] = matching_tables t['Starttime'] = matching_starttimes if not sortby: t['Tablename'] = matching_tables t['Starttime'] = matching_starttimes t['JobID'] = matching_jobids t['Phase'] = matching_phases t.pprint() if phase and not regex: if len(phase) == 1 and "COMPLETED" in phase: if not sortby: matching_jobids = [key for key in self.job_dict.keys() if self.job_dict[key] in phase] matching = zip(*[[(self.table_dict[i], self.job_dict[i], self.starttime_dict[i]) for i in self.table_dict.keys() if i == miter][0] for miter in matching_jobids]) (matching_tables, matching_phases, matching_starttimes) = matching t = Table() t['JobID'] = matching_jobids t['Phase'] = matching_phases t['Tablename'] = matching_tables t['Starttime'] = matching_starttimes if sortby: if sortby.upper() == 'TABLENAME': t['Tablename', 'Starttime', 'JobID', 'Phase'].pprint() if sortby.upper() == 'STARTTIME': t['Starttime', 'Tablename', 'JobID', 'Phase'].pprint() else: t.pprint() else: if sortby: warnings.warn('Sorting can only be applied to jobs ' 'with phase `COMPLETED`.') if not sortby: matching_jobids = [key for key in self.job_dict.keys() if self.job_dict[key] in phase] matching_phases = [self.job_dict[key] for key in self.job_dict.keys() if self.job_dict[key] in phase] t = Table() t['JobID'] = matching_jobids t['Phase'] = matching_phases t.pprint() if phase and regex: t = Table() t['Tablename'] = matching_tables t['Starttime'] = matching_starttimes t['JobID'] = matching_jobids t['Phase'] = matching_phases if sortby: if sortby.upper() == 'TABLENAME': t.pprint() if sortby.upper() == 'STARTTIME': t['Starttime', 'Tablename', 'JobID', 'Phase'].pprint() else: t.pprint() return checkalljobs
[docs] def completed_job_info(self, jobid=None, output=False): """ A public function which sends an http GET request for a given jobid with phase COMPLETED. If output is True, the function prints a dictionary to the screen, while always generating a global dictionary ``response_dict_current``. If no jobid is provided, a visual of all responses with phase COMPLETED is generated. Parameters ---------- jobid : string The jobid of the sql query. output : bool Print output of response(s) to the terminal """ self.check_all_jobs() if jobid is None: completed_jobids = [key for key in self.job_dict.keys() if self.job_dict[key] == 'COMPLETED'] response_list = [ self._request( 'GET', CosmoSim.QUERY_URL + "/{}".format(completed_jobids[i]), auth=(self.username, self.password), cache=False) for i in range(len(completed_jobids))] self.response_dict_current = {} for i, vals in enumerate(completed_jobids): self.response_dict_current[vals] = ( self._generate_response_dict(response_list[i])) else: if self.job_dict[jobid] == 'COMPLETED': response_list = [ self._request( 'GET', CosmoSim.QUERY_URL + "/{}".format(jobid), auth=(self.username, self.password), cache=False)] self.response_dict_current = {} self.response_dict_current[jobid] = ( self._generate_response_dict(response_list[0])) else: warnings.warn("JobID must refer to a query with a phase " "of 'COMPLETED'.") return if output is True: dictkeys = self.response_dict_current.keys() if len(dictkeys) > 1: keys = [i for i in self.response_dict_current.keys()] phases = [self.job_dict[key] for key in keys] t = Table() t['JobID'] = keys t['Phase'] = phases t.pprint() warnings.warn("Use specific jobid to get more information, or " "explore `self.response_dict_current`.") elif len(dictkeys) == 1: print(self.response_dict_current[dictkeys[0]]['content']) else: log.error('No completed jobs found.') return else: return
def _generate_response_dict(self, response): """ A private function which takes in a response object and creates a response dictionary. Parameters ---------- response : `~requests.Response` requests response object Returns ------- response_dict : dict A dictionary of some of the response object's methods """ R = response response_dict = {'content': R.content, 'cookies': R.cookies, 'elapsed': R.elapsed, 'encoding': R.encoding, 'headers': R.headers, 'ok': R.ok, 'request': R.request, 'url': R.url} return response_dict def _starttime_dict(self): """ A private function which generates a dictionary of jobids (must have phase COMPLETED) linked to starttimes. """ completed_ids = [key for key in self.job_dict.keys() if self.job_dict[key] == 'COMPLETED'] response_list = [self._request('GET', CosmoSim.QUERY_URL + "/{}".format(i), auth=(self.username, self.password), cache=False) for i in completed_ids] soups = [BeautifulSoup(response_list[i].content, "lxml") for i in range(len(response_list))] self.starttime_dict = {} for i in range(len(soups)): self.starttime_dict[str(completed_ids[i])] = str( soups[i].find('uws:starttime').string)
[docs] def general_job_info(self, jobid=None, output=False): """ A public function which sends an http GET request for a given jobid in any phase. If no jobid is provided, a summary of all jobs is generated. Parameters ---------- jobid : string The jobid of the sql query. output : bool Print output of response(s) to the terminal """ self.check_all_jobs() if jobid is None: print("Job Summary:\n" "There are {0} jobs with phase: COMPLETED.\n" "There are {1} jobs with phase: ERROR.\n" "There are {2} jobs with phase: ABORTED.\n" "There are {3} jobs with phase: PENDING.\n" "There are {4} jobs with phase: EXECUTING.\n" "There are {5} jobs with phase: QUEUED.\n" "Try providing a jobid for the job you'd like to " "know more about.\n To see a list of all jobs, use " "`check_all_jobs()`." .format(self.job_dict.values().count('COMPLETED'), self.job_dict.values().count('ERROR'), self.job_dict.values().count('ABORTED'), self.job_dict.values().count('PENDING'), self.job_dict.values().count('EXECUTING'), self.job_dict.values().count('QUEUED'))) return else: response_list = [self._request( 'GET', CosmoSim.QUERY_URL + "/{}".format(jobid), auth=(self.username, self.password), cache=False)] if response_list[0].ok is False: log.error('Must provide a valid jobid.') return else: self.response_dict_current = {} self.response_dict_current[jobid] = ( self._generate_response_dict(response_list[0])) if output is True: dictkeys = self.response_dict_current.keys() print(self.response_dict_current[dictkeys[0]]['content']) return else: return
[docs] def delete_job(self, jobid=None, squash=None): """ A public function which deletes a stored job from the server in any phase. If no jobid is given, it attempts to use the most recent job (if it exists in this session). If jobid is specified, then it deletes the corresponding job, and if it happens to match the existing current job, that variable gets deleted. Parameters ---------- jobid : string The jobid of the sql query. If no jobid is given, it attempts to use the most recent job (if it exists in this session). output : bool Print output of response(s) to the terminal Returns ------- result : list A list of response object(s) """ self.check_all_jobs() if jobid is None: if hasattr(self, 'current_job'): jobid = self.current_job if jobid: if hasattr(self, 'current_job'): if jobid == self.current_job: del self.current_job if self.job_dict[jobid] in ['COMPLETED', 'ERROR', 'ABORTED', 'PENDING']: result = self.session.delete( CosmoSim.QUERY_URL + "/{}".format(jobid), auth=(self.username, self.password), data={'follow': ''}) else: warnings.warn("Can only delete a job with phase: " "'COMPLETED', 'ERROR', 'ABORTED', or 'PENDING'.") return if not result.ok: result.raise_for_status() if squash is None: warnings.warn('Deleted job: {}'.format(jobid)) return result
[docs] def abort_job(self, jobid=None): """ """ self.check_all_jobs()
[docs] def delete_all_jobs(self, phase=None, regex=None): """ A public function which deletes any/all jobs from the server in any phase and/or with its tablename matching any desired regular expression. Parameters ---------- phase : list A list of job phases to be deleted. If nothing provided, all are deleted. regex : string A regular expression to match all tablenames to. Matching table names will be deleted. """ self.check_all_jobs() if regex: pattern = re.compile("{}".format(regex)) groups = [pattern.match(self.table_dict.values()[i]).group() for i in range(len(self.table_dict.values()))] matching_tables = [groups[i] for i in range(len(groups)) if groups[i] in self.table_dict.values()] if phase: phase = [phase[i].upper() for i in range(len(phase))] if regex: for key in self.job_dict.keys(): if self.job_dict[key] in phase: if key in self.table_dict.keys(): if self.table_dict[key] in matching_tables: result = self.session.delete( CosmoSim.QUERY_URL + "/{}".format(key), auth=(self.username, self.password), data={'follow': ''}) if not result.ok: result.raise_for_status() warnings.warn("Deleted job: {0} (Table: {1})" .format(key, self.table_dict[key])) if not regex: for key in self.job_dict.keys(): if self.job_dict[key] in phase: result = self.session.delete( CosmoSim.QUERY_URL + "/{}".format(key), auth=(self.username, self.password), data={'follow': ''}) if not result.ok: result.raise_for_status() warnings.warn("Deleted job: {}".format(key)) if not phase: if regex: for key in self.job_dict.keys(): if key in self.table_dict.keys(): if self.table_dict[key] in matching_tables: result = self.session.delete( CosmoSim.QUERY_URL + "/{}".format(key), auth=(self.username, self.password), data={'follow': ''}) if not result.ok: result.raise_for_status() warnings.warn("Deleted job: {0} (Table: {1})" .format(key, self.table_dict[key])) if not regex: for key in self.job_dict.keys(): result = self.session.delete( CosmoSim.QUERY_URL + "/{}".format(key), auth=(self.username, self.password), data={'follow': ''}) if not result.ok: result.raise_for_status() warnings.warn("Deleted job: {}".format(key)) self._existing_tables() return
def _generate_schema(self): """ Internal function which builds a schema of all simulations within the database (in the form of a dictionary). """ response = self._request('GET', CosmoSim.SCHEMA_URL, auth=(self.username, self.password), headers={'Accept': 'application/json'}, cache=False) data = response.json() self.db_dict = {} for i in range(len(data['databases'])): self.db_dict[str(data['databases'][i]['name'])] = {} sstr = str(data['databases'][i]['name']) sid = str(data['databases'][i]['id']) self.db_dict[sstr]['id'] = sid sdesc = str(data['databases'][i]['description']) self.db_dict[sstr]['description'] = sdesc self.db_dict[sstr]['tables'] = {} for j in range(len(data['databases'][i]['tables'])): sstr2 = str(data['databases'][i]['tables'][j]['name']) self.db_dict[sstr]['tables'][sstr2] = {} sdata = data['databases'][i]['tables'][j]['id'] self.db_dict[sstr]['tables'][sstr2]['id'] = sdata sdesc2 = data['databases'][i]['tables'][j]['description'] self.db_dict[sstr]['tables'][sstr2]['description'] = sdesc2 self.db_dict[sstr]['tables'][sstr2]['columns'] = {} tmpval = len(data['databases'][i]['tables'][j]['columns']) for k in range(tmpval): sdata2 = data['databases'][i]['tables'][j]['columns'][k] sdata2_id = sdata2['id'] sstr3 = str(sdata2['name']) sdesc3 = sdata2['description'] self.db_dict[sstr]['tables'][sstr2]['columns'][sstr3] = { 'id': sdata2_id, 'description': sdesc3} return response
[docs] def explore_db(self, db=None, table=None, col=None): """ A public function which allows for the exploration of any simulation and its tables within the database. This function is meant to aid the user in constructing sql queries. Parameters ---------- db : string The database to explore. table : string The table to explore. col : string The column to explore. """ try: self.db_dict except AttributeError: self._generate_schema() projects = np.sort(list(self.db_dict.keys())) largest = max([len(projects[i]) for i in range(len(projects))]) t = Table() # db not specified if not db: warnings.warn("Must first specify a database.") proj_list = [] attr_list = [] info_list = [] tmp2_largest = 0 for proj in projects: size = len((self.db_dict['{0}'.format(proj)].keys())) proj_list += (['@ {}'.format(proj)] + ['' for i in range(size - 1)] + ['-' * (largest + 2)]) tmp_largest = max([len(str(key)) for key in self.db_dict[proj].keys()]) attr_list += (['@ {}'.format(key) if isinstance(self.db_dict[proj][key], dict) else '{}:'.format(key) for key in self.db_dict[proj].keys()] + ['-' * (tmp_largest + 2)]) tmpinfosize = max([len(self.db_dict[proj][key]) if isinstance(self.db_dict[proj][key], str) else 0 for key in self.db_dict[proj].keys()]) if tmpinfosize > tmp2_largest: tmp2_largest = tmpinfosize for proj in projects: info_list += ([self.db_dict[proj][key] if isinstance(self.db_dict[proj][key], str) else "" for key in self.db_dict[proj].keys()] + ['-' * tmp2_largest]) t['Projects'] = proj_list t['Project Items'] = attr_list t['Information'] = info_list t.pprint() # db specified if db: try: size1 = len(list(self.db_dict[str(db)].keys())) slist = [list(self.db_dict[db][key].keys()) if isinstance(self.db_dict[db][key], dict) else key for key in self.db_dict[db].keys()] size2 = len(max(slist, key=np.size)) except (KeyError, NameError): log.error("Must first specify a valid database.") return # if col is specified, table must be specified, and I need to # check the max size of any given column in the structure if table: try: size2 = max(size2, len(list(self.db_dict[db]['tables'] [table]['columns'].keys()))) if col: try: size2 = max(size2, len(list(self.db_dict[db]['tables'] [table]['columns'] [col].keys()))) except(KeyError, NameError): log.error("Must first specify a valid column " "of the `{0}` table within the `{1}`" " db".format(table, db)) return except (KeyError, NameError): log.error("Must first specify a valid table within " "the `{0}` db.".format(db)) return t['Projects'] = (['--> @ {}:'.format(db)] + ['' for i in range(size2 - 1)]) t['Project Items'] = ( ['--> @ {}:'.format(key) if (isinstance(self.db_dict[db][key], dict) and (len(list(self.db_dict[db][key].keys())) == len(list(self.db_dict[db]['tables'].keys())))) else '@ {}'.format(key) if (isinstance(self.db_dict[db][key], dict) and (len(list(self.db_dict[db][key].keys())) != len(self.db_dict[db]['tables'].keys()))) else str(key) for key in self.db_dict[db].keys()] + ['' for i in range(size2 - size1)]) # if only db is specified if not table: if not col: reordered = sorted(max(slist, key=np.size), key=len) t['Tables'] = ['@ {}'.format(i) if isinstance(self.db_dict[db]['tables'][i], dict) else str(i) for i in reordered] # if table has been specified else: reordered = ( [str(table)] + sorted([key for key in self.db_dict[db]['tables'].keys() if key != table], key=len)) t['Tables'] = ( ['--> @ {}:'.format(i) if (i == table and isinstance(self.db_dict[db]['tables'][i], dict)) else '@ {}'.format(i) if (i != table and isinstance(self.db_dict[db]['tables'][i], dict)) else str(i) for i in reordered] + ['' for j in range(size2 - len(reordered))]) # if column has been specified if col: tblcols_dict = self.db_dict[db]['tables'][table].keys() t['Table Items'] = ( ['--> @ columns:'] + [i for i in tblcols_dict if i != 'columns'] + ['' for j in range(size2 - len(tblcols_dict))]) col_dict = (self.db_dict[db]['tables'][table] ['columns'].keys()) reordered = ([str(col)] + [i for i in col_dict if i != col]) temp_columns = [] columns = self.db_dict[db]['tables'][table]['columns'] for i in reordered: c = columns[i] if isinstance(c, dict) and i == col: temp_columns.append('--> @ {}:'.format(i)) elif not isinstance(c, dict) and i == col: temp_columns.append('--> {}:'.format(i)) elif isinstance(c, dict) and i != col: temp_columns.append('@ {}'.format(i)) else: temp_columns.append(str(i)) if len(col_dict) < size2: size_diff = size2 - len(col_dict) t['Columns'] = (temp_columns + ['' for j in range(size_diff)]) colinfo_dict = col_dict = columns[col] t['Col. Info'] = ( ['{} : {}'.format(i, colinfo_dict[i]) for i in colinfo_dict.keys()] + ['' for j in range(size2 - len(colinfo_dict))]) else: t['Columns'] = temp_columns # if column has not been specified else: tblcols_dict = self.db_dict[db]['tables'][table].keys() col_dict = ( self.db_dict[db]['tables'][table]['columns'].keys()) reordered = sorted(col_dict, key=len) if len(tblcols_dict) < size2: size_diff = size2 - len(tblcols_dict) tmp_table = self.db_dict[db]['tables'][table] t['Table Items'] = ( ['@ {}'.format(i) if isinstance(tmp_table[i], dict) else '{}:'.format(i) for i in tblcols_dict] + ['' for i in range(size_diff)]) t['Table Info'] = ( [str(tmp_table[i]) if not isinstance(tmp_table[i], dict) else '' for i in tblcols_dict] + ['' for i in range(size_diff)]) if len(col_dict) < size2: t['Columns'] = ( ['@ {}'.format(i) if isinstance(tmp_table['columns'][i], dict) else str(i) for i in reordered] + ['' for i in range(size2 - len(col_dict))]) else: t['Columns'] = reordered else: t['Table Items'] = tblcols_dict t.pprint()
[docs] def download(self, jobid=None, filename=None, format=None, cache=True): """ A public function to download data from a job with COMPLETED phase. Parameters ---------- jobid : Completed jobid to be downloaded filename : str If left blank, downloaded to the terminal. If specified, data is written out to file (directory can be included here). format : str The format of the data to be downloaded. Options are ``'csv'``, ``'votable'``, ``'votableB1'``, and ``'votableB2'``. cache : bool Whether to cache the data. By default, this is set to True. Returns ------- headers, data : list, list """ self.check_all_jobs() if not jobid: try: jobid = self.current_job except AttributeError: warnings.warn("No current job has been defined for " "this session.") return if self.job_dict[str(jobid)] == 'COMPLETED': if not format: warnings.warn("Must specify a format:") t = Table() t['Format'] = ['csv', 'votable', 'votableB1', 'votableB2'] t['Description'] = ['Comma-separated values file', 'Put In Description', 'Put In Description', 'Put In Description'] t.pprint() if format: results = self._request( 'GET', self.QUERY_URL + "/{}/results".format(jobid), auth=(self.username, self.password)) soup = BeautifulSoup(results.content, "lxml") urls = [i.get('xlink:href') for i in soup.findAll({'uws:result'})] formatlist = [urls[i].split('/')[-1].upper() for i in range(len(urls))] if format.upper() in formatlist: index = formatlist.index(format.upper()) downloadurl = urls[index] if filename: self._download_file(downloadurl, local_filepath=filename, auth=(self.username, self.password)) elif not filename: if format.upper() == 'CSV': raw_table_data = self._request( 'GET', downloadurl, auth=(self.username, self.password), cache=cache).content raw_headers = raw_table_data.split('\n')[0] num_cols = len(raw_headers.split(',')) num_rows = len(raw_table_data.split('\n')) - 2 headers = [raw_headers.split(',')[i].strip('"') for i in range(num_cols)] raw_data = [ raw_table_data.split('\n')[i + 1].split(",") for i in range(num_rows)] data = [map(eval, raw_data[i]) for i in range(num_rows)] return headers, data elif format.upper() in ['VOTABLEB1', 'VOTABLEB2']: warnings.warn("Cannot view binary versions of votable " "within the terminal.\n Try saving them " "to the disk with the 'filename' option") return elif format.upper() == 'VOTABLE': # for terminal output, get data in csv format tmp_downloadurl = urls[formatlist.index('CSV')] raw_table_data = self._request( 'GET', tmp_downloadurl, auth=(self.username, self.password), cache=cache).content raw_headers = raw_table_data.split('\n')[0] num_cols = len(raw_headers.split(',')) num_rows = len(raw_table_data.split('\n')) - 2 headers = [raw_headers.split(',')[i].strip('"') for i in range(num_cols)] raw_data = [ raw_table_data.split('\n')[i + 1].split(",") for i in range(num_rows)] data = [map(eval, raw_data[i]) for i in range(num_rows)] # store in astropy.Table object tbl = Table(data=map(list, zip(*data)), names=headers) # convert to votable format votbl = votable.from_table(tbl) return votbl elif format.upper() not in formatlist: print('Format not recognized. Please see formatting options:') t = Table() t['Format'] = ['csv', 'votable', 'votableB1', 'votableB2'] t['Description'] = ['Comma-Separated Values File', 'IVOA VOTable Format', 'IVOA VOTable Format, Binary 1', 'IVOA VOTable Format, Binary 2'] t.pprint()
def _check_phase(self, jobid): """ A private function which checks the job phase of a query. Parameters ---------- jobid : string The jobid of the sql query. """ self._existing_tables() time.sleep(1) if jobid not in self.job_dict.keys(): log.error("Job not present in job dictionary.") return else: phase = self.job_dict[str(jobid)] return phase def _mail(self, to, subject, text, *attach): """ A private function which sends an SMS message to an email address. Parameters ---------- to : string The email address receiving the job alert. subject : string The subject of the job alert. text : string The content of the job alert. """ msg = MIMEMultipart() msg['From'] = self._smsaddress msg['To'] = to msg['Subject'] = subject msg.attach(MIMEText(text)) n = len(attach) for i in range(n): part = MIMEBase('application', 'octet-stream') part.set_payload(open(attach[i], 'rb').read()) message.email.Encoders.encode_base64(part) part.add_header('Content-Disposition', 'attachment; filename="{0}"' .format(os.path.basename(attach[i]))) msg.attach(part) mailServer = smtplib.SMTP('smtp.gmail.com', 587) mailServer.ehlo() mailServer.starttls() mailServer.ehlo() mailServer.login(self._smsaddress, self._smspw) mailServer.sendmail(self._smsaddress, to, msg.as_string()) mailServer.quit() def _text(self, fromwhom, number, text): """ A private function which sends an SMS message to a cell phone number. Parameters ---------- fromwhom : string The email address sending the alert: "donotreply.astroquery.cosmosim@gmail.com" number : string The user-provided cell phone receiving the job alert. text : string The content of the job alert. """ server = smtplib.SMTP("smtp.gmail.com", 587) server.starttls() server.login(self._smsaddress, self._smspw) server.sendmail(str(fromwhom), '{}@vtext.com'.format(number), str(text)) server.quit() def _initialize_alerting(self, jobid, mail=None, text=None): """ A private function which initializes the email/text alert service credentials. Also preemptively checks for job phase being COMPLETED, ABORTED, or ERROR so that users don't simply send alerts for old jobs. Parameters ---------- jobid : string The jobid of the sql query. mail : string The user-provided email address receiving the job alert. text : string The user-provided cell phone receiving the job alert. """ self._smsaddress = "donotreply.astroquery.cosmosim@gmail.com" password_from_keyring = keyring.get_password( "astroquery:cosmosim.SMSAlert", self._smsaddress) if password_from_keyring: self._smspw = password_from_keyring if not password_from_keyring: log.warning("CosmoSim SMS alerting has not been initialized.") warnings.warn("Initializing SMS alerting.") keyring.set_password("astroquery:cosmosim.SMSAlert", self._smsaddress, "LambdaCDM") self.alert_email = mail self.alert_text = text # first check to see if the job has errored (or is a job that has # already completed) before running on a loop phase = self._check_phase(jobid) if phase in ['COMPLETED', 'ABORTED', 'ERROR']: warnings.warn("JobID {0} has finished with status {1}." .format(jobid, phase)) self.alert_completed = True elif phase in ['EXECUTING', 'PENDING', 'QUEUED']: self.alert_completed = False else: self.alert_completed = False
class AlertThread: """ Alert threading class The _alert() method will be started and it will run in the background until the application exits. """ def __init__(self, jobid, queue='short'): """ Parameters ---------- jobid : string The jobid of the sql query. queue : string The short/long queue option. Default is short. """ self.jobid = jobid self.queue = queue thread = threading.Thread(target=self._alert, args=(self.jobid, self.queue)) thread.daemon = True # Daemonize thread thread.start() # Start the execution def _alert(self, jobid, queue): """ A private function which runs checks for job completion every 10 seconds for short-queue jobs and 60 seconds for long-queue jobs. Once job phase is COMPLETED, ERROR, or ABORTED, emails and/or texts the results of the query to the user. Parameters ---------- jobid : string The jobid of the sql query. queue : string The short/long queue option. Default is short. """ if queue == 'long': deltat = 60 else: deltat = 10 while self.alert_completed is False: phase = self._check_phase(jobid) if phase in ['COMPLETED', 'ABORTED', 'ERROR']: warnings.warn("JobID {0} has finished with status {1}." .format(jobid, phase)) self.alert_completed = True time.sleep(1) self.general_job_info(jobid) if self.alert_email: self._mail( self.alert_email, ("Job {0} Completed with phase {1}." .format(jobid, phase)), "{}".format( self.response_dict_current[jobid]['content'])) if self.alert_text: self._text(self._smsaddress, self.alert_text, ("Job {0} Completed with phase {1}." .format(jobid, phase))) time.sleep(deltat) CosmoSim = CosmoSimClass()