Source code for src.magpy_rv.plotting

"""
Various plotting functions to allow the user to interpret data from the mcmc
"""

# Contains:
#     offset_subtract function
#     data_plot function
#     gp_plot function
#     mixing_plot function
#     corner_plot function
#     keplerian_only_plot function
#     phase_plot function
#    
#    
# Author: Federica Rescigno, Bryce Dixon
# Version: 22.08.2023    


import matplotlib.pyplot as plt
import numpy as np
import scipy.interpolate as interp

import magpy_rv.auxiliary as aux
import magpy_rv.gp_likelihood as gp
from magpy_rv.mcmc_aux import get_model
import magpy_rv.parameters as par



###### allows me not to show plots
# matplotlib.use('Agg')
plt.rcParams.update({'font.size': 10})

[docs]def offset_subtract(rv, flags, offsets): """Function to return the RVs with offsets subtracted Parameters ---------- rv: array of floats y-axis array of the RVs flags: array of floats array of flags representing offsets generated by the combine_data function offsets: list or tuple of floats list or tuple of offsets ordered accordingly to the inputs in the combine_data function Returns ------- y: array of floats array of RVs with offsets subtracted """ y_off = [] for i in range(len(flags)): if flags[i] == 0.: y_off.append(0.) else: for a in range(len(offsets)): off = a+1. if flags[i] == off: y_off.append(offsets[a]) y = rv - y_off return y
# data plot function
[docs]def data_plot(time, rv, xlabel = "time [BJD]", ylabel = "RV [m/s]", legend = True, y_err = None, flags = None, offsets = None, save_folder = None, savefilename = None): """ Function to plot the rv data against the times with any given offsets subtracted from the data. It accepts a maximum of 6 offsets. Parameters ---------- time: array of floats x-axis array of the observation times rv: array of floats y-axis array of the RVs xlabel: string, optional label of x-axis, defaults to "time [BJD]" ylabel: string, optional label of y-axis, defaults to "RV [m/s]" legend: bool, optional enable the plotting of a legend, defaults to True y_err: array of floats, optional array of rv errors, defaults to None flags: array of floats, optional array of flags representing offsets generated by the combine_data function, defaults to None offsets: list or tuple of floats, optional list or tuple of offsets ordered accordingly to the inputs in the combine_data function, defaults to None save_folder: string, optional folder to save the plot, defaults to None savefilename: string, optional name of the save file, defaults to None Returns ------- rv plot with any offsets subtracted from the data """ # set up the figure fig = plt.figure(figsize = (10,7)) ax = fig.add_subplot(1,1,1) # simple plot for no offsets if offsets is None and flags is None: ax.errorbar(time, rv, yerr = y_err, fmt = '.', color = 'tab:blue', label = 'Data') ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) if legend is True: ax.legend(loc='upper left', borderaxespad=0, fontsize = 10) elif offsets is None and flags is not None: raise KeyError("offsets must be provided when flags are given") elif offsets is not None and flags is None: raise KeyError("flags must be provided if offsets are given") else: # offset subtract function to get subtracted y y = offset_subtract(rv, flags, offsets) # set up list of colours for points, currently can support 6 datasets c_list = ['tab:blue', 'tab:red', 'tab:green', 'tab:purple', 'tab:brown', 'tab:pink'] c_dict = {'tab:blue':'Dataset 1', 'tab:red':'Dataset 2', 'tab:green':'Dataset 3', 'tab:purple':'Dataset 4', 'tab:brown':'Dataset 5', 'tab:pink':'Dataset 6'} c_array = [] for i in range(len(flags)): for a in range(len(c_list)): if flags[i] == a: c_array.append(c_list[a]) # c_array is an array in the same form as the flags but where numbers are replaced by colours c_array = np.array(c_array) rv_err = np.array(y_err) for g in np.unique(c_array): # plot the data in different colours and labeled depending on its offset ix = np.where(c_array == g) ax.errorbar(time[ix], y[ix], yerr = rv_err[ix], fmt = '.', c = g, label = c_dict[g]) ax.set_xlabel(xlabel, fontsize = 10) ax.set_ylabel(ylabel, fontsize = 10) if legend is True: ax.legend(loc='upper left', borderaxespad=0, fontsize = 10) if save_folder is not None: assert savefilename is not None, "You need to give both save_folder and savefilename to save the figure" plt.savefig(str(save_folder)+"/"+str(savefilename)+".png", bbox_inches='tight') if savefilename is not None and save_folder is None: print("Figure not saved, you need to provide savefilename and save_folder both") plt.show()
# gp plot funciton
[docs]def GP_plot(time, rv, hparam, kernel_name, rv_err = None, model_list = None, model_param = None, flags = None, xpred = None, residuals=False, xlabel='Time [BJD]', ylabel='RV [m/s]', legend = True, save_folder=None, savefilename=None): """ Function to plot the rv data against the times with any offsets subtracted and the gp and models plotted on top Parameters ---------- time: array of floats x-axis array of the observation times rv: array of floats y-axis array of the RVs hparam: dictionary dictionary of the hyperparameters kernel_name: string name of the kernel rv_err: array of floats, optional array of rv errors, defaults to None model_list: list of strings, optional list of the models in use, defaults to None model_param: dictionary, optional dictionary of model parameters, defaults to None flags: array of floats, optional array of flags representing offsets generated by the combine_data function, defaults to None xpred: array of floats, optional array of prediced times to be used for the smooth model, defaults to intervals of 0.1 between the max and min times residuals: bool, optional enable the plotting of residuals, defaults to false xlabel: string, optional label of x-axis, defaults to "time [BJD]" ylabel: string, optional label of y-axis, defaults to "RV [m/s]" legend: bool, optional enable the plotting of a legend, defaults to True save_folder: string, optional folder to save the plot, defaults to None savefilename: string, optional name of the save file, defaults to None Returns ------- rv plot with the predicted GP and any offsets subtracted from the data along with any models and residuals """ # generate a predicted x array for the smooth models if none is given if xpred is None: xpred = np.arange(time.min()-1, time.max()+1, 0.1) if model_list is None and model_param is None: # if no model is in use there should be no flags and residuals assert flags is None, "flags should not be provided if no model is in use" # calculate GP_y for no model loglik = gp.GPLikelihood(time, rv, rv_err, hparam, kernel_name) GP_y, GP_err = loglik.predict(xpred) if residuals is False: fig = plt.figure(figsize = (10,7)) ax = fig.add_subplot(1,1,1) # plots for no model ax.errorbar(time, rv, yerr = rv_err, fmt = '.', color = 'tab:blue', label = 'Data') ax.plot(xpred, GP_y, linestyle = '--', color = 'orange', label = 'Predicted GP') ax.fill_between(xpred, GP_y+GP_err, GP_y-GP_err, alpha=0.5, color='gray') ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) if legend is True: ax.legend(loc='upper left', borderaxespad=0, fontsize = 10) if residuals is True: # set up two plots for residuals fig, ax = plt.subplots(ncols=1, nrows=2, sharex=True, figsize=(10,7), gridspec_kw={'height_ratios': [3,1]}) fig.subplots_adjust(hspace=0) ax[0].errorbar(time, rv, yerr = rv_err, fmt = '.', color = 'tab:blue', label = 'Data') ax[0].plot(xpred, GP_y, linestyle = '--', color = 'tab:orange', label = 'Predicted GP') ax[0].fill_between(xpred, GP_y+GP_err, GP_y-GP_err, alpha=0.3, color='tab:orange') ax[0].set_ylabel(ylabel) if legend is True: ax[0].legend(loc='upper left', borderaxespad=0, fontsize = 10) # interpolate the smooth GP to get the points relating to the time array f = interp.interp1d(xpred, GP_y, kind='cubic') new_pred_y = f(time) # subtract the points relating to the time array from the rv data to get residuals res = (rv-new_pred_y) ax[1].errorbar(time, res, yerr = rv_err, fmt = '.', color = 'tab:blue') ax[1].set_ylabel("Residuals") ax[1].set_xlabel(xlabel) # check inputs are correct elif model_list is None and model_param is not None: raise KeyError("model_list must be provided alongside model_param if a model is in use") elif model_list is not None and model_param is None: raise KeyError("model_param must be provided alongside model_list if a model is in use") else: # if a model is in use if flags is None: # no flags suggests no offsets are in use, check if this is the case for i in model_list: if i.startswith('off') or i.startswith('Off'): raise KeyError("flags must be provided if using offsets") else: pass # ganerate model and GP_y for no offsets model_y = get_model(model_list, time, model_param, to_ecc=False) loglik = gp.GPLikelihood(time, rv, rv_err, hparam, kernel_name, model_y, model_param) GP_y, GP_err = loglik.predict(xpred) # smooth model generated based on xpred smooth_model_y = get_model(model_list, xpred, model_param, to_ecc=False) # GP_y + smooth_model_y will be plotted gp_mod_y = smooth_model_y + GP_y if flags is not None: # flags suggests an offset model is in use, check if this is the case if 'offset' in model_list: pass elif 'Offset' in model_list: pass else: raise KeyError("offsets should be provided if flags are given") # GP_y can be generated from the model_y before it is adjusted for offsets model_y = get_model(model_list, time, model_param, to_ecc=False, flags = flags) loglik = gp.GPLikelihood(time, rv, rv_err, hparam, kernel_name, model_y, model_param) GP_y, GP_err = loglik.predict(xpred) # put the offset values in a list to be entered into the offset_subtract function offsets = [] try: off = model_param['offset'].value offsets.append(off) except: for i in range(len(model_list)): try: off = model_param['offset_'+str(i)].value offsets.append(off) except: continue # obtain the offset subtracted rv data rv = offset_subtract(rv, flags, offsets) # create a new model list without offsets to be used in the new model y new_model_list = [] for i in model_list: if i.startswith('off') or i.startswith('Off'): continue else: new_model_list.append(i) # create a new model parameter dictionary without offsets so they can be used with the new model y new_model_param = dict(model_param) mod_key = list(new_model_param.keys()) for i in mod_key: if i.startswith('off'): del new_model_param[i] # smooth_model_y is generated from the offset subtracted data so there is no need to have offsets in the model parameters or model list any more smooth_model_y = get_model(new_model_list, xpred, new_model_param, to_ecc=False) gp_mod_y = smooth_model_y + GP_y # set up the list of colours for points, currently can support 6 datasets c_list = ['tab:blue', 'tab:red', 'tab:green', 'tab:purple', 'tab:brown', 'tab:pink'] c_dict = {'tab:blue':'Dataset_1', 'tab:red':'Dataset_2', 'tab:green':'Dataset_3', 'tab:purple':'Dataset_4', 'tab:brown':'Dataset_5', 'tab:pink':'Dataset_6'} c_array = [] for i in range(len(flags)): for a in range(len(c_list)): if flags[i] == a: c_array.append(c_list[a]) # c_array is an array in the same form as the flags but where numbers are replaced by colours c_array = np.array(c_array) if residuals is False: # set up one plot for no residuals fig = plt.figure(figsize = (10,7)) ax = fig.add_subplot(1,1,1) try: # try plotting multiple colours for if offsets are present rv_err = np.array(rv_err) for g in np.unique(c_array): ix = np.where(c_array == g) ax.errorbar(time[ix], rv[ix], yerr = rv_err[ix], fmt = '.', c = g, label = c_dict[g]) except: # plot data if no offsets are present ax.errorbar(time, rv, yerr = rv_err, fmt = '.', color = 'tab:blue', label = 'Data') # plots for predicted GP and model+GP ax.plot(xpred, GP_y, linestyle = '--', color = 'tab:orange', label = 'Predicted GP') ax.fill_between(xpred, gp_mod_y+GP_err, gp_mod_y-GP_err, alpha=0.5, color='gray') ax.plot(xpred, gp_mod_y, color = 'gray', label = 'Predicted Model+GP') ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) if legend is True: ax.legend(loc='upper left', borderaxespad=0, fontsize = 10) if residuals is True: # set up two plots for residuals fig, axs = plt.subplots(ncols=1, nrows=2, sharex=True, figsize=(10,7), gridspec_kw={'height_ratios': [3,1]}) fig.subplots_adjust(hspace=0) try: # try plotting multiple colours for if offsets are present rv_err = np.array(rv_err) for g in np.unique(c_array): ix = np.where(c_array == g) axs[0].errorbar(time[ix], rv[ix], yerr = rv_err[ix], fmt = '.', c = g, label = c_dict[g]) except: # plot data if no offsets are present axs[0].errorbar(time, rv, yerr = rv_err, fmt = '.', color = 'tab:blue', label = 'Data') # plots for predicted GP and model+GP on the first plot axs[0].plot(xpred, GP_y, linestyle = '--', color = 'orange', label = 'Predicted GP') axs[0].plot(xpred, gp_mod_y, color = 'gray', label = 'Predicted Model+GP') axs[0].fill_between(xpred, gp_mod_y+GP_err, gp_mod_y-GP_err, alpha=0.5, color='gray') axs[0].tick_params(axis='y', labelsize=10) axs[0].set_ylabel(ylabel, size = 10) if legend is True: axs[0].legend(loc='upper left', borderaxespad=0, fontsize = 10) # interpolate the smooth GP+model to get the points relating to the time array f = interp.interp1d(xpred, gp_mod_y, kind='cubic') new_pred_y = f(time) # subtract the points relating to the time array from the rv data to get residuals res = (rv-new_pred_y) try: # try plotting multiple colours for offsets on the second plot for g in np.unique(c_array): ix = np.where(c_array == g) axs[1].errorbar(time[ix], res[ix], yerr = rv_err[ix], fmt = '.', c = g) except: # plotting for no offsets on the second plot axs[1].errorbar(time, res, yerr = rv_err, fmt = '.', color = 'tab:blue') axs[1].set_ylabel("Residuals", size = 10) axs[1].set_xlabel(xlabel, size = 10) axs[1].tick_params(axis='y', labelsize=10) axs[1].tick_params(axis='x', labelsize=10) if save_folder is not None: assert savefilename is not None, "You need to give both save_folder and savefilename to save the figure" plt.savefig(str(save_folder)+"/"+str(savefilename)+".png", bbox_inches='tight') if savefilename is not None and save_folder is None: print("Figure not saved, you need to provide savefilename and save_folder both") plt.show()
# mixing plots
[docs]def mixing_plot(hparam_chain, kernel_name, model_param_chain, model_name, LogL_chain, mass = None, save_folder=None, savefilename="mixing"): ''' Function to plot the mixing chains from the mcmc for each parameter Parameters ---------- hparam_chain : array Array of all the sets of hyperparameters of the MCMC, now in chains kernel_name : string Name of the kernel model_param_chain : array Array of all the sets of model parameters of the MCMC, now in chains model_name : string Name of the model LogL_chain : array Array containing all the Log Likelihood mass: array, optional Array of all the masses for all planets produced by the MCMC, defaults to None save_folder : str, optional Folder to save the plot, by default None. If None, the plot is not saved. savefilename : str, optional Name of the file to save the plot, by default Mixing. Returns ------- Mixing plots for each parameter ''' iterations = len(LogL_chain[:,0,0])-1 numb_chains = len(LogL_chain[0,:,0]) hparam_names = aux.hparam_names(kernel_name) model_param_names = aux.model_param_names(model_name, SkCk=True) if mass is None: xs = list(range(iterations+1)) n_subplots = len(hparam_chain[0,0,:])+len(model_param_chain[0,0,:])+1 fig, axs = plt.subplots(n_subplots, sharex=True, figsize=(15,15)) fig.subplots_adjust(hspace=0.) axs[0].set_title("Mixing Chains") plt.xlabel("Number of iterations") for chain in range(numb_chains): for i in range(n_subplots): if i == 0: axs[i].plot(xs, LogL_chain[:, chain, :], c='xkcd:bluish', alpha=0.2) axs[i].set_ylabel(r"log$\mathcal{L}$") if i != 0 and i <= len(hparam_chain[0, 0, :]): axs[i].plot(xs, hparam_chain[:, chain, i-1], c='xkcd:bluish', alpha=0.2) axs[i].set_ylabel("{}".format(hparam_names[i-1])) if i != 0 and i > len(hparam_chain[0, 0, :]): axs[i].plot(xs, model_param_chain[:, chain, i-1-len(hparam_chain[0, 0, :])], c='xkcd:bluish', alpha=0.2) axs[i].set_ylabel("{}".format(model_param_names[i-1-len(hparam_chain[0, 0, :])])) else: xs = list(range(iterations+1)) n_subplots = len(hparam_chain[0,0,:])+len(model_param_chain[0,0,:])+len(mass[0,0,:])+1 fig, axs = plt.subplots(n_subplots, sharex=True, figsize=(15,15)) fig.subplots_adjust(hspace=0.) axs[0].set_title("Mixing Chains") plt.xlabel("Number of iterations") for chain in range(numb_chains): for i in range(n_subplots): if i == 0: axs[i].plot(xs, LogL_chain[:, chain, :], c='xkcd:bluish', alpha=0.2) axs[i].set_ylabel(r"log$\mathcal{L}$") if i != 0 and i <= len(hparam_chain[0, 0, :]): axs[i].plot(xs, hparam_chain[:, chain, i-1], c='xkcd:bluish', alpha=0.2) axs[i].set_ylabel("{}".format(hparam_names[i-1])) if i != 0 and i > len(hparam_chain[0, 0, :]) and i <= len(hparam_chain[0, 0, :])+len(model_param_chain[0, 0, :]): axs[i].plot(xs, model_param_chain[:, chain, i-1-len(hparam_chain[0, 0, :])], c='xkcd:bluish', alpha=0.2) axs[i].set_ylabel("{}".format(model_param_names[i-1-len(hparam_chain[0, 0, :])])) if i != 0 and i > len(hparam_chain[0, 0, :])+len(model_param_chain[0, 0, :]): if len(mass[0,0,:]) == 1 and len(model_name) == 1: axs[i].plot(xs, mass[:, chain, 0], c='xkcd:bluish', alpha = 0.2) axs[i].set_ylabel("mass") else: axs[i].plot(xs, mass[:, chain, i-1-len(hparam_chain[0,0,:])-len(model_param_chain[0,0,:])], c='xkcd:bluish', alpha = 0.2) axs[i].set_ylabel(r"mass$_{}$".format(i-1-len(hparam_chain[0,0,:])-len(model_param_chain[0,0,:]))) if save_folder is not None: assert savefilename is not None, "You need to give both save_folder and savefilename to save the figure" plt.savefig(str(save_folder)+"/"+str(savefilename)+".png") plt.show()
# corner plots
[docs]def corner_plot(hparam_chain, kernel_name, model_param_chain, model_name, masses = None, save_folder=None, savefilename="corner", errors=False): ''' Function to plot the posteriors of each parameter from the mcmc as corner plots. If there is no dynamic range in any of the parameters the plots will automatically show up as empty. Parameters ---------- hparam_chain : array Array of all the sets of hyperparameters of the MCMC kernel_name : string Name of the kernel model_param_chain : array Array of all the sets of model parameters of the MCMC. model_name : string Name of the model masses: array, optional Array of all the masses for all planets produced by the MCMC, defaults to None save_folder : str, optional Folder to save the plot, by default None. If None, the plot is not saved. savefilename : str, optional Name of the file to save the plot, by default Corner. Returns ------- Corner plots of the posteriors of each parameter ''' import corner hparam_names = aux.hparam_names(kernel_name) model_param_names = aux.model_param_names(model_name, SkCk=True) # Resizing of arrays: create 2d array, nrows=iteration*chians, ncols = nparam hp = np.array(hparam_chain) shapes = hp.shape #print("shape",shapes) numb_chains = shapes[1] nparam = shapes[2] depth = shapes[0] hparams = np.zeros((((depth)*numb_chains),nparam)) for p in range(nparam): numb=0 for c in range(numb_chains): for i in range(depth): hparams[numb,p] = hparam_chain[i,c,p] numb += 1 par = np.array(model_param_chain) shapes2 = par.shape #print("shape",shapes) numb_chains2 = shapes2[1] nparam2 = shapes2[2] depth2 = shapes2[0] modpar = np.zeros((((depth2)*numb_chains2),nparam2)) for p in range(nparam2): numb=0 for c in range(numb_chains2): for i in range(depth2): modpar[numb,p] = model_param_chain[i,c,p] numb += 1 if masses is not None: mass = np.array(masses) shapes3 = mass.shape numb_chains3 = shapes3[1] nparam3 = shapes3[2] depth3 = shapes3[0] massval = np.zeros((((depth3)*numb_chains3), nparam3)) for p in range(nparam3): numb = 0 for c in range(numb_chains3): for i in range(depth2): massval[numb][p] = masses[i,c,p] numb += 1 mass_list = [] if len(masses[0,0,:]) == 1 and len(model_name) == 1: name = "mass" mass_list.append(name) else: for i in range(len(masses[0,0,:])): name = r"mass$_{}$".format(i) mass_list.append(name) CORNER_KWARGS = dict(label_kwargs=dict(fontsize=10), title_kwargs=dict(fontsize=10)) # Corner plot of the hyperparameters try: fig = corner.corner(hparams, labels = hparam_names, show_titles=True, **CORNER_KWARGS) if save_folder is not None: assert savefilename is not None, "You need to give both save_folder and savefilename to save the figure" plt.savefig(str(save_folder)+"/"+str(savefilename)+"_hparam"+".pdf") plt.show() except ValueError: print("Inside: No dynamic range in kernel") try: # Corner plot of model parameters fig = corner.corner(modpar, labels=model_param_names, show_titles=True, **CORNER_KWARGS) if save_folder is not None: assert savefilename is not None, "You need to give both save_folder and savefilename to save the figure" plt.savefig(str(save_folder)+"/"+str(savefilename)+"_modelpar"+".pdf") plt.show() except ValueError: print("Inside: No dynamic range in model") if masses is not None: try: #corner plot of the masses fig = corner.corner(massval, labels=mass_list, show_titles=True, **CORNER_KWARGS) if save_folder is not None: assert savefilename is not None, "You need to give both save_folder and savefilename to save the figure" plt.savefig(str(save_folder)+"/"+str(savefilename)+"_masses"+".pdf") plt.show() except ValueError: print("insideL No dynamic range in masses") # Full corner plot if masses is None: try: full_param_chain = np.concatenate((hparams, modpar), axis=1) full_names = hparam_names + model_param_names fig = corner.corner(full_param_chain, labels=full_names, show_titles=True, **CORNER_KWARGS) if save_folder is not None: assert savefilename is not None, "You need to give both save_folder and savefilename to save the figure" plt.savefig(str(save_folder)+"/"+str(savefilename)+"_full"+".pdf") plt.show() final_param_values = [] final_param_erru = [] final_param_errd = [] for a in range(len(hparam_chain[0,0,:])): errd, quantile, erru = corner.quantile(hparam_chain[:,:,a], [0.16,0.5,0.84]) final_param_values.append(quantile) final_param_erru.append(erru) final_param_errd.append(errd) for b in range(len(model_param_chain[0,0,:])): errd, quantile, erru = corner.quantile(modpar[:,b], [0.16,0.5,0.84]) final_param_values.append(quantile) final_param_erru.append(erru) final_param_errd.append(errd) except ValueError: try: plt.show() print("Inside: No dynamic range in model") final_param_values = [] final_param_erru = [] final_param_errd = [] for a in range(len(hparam_chain[0,0,:])): errd, quantile, erru = corner.quantile(hparam_chain[:,:,a], [0.16,0.5,0.84]) final_param_values.append(quantile) final_param_erru.append(erru) final_param_errd.append(errd) except ValueError: plt.show() print("Inside: No dynamic range in kernel") final_param_values = [] final_param_erru = [] final_param_errd = [] for a in range(len(model_param_chain[0,0,:])): errd, quantile, erru = corner.quantile(modpar[:,a], [0.16,0.5,0.84]) final_param_values.append(quantile) final_param_erru.append(erru) final_param_errd.append(errd) else: try: full_param_chain = np.concatenate((hparams, modpar, massval), axis=1) full_names = hparam_names + model_param_names + mass_list fig = corner.corner(full_param_chain, labels=full_names, show_titles=True, **CORNER_KWARGS) if save_folder is not None: assert savefilename is not None, "You need to give both save_folder and savefilename to save the figure" plt.savefig(str(save_folder)+"/"+str(savefilename)+"_full"+".pdf") plt.show() final_param_values = [] final_param_erru = [] final_param_errd = [] for a in range(len(hparam_chain[0,0,:])): errd, quantile, erru = corner.quantile(hparam_chain[:,:,a], [0.16,0.5,0.84]) final_param_values.append(quantile) final_param_erru.append(erru) final_param_errd.append(errd) for b in range(len(model_param_chain[0,0,:])): errd, quantile, erru = corner.quantile(modpar[:,b], [0.16,0.5,0.84]) final_param_values.append(quantile) final_param_erru.append(erru) final_param_errd.append(errd) for c in range(len(masses[0,0,:])): errd, quantile, erru = corner.quantile(masses[:,:,c], [0.16, 0.5, 0.84]) final_param_values.append(quantile) final_param_erru.append(erru) final_param_errd.append(errd) except ValueError: try: plt.show() print("Inside: No dynamic range in model") final_param_values = [] final_param_erru = [] final_param_errd = [] for a in range(len(hparam_chain[0,0,:])): errd, quantile, erru = corner.quantile(hparam_chain[:,:,a], [0.16,0.5,0.84]) final_param_values.append(quantile) final_param_erru.append(erru) final_param_errd.append(errd) for c in range(len(masses[0,0,:])): errd, quantile, erru = corner.quantile(masses[:,:,c], [0.16, 0.5, 0.84]) final_param_values.append(quantile) final_param_erru.append(erru) final_param_errd.append(errd) except ValueError: try: plt.show() print("Inside: No dynamic range in kernel") final_param_values = [] final_param_erru = [] final_param_errd = [] for a in range(len(model_param_chain[0,0,:])): errd, quantile, erru = corner.quantile(modpar[:,a], [0.16,0.5,0.84]) final_param_values.append(quantile) final_param_erru.append(erru) final_param_errd.append(errd) for c in range(len(masses[0,0,:])): errd, quantile, erru = corner.quantile(masses[:,:,c], [0.16, 0.5, 0.84]) final_param_values.append(quantile) final_param_erru.append(erru) final_param_errd.append(errd) except ValueError: try: plt.show() print("Inside: No dynamic range in mass") final_param_values = [] final_param_erru = [] final_param_errd = [] for a in range(len(model_param_chain[0,0,:])): errd, quantile, erru = corner.quantile(modpar[:,a], [0.16,0.5,0.84]) final_param_values.append(quantile) final_param_erru.append(erru) final_param_errd.append(errd) for a in range(len(hparam_chain[0,0,:])): errd, quantile, erru = corner.quantile(hparam_chain[:,:,a], [0.16,0.5,0.84]) final_param_values.append(quantile) final_param_erru.append(erru) final_param_errd.append(errd) except ValueError: try: plt.show() print("Inside: No dynamic range in kernel or model") final_param_values = [] final_param_erru = [] final_param_errd = [] for c in range(len(masses[0,0,:])): errd, quantile, erru = corner.quantile(masses[:,:,c], [0.16, 0.5, 0.84]) final_param_values.append(quantile) final_param_erru.append(erru) final_param_errd.append(errd) except ValueError: try: plt.show() print("Inside: No dynamic range in kernel or mass") final_param_values = [] final_param_erru = [] final_param_errd = [] for a in range(len(model_param_chain[0,0,:])): errd, quantile, erru = corner.quantile(modpar[:,a], [0.16,0.5,0.84]) final_param_values.append(quantile) final_param_erru.append(erru) final_param_errd.append(errd) except ValueError: plt.show() print("Inside: No dynamic range in model or mass") final_param_values = [] final_param_erru = [] final_param_errd = [] for a in range(len(hparam_chain[0,0,:])): errd, quantile, erru = corner.quantile(hparam_chain[:,:,a], [0.16,0.5,0.84]) final_param_values.append(quantile) final_param_erru.append(erru) final_param_errd.append(errd) print("Parameter values after MCMC: ", final_param_values) if errors: return final_param_values, final_param_erru, final_param_errd else: return final_param_values
# keplerian only plot
[docs]def keplerian_only_plot(time, rv, hparam, kernel_name, model_list, model_param, rv_err = None, keplerian_number = 0, flags = None, xpred = None, residuals=False, xlabel='Time [BJD]', ylabel='RV [m/s]', legend = True, save_folder=None, savefilename=None, plot = True): """ Function to return a plot of the data with the GP and all models except from the chosen keplerian subtracted from it, with the chosen keplerian model plotted on top Parameters ---------- time: array of floats x-axis array of the observation times rv: array of floats y-axis array of the RVs hparam: dictionary dictionary of the hyperparameters kernel_name: string name of the kernel model_list: list of strings list of the models in use model_param: dictionary dictionary of model parameters rv_err: array of floats, optional array of rv errors, defaults to None keplerian_number: int, optional number representing the chosen keplerian to plot, 0 to plot first keplerian, 1 to plot second keplerian etc... defaults to None flags: array of floats, optional array of flags representing offsets generated by the combine_data function, defaults to None xpred: array of floats, optional array of prediced times to be used for the smooth model, defaults to intervals of 0.1 between the max and min times residuals: bool, optional enable the plotting of residuals, defaults to false xlabel: string, optional label of x-axis, defaults to "time [BJD]" ylabel: string, optional label of y-axis, defaults to "RV [m/s]" legend: bool, optional enable the plotting of a legend, defaults to True save_folder: string, optional folder to save the plot, defaults to None savefilename: string, optional name of the save file, defaults to None plot: bool, optional true returns the plot, false returns parameter and rv data, defaults to True Returns ------- plot of subtracted data and chosen keplerian model if plot is True parameter and rv data with optional residuals if plot is False """ # generate a predicted x array for the smooth models if none is given if xpred is None: xpred = np.arange(time.min()-1, time.max()+1, 0.1) # check if a keplerian is in the model if not 'keplerian' in model_list and not 'Keplerian' in model_list: raise KeyError("no keplerain in model") if len(model_list) == 1: # model list has been check for a keplerian so if len=1 that means the model is just one keplerian model_y = get_model(model_list, time, model_param, to_ecc=False) loglik = gp.GPLikelihood(time, rv, rv_err, hparam, kernel_name, model_y, model_param) GP_y, GP_err = loglik.predict(xpred) # smooth model generated based on xpred smooth_model_y = get_model(model_list, xpred, model_param, to_ecc=False) # interpolate the gp to get points relating to the times f = interp.interp1d(xpred, GP_y, kind='cubic', bounds_error=False) new_pred_y = f(time) # subtract the points relating to the time array from the rv data to get the pg subtracted data rv = (rv-new_pred_y) else: # if the length of the model list is not 1, ensure offsets are set up properly if flags is None: # no flags suggests no offsets are in use, check if this is the case for i in model_list: if i.startswith('off') or i.startswith('Off'): raise KeyError("flags must be provided if using offsets") if flags is not None: # flags suggests an offset model is in use, check if this is the case if 'offset' in model_list: pass elif 'Offset' in model_list: pass else: raise KeyError("offsets should be provided if flags are given") # if offsets are in use, set up the dictionary for plotting the offsets as different colours c_list = ['tab:blue', 'tab:red', 'tab:green', 'tab:purple', 'tab:brown', 'tab:pink'] c_dict = {'tab:blue':'Subtracted Dataset_1', 'tab:red':'Subtracted Dataset_2', 'tab:green':'Subtracted Dataset_3', 'tab:purple':'Subtracted Dataset_4', 'tab:brown':'Subtracted Dataset_5', 'tab:pink':'Subtracted Dataset_6'} c_array = [] for i in range(len(flags)): for a in range(len(c_list)): if flags[i] == a: c_array.append(c_list[a]) # c_array is an array in the same form as the flags but where numbers are replaced by colours c_array = np.array(c_array) # run the model_y and gp likelihood normally for the time array and model parameters model_y = get_model(model_list, time, model_param, to_ecc=False, flags = flags) loglik = gp.GPLikelihood(time, rv, rv_err, hparam, kernel_name, model_y, model_param) GP_y, GP_err = loglik.predict(xpred) # interpolate the gp to get points relating to the times f = interp.interp1d(xpred, GP_y, kind='cubic', bounds_error=False) new_pred_y = f(time) # subtract the points relating to the time array and then the model_y from the data to effectively get residuals, data with all models and the gp subtracted from it blank_rv = (rv-new_pred_y) blank_rv = blank_rv - model_y # identify the number of keplerians in the model list and create a new list of just those kep_num_list = [] for i in model_list: if i.startswith('kep') or i.startswith('Kep'): kep_num_list.append(i) # create a new model list of just one keplerian to be used when making the new model new_model_list = ["Keplerian"] # range kep_num_list ensures it will always iterate for the number of keplerians for i in range(len(kep_num_list)): if keplerian_number == i: # create a new model parameter dictionary based on the chosen keplerian new_model_param = {"P":model_param["P_"+str(i)], "K":model_param["K_"+str(i)], "ecc":model_param["ecc_"+str(i)], "omega":model_param["omega_"+str(i)], "t0":model_param["t0_"+str(i)]} model_param = new_model_param # create a smooth model and data model based on just this keplerian using the new model list and parameters for one keplerain smooth_model_y = get_model(new_model_list, xpred, model_param, to_ecc=False) new_data_model_y = get_model(new_model_list, time, model_param, to_ecc=False) # add the data model back onto the rv data as this was subtracted earlier as a part of subtracting the whole model rv = blank_rv + new_data_model_y if residuals is False: # set up one plot for no residuals fig = plt.figure(figsize = (10,7)) ax = fig.add_subplot(1,1,1) try: # try plotting multiple colours for if offsets are present rv_err = np.array(rv_err) for g in np.unique(c_array): ix = np.where(c_array == g) ax.errorbar(time[ix], rv[ix], yerr = rv_err[ix], fmt = '.', c = g, label = c_dict[g]) except: # plot data if no offsets are present ax.errorbar(time, rv, yerr = rv_err, fmt = '.', color = 'tab:blue', label = 'Subtracted Data') ax.plot(xpred, smooth_model_y, color = 'blue', label = 'Predicted Keplerian Model') ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) if legend is True: ax.legend(loc='upper left', borderaxespad=0, fontsize = 10) if residuals is True: # set up two plots for residuals fig, axs = plt.subplots(ncols=1, nrows=2, sharex=True, figsize=(10,7), gridspec_kw={'height_ratios': [3,1]}) fig.subplots_adjust(hspace=0) try: # try plotting multiple colours for if offsets are present rv_err = np.array(rv_err) for g in np.unique(c_array): ix = np.where(c_array == g) axs[0].errorbar(time[ix], rv[ix], yerr = rv_err[ix], fmt = '.', c = g, label = c_dict[g]) except: # plot data if no offsets are present axs[0].errorbar(time, rv, yerr = rv_err, fmt = '.', color = 'tab:blue', label = 'Subtracted Data') # plots for data and model on the first plot axs[0].plot(xpred, smooth_model_y, color = 'blue', label = 'Predicted Keplerian Model') axs[0].set_ylabel(ylabel) if legend is True: axs[0].legend(loc='upper left', borderaxespad=0, fontsize = 10) # interpolate the smooth model to get the points relating to the time array f = interp.interp1d(xpred, smooth_model_y, kind='cubic', bounds_error=False) new_pred_y = f(time) # subtract the points relating to the time array from the rv data to get residuals res = (rv-new_pred_y) try: # try plotting multiple colours for offsets on the second plot for g in np.unique(c_array): ix = np.where(c_array == g) axs[1].errorbar(time[ix], res[ix], yerr = rv_err[ix], fmt = '.', c = g) except: # plotting for no offsets on the second plot axs[1].errorbar(time, res, yerr = rv_err, fmt = '.', color = 'tab:blue') axs[1].set_ylabel("Residuals") axs[1].set_xlabel(xlabel) if save_folder is not None: assert savefilename is not None, "You need to give both save_folder and savefilename to save the figure" plt.savefig(str(save_folder)+"/"+str(savefilename)+".png", bbox_inches='tight') if savefilename is not None and save_folder is None: print("Figure not saved, you need to provide savefilename and save_folder both") if plot is True: plt.show() if plot is False: plt.close() if residuals is False: try: return model_param, rv, c_array, c_dict except: return model_param, rv if residuals is True: try: return model_param, rv, c_array, c_dict, res except: return model_param, rv, res
# phase folded plot
[docs]def phase_plot(time, rv, hparam, kernel_name, model_list, model_param, rv_err = None, keplerian_number = 0, flags = None, xpred = None, residuals=False, xlabel='Time [BJD]', ylabel='RV [m/s]', legend = True, save_folder=None, savefilename=None): """ Function to plot the phase folded rv curve of the chosen keplerian model along with the data representing that model Parameters ---------- time: array of floats x-axis array of the observation times rv: array of floats y-axis array of the RVs hparam: dictionary dictionary of the hyperparameters kernel_name: string name of the kernel model_list: list of strings list of the models in use model_param: dictionary dictionary of model parameters rv_err: array of floats, optional array of rv errors, defaults to None keplerian_number: int, optional number representing the chosen keplerian to plot, 0 to plot first keplerian, 1 to plot second keplerian etc... defaults to None flags: array of floats, optional array of flags representing offsets generated by the combine_data function, defaults to None xpred: array of floats, optional array of prediced times to be used for the smooth model, defaults to intervals of 0.1 between the max and min times residuals: bool, optional enable the plotting of residuals, defaults to false xlabel: string, optional label of x-axis, defaults to "time [BJD]" ylabel: string, optional label of y-axis, defaults to "RV [m/s]" legend: bool, optional enable the plotting of a legend, defaults to True save_folder: string, optional folder to save the plot, defaults to None savefilename: string, optional name of the save file, defaults to None Returns ------- phase-folded plot of the chosen keplerian along with the subtracted data """ # get essential data from keplerian only function depending on whether residuals and offsets are present if residuals is False: try: model_param, rv, c_array, c_dict = keplerian_only_plot(time, rv, hparam, kernel_name, model_list, model_param, rv_err, keplerian_number, flags, xpred, residuals, xlabel, ylabel, legend, save_folder, savefilename, plot = False) except: model_param, rv = keplerian_only_plot(time, rv, hparam, kernel_name, model_list, model_param, rv_err, keplerian_number, flags, xpred, residuals, xlabel, ylabel, legend, save_folder, savefilename, plot = False) if residuals is True: try: model_param, rv, c_array, c_dict, res = keplerian_only_plot(time, rv, hparam, kernel_name, model_list, model_param, rv_err, keplerian_number, flags, xpred, residuals, xlabel, ylabel, legend, save_folder, savefilename, plot = False) except: model_param, rv, res = keplerian_only_plot(time, rv, hparam, kernel_name, model_list, model_param, rv_err, keplerian_number, flags, xpred, residuals, xlabel, ylabel, legend, save_folder, savefilename, plot = False) # only one keplerian will be present in the model params due to how the keplerian only function works model_list = ["Keplerian"] # obtain period and t0 values to phase fold the time period = model_param["P"].value t0 = model_param["t0"].value # phase fold the times phase = (time - t0)/period # Want phase to be between 0 and 1 epoch = np.floor(phase) true_phase = phase - epoch # set the center at zero end = np.where(true_phase >= 0.5)[0] true_phase[end] -= 1.0 # create an xpred array in order to make a smooth model phase_xpred = np.arange(-0.5, 0.5, 0.001) # phase of the keplerian has effectively been set to 1 by the phase folding, set it to 1 to run the model new_model_param = {"P":0, "K":model_param["K"], "ecc":model_param["ecc"], "omega":model_param["omega"], "t0":0} new_model_param["P"] = par.parameter(1., 0., True) # similarly with t0 new_model_param["t0"] = par.parameter(0., 0., True) # generate the smooth model smooth_model_y = get_model(model_list, phase_xpred, new_model_param, to_ecc = False) # put the new phase folded times in order and order the RVs, c_array, and residuals in the same order if residuals is False: try: true_phase, rv, c_array = zip(*sorted(zip(true_phase, rv, c_array))) except: true_phase, rv = zip(*sorted(zip(true_phase, rv))) if residuals is True: try: true_phase, rv, c_array, res = zip(*sorted(zip(true_phase, rv, c_array, res))) except: true_phase, rv , res = zip(*sorted(zip(true_phase, rv, res))) # generate empty lists to hold information needed to extend the plot end_rv_num = [] start_rv_num = [] start_time = [] end_time = [] for N,i in enumerate(true_phase): if i < -0.4: # append the indices of all times less than -0.4 to an start list start_rv_num.append(N) # append all times less than -0.4 to an start list and add 1 to them so they extend the data start_time.append(i+1) if i > 0.4: # append the indices of all times larger than 0.4 to an end list end_rv_num.append(N) # append all times larger than 0.4 to an end list and minus 1 to them so they extend the data end_time.append(i-1) else: pass # start and end rv is a list of RVs correlating to those start and end time positions start_rv = rv[:start_rv_num[-1]+1] end_rv = rv[end_rv_num[0]:] # similarly for the rv_err start_yerr = rv_err[:start_rv_num[-1]+1] end_yerr = rv_err[end_rv_num[0]:] # create xpreds for extending the model end_xpred = np.arange(0.5, 0.6, 0.001) start_xpred = np.arange(-0.6, -0.5, 0.001) # generate extended models for the start and the end end_model = get_model(model_list, end_xpred, new_model_param, to_ecc = False) start_model = get_model(model_list, start_xpred, new_model_param, to_ecc = False) if residuals is False: fig = plt.figure(figsize = (10,7)) ax = fig.add_subplot(1,1,1) try: # try plotting multiple colours for if offsets are present rv_err = np.array(rv_err) true_phase = np.array(true_phase) rv = np.array(rv) c_array = np.array(c_array) for g in np.unique(c_array): ix = np.where(c_array == g) ax.errorbar(true_phase[ix], rv[ix], yerr = rv_err[ix], fmt = '.', c = g, label = c_dict[g]) except: # plot data if no offsets are present ax.errorbar(true_phase, rv, yerr = rv_err, fmt = '.', color = 'tab:blue', label = 'Subtracted Data') # plot the model for the data in blue ax.plot(phase_xpred, smooth_model_y, c = 'blue', label = 'Keplerian Model') # plot the extended data points and model in grey ax.errorbar(start_time, start_rv, yerr = start_yerr, fmt = '.', color = 'gray') ax.errorbar(end_time, end_rv, yerr = end_yerr, fmt = '.', color = 'gray') ax.plot(start_xpred, start_model, c = 'gray') ax.plot(end_xpred, end_model, c = 'gray') ax.set_ylabel(ylabel) ax.set_xlabel(xlabel) if legend is True: ax.legend(loc='upper left', borderaxespad=0, fontsize = 10) if residuals is True: # set up two plots for residuals fig, axs = plt.subplots(ncols=1, nrows=2, sharex=True, figsize=(10,7), gridspec_kw={'height_ratios': [3,1]}) fig.subplots_adjust(hspace=0) try: # try plotting multiple colours for if offsets are present rv_err = np.array(rv_err) true_phase = np.array(true_phase) rv = np.array(rv) c_array = np.array(c_array) for g in np.unique(c_array): ix = np.where(c_array == g) axs[0].errorbar(true_phase[ix], rv[ix], yerr = rv_err[ix], fmt = '.', c = g, label = c_dict[g]) except: # plot data if no offsets are present axs[0].errorbar(true_phase, rv, yerr = rv_err, fmt = '.', color = 'tab:blue', label = 'Subtracted Data') # plot the model for the data in blue axs[0].plot(phase_xpred, smooth_model_y, c = 'blue', label = 'Keplerian Model') # plot the extended data points and model in grey axs[0].errorbar(start_time, start_rv, yerr = start_yerr, fmt = '.', color = 'gray') axs[0].errorbar(end_time, end_rv, yerr = end_yerr, fmt = '.', color = 'gray') axs[0].plot(start_xpred, start_model, c = 'gray') axs[0].plot(end_xpred, end_model, c = 'gray') axs[0].set_ylabel(ylabel) if legend is True: axs[0].legend(loc='upper left', borderaxespad=0, fontsize = 10) try: # try plotting multiple colours for offsets on the second plot res = np.array(res) for g in np.unique(c_array): ix = np.where(c_array == g) axs[1].errorbar(true_phase[ix], res[ix], yerr = rv_err[ix], fmt = '.', c = g) except: # plotting for no offsets on the second plot axs[1].errorbar(true_phase, res, yerr = rv_err, fmt = '.', color = 'tab:blue') start_res = res[:start_rv_num[-1]+1] end_res = res[end_rv_num[0]:] # plotting extended residuals axs[1].scatter(start_time, start_res, c = 'gray', s = 10) axs[1].scatter(end_time, end_res, c = 'gray', s = 10) axs[1].set_ylabel("Residuals") axs[1].set_xlabel(xlabel) if save_folder is not None: assert savefilename is not None, "You need to give both save_folder and savefilename to save the figure" plt.savefig(str(save_folder)+"/"+str(savefilename)+".png", bbox_inches='tight') if savefilename is not None and save_folder is None: print("Figure not saved, you need to provide savefilename and save_folder both") plt.show()