"""
Various plotting functions to allow the user to interpret data from the mcmc
"""
# Contains:
# offset_subtract function
# data_plot function
# gp_plot function
# mixing_plot function
# corner_plot function
# keplerian_only_plot function
# phase_plot function
#
#
# Author: Federica Rescigno, Bryce Dixon
# Version: 22.08.2023
import matplotlib.pyplot as plt
import numpy as np
import scipy.interpolate as interp
import magpy_rv.auxiliary as aux
import magpy_rv.gp_likelihood as gp
from magpy_rv.mcmc_aux import get_model
import magpy_rv.parameters as par
###### allows me not to show plots
# matplotlib.use('Agg')
plt.rcParams.update({'font.size': 10})
[docs]def offset_subtract(rv, flags, offsets):
"""Function to return the RVs with offsets subtracted
Parameters
----------
rv: array of floats
y-axis array of the RVs
flags: array of floats
array of flags representing offsets generated by the combine_data function
offsets: list or tuple of floats
list or tuple of offsets ordered accordingly to the inputs in the combine_data function
Returns
-------
y: array of floats
array of RVs with offsets subtracted
"""
y_off = []
for i in range(len(flags)):
if flags[i] == 0.:
y_off.append(0.)
else:
for a in range(len(offsets)):
off = a+1.
if flags[i] == off:
y_off.append(offsets[a])
y = rv - y_off
return y
# data plot function
[docs]def data_plot(time, rv, xlabel = "time [BJD]", ylabel = "RV [m/s]", legend = True, y_err = None, flags = None, offsets = None, save_folder = None, savefilename = None):
"""
Function to plot the rv data against the times with any given offsets subtracted from the data. It accepts a maximum of 6 offsets.
Parameters
----------
time: array of floats
x-axis array of the observation times
rv: array of floats
y-axis array of the RVs
xlabel: string, optional
label of x-axis, defaults to "time [BJD]"
ylabel: string, optional
label of y-axis, defaults to "RV [m/s]"
legend: bool, optional
enable the plotting of a legend, defaults to True
y_err: array of floats, optional
array of rv errors, defaults to None
flags: array of floats, optional
array of flags representing offsets generated by the combine_data function, defaults to None
offsets: list or tuple of floats, optional
list or tuple of offsets ordered accordingly to the inputs in the combine_data function, defaults to None
save_folder: string, optional
folder to save the plot, defaults to None
savefilename: string, optional
name of the save file, defaults to None
Returns
-------
rv plot with any offsets subtracted from the data
"""
# set up the figure
fig = plt.figure(figsize = (10,7))
ax = fig.add_subplot(1,1,1)
# simple plot for no offsets
if offsets is None and flags is None:
ax.errorbar(time, rv, yerr = y_err, fmt = '.', color = 'tab:blue', label = 'Data')
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
if legend is True:
ax.legend(loc='upper left', borderaxespad=0, fontsize = 10)
elif offsets is None and flags is not None:
raise KeyError("offsets must be provided when flags are given")
elif offsets is not None and flags is None:
raise KeyError("flags must be provided if offsets are given")
else:
# offset subtract function to get subtracted y
y = offset_subtract(rv, flags, offsets)
# set up list of colours for points, currently can support 6 datasets
c_list = ['tab:blue', 'tab:red', 'tab:green', 'tab:purple', 'tab:brown', 'tab:pink']
c_dict = {'tab:blue':'Dataset 1', 'tab:red':'Dataset 2', 'tab:green':'Dataset 3', 'tab:purple':'Dataset 4', 'tab:brown':'Dataset 5', 'tab:pink':'Dataset 6'}
c_array = []
for i in range(len(flags)):
for a in range(len(c_list)):
if flags[i] == a:
c_array.append(c_list[a])
# c_array is an array in the same form as the flags but where numbers are replaced by colours
c_array = np.array(c_array)
rv_err = np.array(y_err)
for g in np.unique(c_array):
# plot the data in different colours and labeled depending on its offset
ix = np.where(c_array == g)
ax.errorbar(time[ix], y[ix], yerr = rv_err[ix], fmt = '.', c = g, label = c_dict[g])
ax.set_xlabel(xlabel, fontsize = 10)
ax.set_ylabel(ylabel, fontsize = 10)
if legend is True:
ax.legend(loc='upper left', borderaxespad=0, fontsize = 10)
if save_folder is not None:
assert savefilename is not None, "You need to give both save_folder and savefilename to save the figure"
plt.savefig(str(save_folder)+"/"+str(savefilename)+".png", bbox_inches='tight')
if savefilename is not None and save_folder is None:
print("Figure not saved, you need to provide savefilename and save_folder both")
plt.show()
# gp plot funciton
[docs]def GP_plot(time, rv, hparam, kernel_name, rv_err = None, model_list = None, model_param = None, flags = None, xpred = None, residuals=False, xlabel='Time [BJD]', ylabel='RV [m/s]', legend = True, save_folder=None, savefilename=None):
"""
Function to plot the rv data against the times with any offsets subtracted and the gp and models plotted on top
Parameters
----------
time: array of floats
x-axis array of the observation times
rv: array of floats
y-axis array of the RVs
hparam: dictionary
dictionary of the hyperparameters
kernel_name: string
name of the kernel
rv_err: array of floats, optional
array of rv errors, defaults to None
model_list: list of strings, optional
list of the models in use, defaults to None
model_param: dictionary, optional
dictionary of model parameters, defaults to None
flags: array of floats, optional
array of flags representing offsets generated by the combine_data function, defaults to None
xpred: array of floats, optional
array of prediced times to be used for the smooth model, defaults to intervals of 0.1 between the max and min times
residuals: bool, optional
enable the plotting of residuals, defaults to false
xlabel: string, optional
label of x-axis, defaults to "time [BJD]"
ylabel: string, optional
label of y-axis, defaults to "RV [m/s]"
legend: bool, optional
enable the plotting of a legend, defaults to True
save_folder: string, optional
folder to save the plot, defaults to None
savefilename: string, optional
name of the save file, defaults to None
Returns
-------
rv plot with the predicted GP and any offsets subtracted from the data along with any models and residuals
"""
# generate a predicted x array for the smooth models if none is given
if xpred is None:
xpred = np.arange(time.min()-1, time.max()+1, 0.1)
if model_list is None and model_param is None:
# if no model is in use there should be no flags and residuals
assert flags is None, "flags should not be provided if no model is in use"
# calculate GP_y for no model
loglik = gp.GPLikelihood(time, rv, rv_err, hparam, kernel_name)
GP_y, GP_err = loglik.predict(xpred)
if residuals is False:
fig = plt.figure(figsize = (10,7))
ax = fig.add_subplot(1,1,1)
# plots for no model
ax.errorbar(time, rv, yerr = rv_err, fmt = '.', color = 'tab:blue', label = 'Data')
ax.plot(xpred, GP_y, linestyle = '--', color = 'orange', label = 'Predicted GP')
ax.fill_between(xpred, GP_y+GP_err, GP_y-GP_err, alpha=0.5, color='gray')
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
if legend is True:
ax.legend(loc='upper left', borderaxespad=0, fontsize = 10)
if residuals is True:
# set up two plots for residuals
fig, ax = plt.subplots(ncols=1, nrows=2, sharex=True, figsize=(10,7), gridspec_kw={'height_ratios': [3,1]})
fig.subplots_adjust(hspace=0)
ax[0].errorbar(time, rv, yerr = rv_err, fmt = '.', color = 'tab:blue', label = 'Data')
ax[0].plot(xpred, GP_y, linestyle = '--', color = 'tab:orange', label = 'Predicted GP')
ax[0].fill_between(xpred, GP_y+GP_err, GP_y-GP_err, alpha=0.3, color='tab:orange')
ax[0].set_ylabel(ylabel)
if legend is True:
ax[0].legend(loc='upper left', borderaxespad=0, fontsize = 10)
# interpolate the smooth GP to get the points relating to the time array
f = interp.interp1d(xpred, GP_y, kind='cubic')
new_pred_y = f(time)
# subtract the points relating to the time array from the rv data to get residuals
res = (rv-new_pred_y)
ax[1].errorbar(time, res, yerr = rv_err, fmt = '.', color = 'tab:blue')
ax[1].set_ylabel("Residuals")
ax[1].set_xlabel(xlabel)
# check inputs are correct
elif model_list is None and model_param is not None:
raise KeyError("model_list must be provided alongside model_param if a model is in use")
elif model_list is not None and model_param is None:
raise KeyError("model_param must be provided alongside model_list if a model is in use")
else:
# if a model is in use
if flags is None:
# no flags suggests no offsets are in use, check if this is the case
for i in model_list:
if i.startswith('off') or i.startswith('Off'):
raise KeyError("flags must be provided if using offsets")
else:
pass
# ganerate model and GP_y for no offsets
model_y = get_model(model_list, time, model_param, to_ecc=False)
loglik = gp.GPLikelihood(time, rv, rv_err, hparam, kernel_name, model_y, model_param)
GP_y, GP_err = loglik.predict(xpred)
# smooth model generated based on xpred
smooth_model_y = get_model(model_list, xpred, model_param, to_ecc=False)
# GP_y + smooth_model_y will be plotted
gp_mod_y = smooth_model_y + GP_y
if flags is not None:
# flags suggests an offset model is in use, check if this is the case
if 'offset' in model_list:
pass
elif 'Offset' in model_list:
pass
else:
raise KeyError("offsets should be provided if flags are given")
# GP_y can be generated from the model_y before it is adjusted for offsets
model_y = get_model(model_list, time, model_param, to_ecc=False, flags = flags)
loglik = gp.GPLikelihood(time, rv, rv_err, hparam, kernel_name, model_y, model_param)
GP_y, GP_err = loglik.predict(xpred)
# put the offset values in a list to be entered into the offset_subtract function
offsets = []
try:
off = model_param['offset'].value
offsets.append(off)
except:
for i in range(len(model_list)):
try:
off = model_param['offset_'+str(i)].value
offsets.append(off)
except:
continue
# obtain the offset subtracted rv data
rv = offset_subtract(rv, flags, offsets)
# create a new model list without offsets to be used in the new model y
new_model_list = []
for i in model_list:
if i.startswith('off') or i.startswith('Off'):
continue
else:
new_model_list.append(i)
# create a new model parameter dictionary without offsets so they can be used with the new model y
new_model_param = dict(model_param)
mod_key = list(new_model_param.keys())
for i in mod_key:
if i.startswith('off'):
del new_model_param[i]
# smooth_model_y is generated from the offset subtracted data so there is no need to have offsets in the model parameters or model list any more
smooth_model_y = get_model(new_model_list, xpred, new_model_param, to_ecc=False)
gp_mod_y = smooth_model_y + GP_y
# set up the list of colours for points, currently can support 6 datasets
c_list = ['tab:blue', 'tab:red', 'tab:green', 'tab:purple', 'tab:brown', 'tab:pink']
c_dict = {'tab:blue':'Dataset_1', 'tab:red':'Dataset_2', 'tab:green':'Dataset_3', 'tab:purple':'Dataset_4', 'tab:brown':'Dataset_5', 'tab:pink':'Dataset_6'}
c_array = []
for i in range(len(flags)):
for a in range(len(c_list)):
if flags[i] == a:
c_array.append(c_list[a])
# c_array is an array in the same form as the flags but where numbers are replaced by colours
c_array = np.array(c_array)
if residuals is False:
# set up one plot for no residuals
fig = plt.figure(figsize = (10,7))
ax = fig.add_subplot(1,1,1)
try:
# try plotting multiple colours for if offsets are present
rv_err = np.array(rv_err)
for g in np.unique(c_array):
ix = np.where(c_array == g)
ax.errorbar(time[ix], rv[ix], yerr = rv_err[ix], fmt = '.', c = g, label = c_dict[g])
except:
# plot data if no offsets are present
ax.errorbar(time, rv, yerr = rv_err, fmt = '.', color = 'tab:blue', label = 'Data')
# plots for predicted GP and model+GP
ax.plot(xpred, GP_y, linestyle = '--', color = 'tab:orange', label = 'Predicted GP')
ax.fill_between(xpred, gp_mod_y+GP_err, gp_mod_y-GP_err, alpha=0.5, color='gray')
ax.plot(xpred, gp_mod_y, color = 'gray', label = 'Predicted Model+GP')
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
if legend is True:
ax.legend(loc='upper left', borderaxespad=0, fontsize = 10)
if residuals is True:
# set up two plots for residuals
fig, axs = plt.subplots(ncols=1, nrows=2, sharex=True, figsize=(10,7), gridspec_kw={'height_ratios': [3,1]})
fig.subplots_adjust(hspace=0)
try:
# try plotting multiple colours for if offsets are present
rv_err = np.array(rv_err)
for g in np.unique(c_array):
ix = np.where(c_array == g)
axs[0].errorbar(time[ix], rv[ix], yerr = rv_err[ix], fmt = '.', c = g, label = c_dict[g])
except:
# plot data if no offsets are present
axs[0].errorbar(time, rv, yerr = rv_err, fmt = '.', color = 'tab:blue', label = 'Data')
# plots for predicted GP and model+GP on the first plot
axs[0].plot(xpred, GP_y, linestyle = '--', color = 'orange', label = 'Predicted GP')
axs[0].plot(xpred, gp_mod_y, color = 'gray', label = 'Predicted Model+GP')
axs[0].fill_between(xpred, gp_mod_y+GP_err, gp_mod_y-GP_err, alpha=0.5, color='gray')
axs[0].tick_params(axis='y', labelsize=10)
axs[0].set_ylabel(ylabel, size = 10)
if legend is True:
axs[0].legend(loc='upper left', borderaxespad=0, fontsize = 10)
# interpolate the smooth GP+model to get the points relating to the time array
f = interp.interp1d(xpred, gp_mod_y, kind='cubic')
new_pred_y = f(time)
# subtract the points relating to the time array from the rv data to get residuals
res = (rv-new_pred_y)
try:
# try plotting multiple colours for offsets on the second plot
for g in np.unique(c_array):
ix = np.where(c_array == g)
axs[1].errorbar(time[ix], res[ix], yerr = rv_err[ix], fmt = '.', c = g)
except:
# plotting for no offsets on the second plot
axs[1].errorbar(time, res, yerr = rv_err, fmt = '.', color = 'tab:blue')
axs[1].set_ylabel("Residuals", size = 10)
axs[1].set_xlabel(xlabel, size = 10)
axs[1].tick_params(axis='y', labelsize=10)
axs[1].tick_params(axis='x', labelsize=10)
if save_folder is not None:
assert savefilename is not None, "You need to give both save_folder and savefilename to save the figure"
plt.savefig(str(save_folder)+"/"+str(savefilename)+".png", bbox_inches='tight')
if savefilename is not None and save_folder is None:
print("Figure not saved, you need to provide savefilename and save_folder both")
plt.show()
# mixing plots
[docs]def mixing_plot(hparam_chain, kernel_name, model_param_chain, model_name, LogL_chain, mass = None, save_folder=None, savefilename="mixing"):
'''
Function to plot the mixing chains from the mcmc for each parameter
Parameters
----------
hparam_chain : array
Array of all the sets of hyperparameters of the MCMC, now in chains
kernel_name : string
Name of the kernel
model_param_chain : array
Array of all the sets of model parameters of the MCMC, now in chains
model_name : string
Name of the model
LogL_chain : array
Array containing all the Log Likelihood
mass: array, optional
Array of all the masses for all planets produced by the MCMC, defaults to None
save_folder : str, optional
Folder to save the plot, by default None. If None, the plot is not saved.
savefilename : str, optional
Name of the file to save the plot, by default Mixing.
Returns
-------
Mixing plots for each parameter
'''
iterations = len(LogL_chain[:,0,0])-1
numb_chains = len(LogL_chain[0,:,0])
hparam_names = aux.hparam_names(kernel_name)
model_param_names = aux.model_param_names(model_name, SkCk=True)
if mass is None:
xs = list(range(iterations+1))
n_subplots = len(hparam_chain[0,0,:])+len(model_param_chain[0,0,:])+1
fig, axs = plt.subplots(n_subplots, sharex=True, figsize=(15,15))
fig.subplots_adjust(hspace=0.)
axs[0].set_title("Mixing Chains")
plt.xlabel("Number of iterations")
for chain in range(numb_chains):
for i in range(n_subplots):
if i == 0:
axs[i].plot(xs, LogL_chain[:, chain, :], c='xkcd:bluish', alpha=0.2)
axs[i].set_ylabel(r"log$\mathcal{L}$")
if i != 0 and i <= len(hparam_chain[0, 0, :]):
axs[i].plot(xs, hparam_chain[:, chain, i-1], c='xkcd:bluish', alpha=0.2)
axs[i].set_ylabel("{}".format(hparam_names[i-1]))
if i != 0 and i > len(hparam_chain[0, 0, :]):
axs[i].plot(xs, model_param_chain[:, chain, i-1-len(hparam_chain[0, 0, :])], c='xkcd:bluish', alpha=0.2)
axs[i].set_ylabel("{}".format(model_param_names[i-1-len(hparam_chain[0, 0, :])]))
else:
xs = list(range(iterations+1))
n_subplots = len(hparam_chain[0,0,:])+len(model_param_chain[0,0,:])+len(mass[0,0,:])+1
fig, axs = plt.subplots(n_subplots, sharex=True, figsize=(15,15))
fig.subplots_adjust(hspace=0.)
axs[0].set_title("Mixing Chains")
plt.xlabel("Number of iterations")
for chain in range(numb_chains):
for i in range(n_subplots):
if i == 0:
axs[i].plot(xs, LogL_chain[:, chain, :], c='xkcd:bluish', alpha=0.2)
axs[i].set_ylabel(r"log$\mathcal{L}$")
if i != 0 and i <= len(hparam_chain[0, 0, :]):
axs[i].plot(xs, hparam_chain[:, chain, i-1], c='xkcd:bluish', alpha=0.2)
axs[i].set_ylabel("{}".format(hparam_names[i-1]))
if i != 0 and i > len(hparam_chain[0, 0, :]) and i <= len(hparam_chain[0, 0, :])+len(model_param_chain[0, 0, :]):
axs[i].plot(xs, model_param_chain[:, chain, i-1-len(hparam_chain[0, 0, :])], c='xkcd:bluish', alpha=0.2)
axs[i].set_ylabel("{}".format(model_param_names[i-1-len(hparam_chain[0, 0, :])]))
if i != 0 and i > len(hparam_chain[0, 0, :])+len(model_param_chain[0, 0, :]):
if len(mass[0,0,:]) == 1 and len(model_name) == 1:
axs[i].plot(xs, mass[:, chain, 0], c='xkcd:bluish', alpha = 0.2)
axs[i].set_ylabel("mass")
else:
axs[i].plot(xs, mass[:, chain, i-1-len(hparam_chain[0,0,:])-len(model_param_chain[0,0,:])], c='xkcd:bluish', alpha = 0.2)
axs[i].set_ylabel(r"mass$_{}$".format(i-1-len(hparam_chain[0,0,:])-len(model_param_chain[0,0,:])))
if save_folder is not None:
assert savefilename is not None, "You need to give both save_folder and savefilename to save the figure"
plt.savefig(str(save_folder)+"/"+str(savefilename)+".png")
plt.show()
# corner plots
[docs]def corner_plot(hparam_chain, kernel_name, model_param_chain, model_name, masses = None, save_folder=None, savefilename="corner", errors=False):
'''
Function to plot the posteriors of each parameter from the mcmc as corner plots.
If there is no dynamic range in any of the parameters the plots will automatically show up as empty.
Parameters
----------
hparam_chain : array
Array of all the sets of hyperparameters of the MCMC
kernel_name : string
Name of the kernel
model_param_chain : array
Array of all the sets of model parameters of the MCMC.
model_name : string
Name of the model
masses: array, optional
Array of all the masses for all planets produced by the MCMC, defaults to None
save_folder : str, optional
Folder to save the plot, by default None. If None, the plot is not saved.
savefilename : str, optional
Name of the file to save the plot, by default Corner.
Returns
-------
Corner plots of the posteriors of each parameter
'''
import corner
hparam_names = aux.hparam_names(kernel_name)
model_param_names = aux.model_param_names(model_name, SkCk=True)
# Resizing of arrays: create 2d array, nrows=iteration*chians, ncols = nparam
hp = np.array(hparam_chain)
shapes = hp.shape
#print("shape",shapes)
numb_chains = shapes[1]
nparam = shapes[2]
depth = shapes[0]
hparams = np.zeros((((depth)*numb_chains),nparam))
for p in range(nparam):
numb=0
for c in range(numb_chains):
for i in range(depth):
hparams[numb,p] = hparam_chain[i,c,p]
numb += 1
par = np.array(model_param_chain)
shapes2 = par.shape
#print("shape",shapes)
numb_chains2 = shapes2[1]
nparam2 = shapes2[2]
depth2 = shapes2[0]
modpar = np.zeros((((depth2)*numb_chains2),nparam2))
for p in range(nparam2):
numb=0
for c in range(numb_chains2):
for i in range(depth2):
modpar[numb,p] = model_param_chain[i,c,p]
numb += 1
if masses is not None:
mass = np.array(masses)
shapes3 = mass.shape
numb_chains3 = shapes3[1]
nparam3 = shapes3[2]
depth3 = shapes3[0]
massval = np.zeros((((depth3)*numb_chains3), nparam3))
for p in range(nparam3):
numb = 0
for c in range(numb_chains3):
for i in range(depth2):
massval[numb][p] = masses[i,c,p]
numb += 1
mass_list = []
if len(masses[0,0,:]) == 1 and len(model_name) == 1:
name = "mass"
mass_list.append(name)
else:
for i in range(len(masses[0,0,:])):
name = r"mass$_{}$".format(i)
mass_list.append(name)
CORNER_KWARGS = dict(label_kwargs=dict(fontsize=10), title_kwargs=dict(fontsize=10))
# Corner plot of the hyperparameters
try:
fig = corner.corner(hparams, labels = hparam_names, show_titles=True, **CORNER_KWARGS)
if save_folder is not None:
assert savefilename is not None, "You need to give both save_folder and savefilename to save the figure"
plt.savefig(str(save_folder)+"/"+str(savefilename)+"_hparam"+".pdf")
plt.show()
except ValueError:
print("Inside: No dynamic range in kernel")
try:
# Corner plot of model parameters
fig = corner.corner(modpar, labels=model_param_names, show_titles=True, **CORNER_KWARGS)
if save_folder is not None:
assert savefilename is not None, "You need to give both save_folder and savefilename to save the figure"
plt.savefig(str(save_folder)+"/"+str(savefilename)+"_modelpar"+".pdf")
plt.show()
except ValueError:
print("Inside: No dynamic range in model")
if masses is not None:
try:
#corner plot of the masses
fig = corner.corner(massval, labels=mass_list, show_titles=True, **CORNER_KWARGS)
if save_folder is not None:
assert savefilename is not None, "You need to give both save_folder and savefilename to save the figure"
plt.savefig(str(save_folder)+"/"+str(savefilename)+"_masses"+".pdf")
plt.show()
except ValueError:
print("insideL No dynamic range in masses")
# Full corner plot
if masses is None:
try:
full_param_chain = np.concatenate((hparams, modpar), axis=1)
full_names = hparam_names + model_param_names
fig = corner.corner(full_param_chain, labels=full_names, show_titles=True, **CORNER_KWARGS)
if save_folder is not None:
assert savefilename is not None, "You need to give both save_folder and savefilename to save the figure"
plt.savefig(str(save_folder)+"/"+str(savefilename)+"_full"+".pdf")
plt.show()
final_param_values = []
final_param_erru = []
final_param_errd = []
for a in range(len(hparam_chain[0,0,:])):
errd, quantile, erru = corner.quantile(hparam_chain[:,:,a], [0.16,0.5,0.84])
final_param_values.append(quantile)
final_param_erru.append(erru)
final_param_errd.append(errd)
for b in range(len(model_param_chain[0,0,:])):
errd, quantile, erru = corner.quantile(modpar[:,b], [0.16,0.5,0.84])
final_param_values.append(quantile)
final_param_erru.append(erru)
final_param_errd.append(errd)
except ValueError:
try:
plt.show()
print("Inside: No dynamic range in model")
final_param_values = []
final_param_erru = []
final_param_errd = []
for a in range(len(hparam_chain[0,0,:])):
errd, quantile, erru = corner.quantile(hparam_chain[:,:,a], [0.16,0.5,0.84])
final_param_values.append(quantile)
final_param_erru.append(erru)
final_param_errd.append(errd)
except ValueError:
plt.show()
print("Inside: No dynamic range in kernel")
final_param_values = []
final_param_erru = []
final_param_errd = []
for a in range(len(model_param_chain[0,0,:])):
errd, quantile, erru = corner.quantile(modpar[:,a], [0.16,0.5,0.84])
final_param_values.append(quantile)
final_param_erru.append(erru)
final_param_errd.append(errd)
else:
try:
full_param_chain = np.concatenate((hparams, modpar, massval), axis=1)
full_names = hparam_names + model_param_names + mass_list
fig = corner.corner(full_param_chain, labels=full_names, show_titles=True, **CORNER_KWARGS)
if save_folder is not None:
assert savefilename is not None, "You need to give both save_folder and savefilename to save the figure"
plt.savefig(str(save_folder)+"/"+str(savefilename)+"_full"+".pdf")
plt.show()
final_param_values = []
final_param_erru = []
final_param_errd = []
for a in range(len(hparam_chain[0,0,:])):
errd, quantile, erru = corner.quantile(hparam_chain[:,:,a], [0.16,0.5,0.84])
final_param_values.append(quantile)
final_param_erru.append(erru)
final_param_errd.append(errd)
for b in range(len(model_param_chain[0,0,:])):
errd, quantile, erru = corner.quantile(modpar[:,b], [0.16,0.5,0.84])
final_param_values.append(quantile)
final_param_erru.append(erru)
final_param_errd.append(errd)
for c in range(len(masses[0,0,:])):
errd, quantile, erru = corner.quantile(masses[:,:,c], [0.16, 0.5, 0.84])
final_param_values.append(quantile)
final_param_erru.append(erru)
final_param_errd.append(errd)
except ValueError:
try:
plt.show()
print("Inside: No dynamic range in model")
final_param_values = []
final_param_erru = []
final_param_errd = []
for a in range(len(hparam_chain[0,0,:])):
errd, quantile, erru = corner.quantile(hparam_chain[:,:,a], [0.16,0.5,0.84])
final_param_values.append(quantile)
final_param_erru.append(erru)
final_param_errd.append(errd)
for c in range(len(masses[0,0,:])):
errd, quantile, erru = corner.quantile(masses[:,:,c], [0.16, 0.5, 0.84])
final_param_values.append(quantile)
final_param_erru.append(erru)
final_param_errd.append(errd)
except ValueError:
try:
plt.show()
print("Inside: No dynamic range in kernel")
final_param_values = []
final_param_erru = []
final_param_errd = []
for a in range(len(model_param_chain[0,0,:])):
errd, quantile, erru = corner.quantile(modpar[:,a], [0.16,0.5,0.84])
final_param_values.append(quantile)
final_param_erru.append(erru)
final_param_errd.append(errd)
for c in range(len(masses[0,0,:])):
errd, quantile, erru = corner.quantile(masses[:,:,c], [0.16, 0.5, 0.84])
final_param_values.append(quantile)
final_param_erru.append(erru)
final_param_errd.append(errd)
except ValueError:
try:
plt.show()
print("Inside: No dynamic range in mass")
final_param_values = []
final_param_erru = []
final_param_errd = []
for a in range(len(model_param_chain[0,0,:])):
errd, quantile, erru = corner.quantile(modpar[:,a], [0.16,0.5,0.84])
final_param_values.append(quantile)
final_param_erru.append(erru)
final_param_errd.append(errd)
for a in range(len(hparam_chain[0,0,:])):
errd, quantile, erru = corner.quantile(hparam_chain[:,:,a], [0.16,0.5,0.84])
final_param_values.append(quantile)
final_param_erru.append(erru)
final_param_errd.append(errd)
except ValueError:
try:
plt.show()
print("Inside: No dynamic range in kernel or model")
final_param_values = []
final_param_erru = []
final_param_errd = []
for c in range(len(masses[0,0,:])):
errd, quantile, erru = corner.quantile(masses[:,:,c], [0.16, 0.5, 0.84])
final_param_values.append(quantile)
final_param_erru.append(erru)
final_param_errd.append(errd)
except ValueError:
try:
plt.show()
print("Inside: No dynamic range in kernel or mass")
final_param_values = []
final_param_erru = []
final_param_errd = []
for a in range(len(model_param_chain[0,0,:])):
errd, quantile, erru = corner.quantile(modpar[:,a], [0.16,0.5,0.84])
final_param_values.append(quantile)
final_param_erru.append(erru)
final_param_errd.append(errd)
except ValueError:
plt.show()
print("Inside: No dynamic range in model or mass")
final_param_values = []
final_param_erru = []
final_param_errd = []
for a in range(len(hparam_chain[0,0,:])):
errd, quantile, erru = corner.quantile(hparam_chain[:,:,a], [0.16,0.5,0.84])
final_param_values.append(quantile)
final_param_erru.append(erru)
final_param_errd.append(errd)
print("Parameter values after MCMC: ", final_param_values)
if errors:
return final_param_values, final_param_erru, final_param_errd
else:
return final_param_values
# keplerian only plot
[docs]def keplerian_only_plot(time, rv, hparam, kernel_name, model_list, model_param, rv_err = None, keplerian_number = 0, flags = None, xpred = None, residuals=False, xlabel='Time [BJD]', ylabel='RV [m/s]', legend = True, save_folder=None, savefilename=None, plot = True):
"""
Function to return a plot of the data with the GP and all models except from the chosen keplerian subtracted from it, with the chosen keplerian model plotted on top
Parameters
----------
time: array of floats
x-axis array of the observation times
rv: array of floats
y-axis array of the RVs
hparam: dictionary
dictionary of the hyperparameters
kernel_name: string
name of the kernel
model_list: list of strings
list of the models in use
model_param: dictionary
dictionary of model parameters
rv_err: array of floats, optional
array of rv errors, defaults to None
keplerian_number: int, optional
number representing the chosen keplerian to plot, 0 to plot first keplerian, 1 to plot second keplerian etc... defaults to None
flags: array of floats, optional
array of flags representing offsets generated by the combine_data function, defaults to None
xpred: array of floats, optional
array of prediced times to be used for the smooth model, defaults to intervals of 0.1 between the max and min times
residuals: bool, optional
enable the plotting of residuals, defaults to false
xlabel: string, optional
label of x-axis, defaults to "time [BJD]"
ylabel: string, optional
label of y-axis, defaults to "RV [m/s]"
legend: bool, optional
enable the plotting of a legend, defaults to True
save_folder: string, optional
folder to save the plot, defaults to None
savefilename: string, optional
name of the save file, defaults to None
plot: bool, optional
true returns the plot, false returns parameter and rv data, defaults to True
Returns
-------
plot of subtracted data and chosen keplerian model if plot is True
parameter and rv data with optional residuals if plot is False
"""
# generate a predicted x array for the smooth models if none is given
if xpred is None:
xpred = np.arange(time.min()-1, time.max()+1, 0.1)
# check if a keplerian is in the model
if not 'keplerian' in model_list and not 'Keplerian' in model_list:
raise KeyError("no keplerain in model")
if len(model_list) == 1:
# model list has been check for a keplerian so if len=1 that means the model is just one keplerian
model_y = get_model(model_list, time, model_param, to_ecc=False)
loglik = gp.GPLikelihood(time, rv, rv_err, hparam, kernel_name, model_y, model_param)
GP_y, GP_err = loglik.predict(xpred)
# smooth model generated based on xpred
smooth_model_y = get_model(model_list, xpred, model_param, to_ecc=False)
# interpolate the gp to get points relating to the times
f = interp.interp1d(xpred, GP_y, kind='cubic', bounds_error=False)
new_pred_y = f(time)
# subtract the points relating to the time array from the rv data to get the pg subtracted data
rv = (rv-new_pred_y)
else:
# if the length of the model list is not 1, ensure offsets are set up properly
if flags is None:
# no flags suggests no offsets are in use, check if this is the case
for i in model_list:
if i.startswith('off') or i.startswith('Off'):
raise KeyError("flags must be provided if using offsets")
if flags is not None:
# flags suggests an offset model is in use, check if this is the case
if 'offset' in model_list:
pass
elif 'Offset' in model_list:
pass
else:
raise KeyError("offsets should be provided if flags are given")
# if offsets are in use, set up the dictionary for plotting the offsets as different colours
c_list = ['tab:blue', 'tab:red', 'tab:green', 'tab:purple', 'tab:brown', 'tab:pink']
c_dict = {'tab:blue':'Subtracted Dataset_1', 'tab:red':'Subtracted Dataset_2', 'tab:green':'Subtracted Dataset_3', 'tab:purple':'Subtracted Dataset_4', 'tab:brown':'Subtracted Dataset_5', 'tab:pink':'Subtracted Dataset_6'}
c_array = []
for i in range(len(flags)):
for a in range(len(c_list)):
if flags[i] == a:
c_array.append(c_list[a])
# c_array is an array in the same form as the flags but where numbers are replaced by colours
c_array = np.array(c_array)
# run the model_y and gp likelihood normally for the time array and model parameters
model_y = get_model(model_list, time, model_param, to_ecc=False, flags = flags)
loglik = gp.GPLikelihood(time, rv, rv_err, hparam, kernel_name, model_y, model_param)
GP_y, GP_err = loglik.predict(xpred)
# interpolate the gp to get points relating to the times
f = interp.interp1d(xpred, GP_y, kind='cubic', bounds_error=False)
new_pred_y = f(time)
# subtract the points relating to the time array and then the model_y from the data to effectively get residuals, data with all models and the gp subtracted from it
blank_rv = (rv-new_pred_y)
blank_rv = blank_rv - model_y
# identify the number of keplerians in the model list and create a new list of just those
kep_num_list = []
for i in model_list:
if i.startswith('kep') or i.startswith('Kep'):
kep_num_list.append(i)
# create a new model list of just one keplerian to be used when making the new model
new_model_list = ["Keplerian"]
# range kep_num_list ensures it will always iterate for the number of keplerians
for i in range(len(kep_num_list)):
if keplerian_number == i:
# create a new model parameter dictionary based on the chosen keplerian
new_model_param = {"P":model_param["P_"+str(i)], "K":model_param["K_"+str(i)], "ecc":model_param["ecc_"+str(i)], "omega":model_param["omega_"+str(i)], "t0":model_param["t0_"+str(i)]}
model_param = new_model_param
# create a smooth model and data model based on just this keplerian using the new model list and parameters for one keplerain
smooth_model_y = get_model(new_model_list, xpred, model_param, to_ecc=False)
new_data_model_y = get_model(new_model_list, time, model_param, to_ecc=False)
# add the data model back onto the rv data as this was subtracted earlier as a part of subtracting the whole model
rv = blank_rv + new_data_model_y
if residuals is False:
# set up one plot for no residuals
fig = plt.figure(figsize = (10,7))
ax = fig.add_subplot(1,1,1)
try:
# try plotting multiple colours for if offsets are present
rv_err = np.array(rv_err)
for g in np.unique(c_array):
ix = np.where(c_array == g)
ax.errorbar(time[ix], rv[ix], yerr = rv_err[ix], fmt = '.', c = g, label = c_dict[g])
except:
# plot data if no offsets are present
ax.errorbar(time, rv, yerr = rv_err, fmt = '.', color = 'tab:blue', label = 'Subtracted Data')
ax.plot(xpred, smooth_model_y, color = 'blue', label = 'Predicted Keplerian Model')
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
if legend is True:
ax.legend(loc='upper left', borderaxespad=0, fontsize = 10)
if residuals is True:
# set up two plots for residuals
fig, axs = plt.subplots(ncols=1, nrows=2, sharex=True, figsize=(10,7), gridspec_kw={'height_ratios': [3,1]})
fig.subplots_adjust(hspace=0)
try:
# try plotting multiple colours for if offsets are present
rv_err = np.array(rv_err)
for g in np.unique(c_array):
ix = np.where(c_array == g)
axs[0].errorbar(time[ix], rv[ix], yerr = rv_err[ix], fmt = '.', c = g, label = c_dict[g])
except:
# plot data if no offsets are present
axs[0].errorbar(time, rv, yerr = rv_err, fmt = '.', color = 'tab:blue', label = 'Subtracted Data')
# plots for data and model on the first plot
axs[0].plot(xpred, smooth_model_y, color = 'blue', label = 'Predicted Keplerian Model')
axs[0].set_ylabel(ylabel)
if legend is True:
axs[0].legend(loc='upper left', borderaxespad=0, fontsize = 10)
# interpolate the smooth model to get the points relating to the time array
f = interp.interp1d(xpred, smooth_model_y, kind='cubic', bounds_error=False)
new_pred_y = f(time)
# subtract the points relating to the time array from the rv data to get residuals
res = (rv-new_pred_y)
try:
# try plotting multiple colours for offsets on the second plot
for g in np.unique(c_array):
ix = np.where(c_array == g)
axs[1].errorbar(time[ix], res[ix], yerr = rv_err[ix], fmt = '.', c = g)
except:
# plotting for no offsets on the second plot
axs[1].errorbar(time, res, yerr = rv_err, fmt = '.', color = 'tab:blue')
axs[1].set_ylabel("Residuals")
axs[1].set_xlabel(xlabel)
if save_folder is not None:
assert savefilename is not None, "You need to give both save_folder and savefilename to save the figure"
plt.savefig(str(save_folder)+"/"+str(savefilename)+".png", bbox_inches='tight')
if savefilename is not None and save_folder is None:
print("Figure not saved, you need to provide savefilename and save_folder both")
if plot is True:
plt.show()
if plot is False:
plt.close()
if residuals is False:
try:
return model_param, rv, c_array, c_dict
except:
return model_param, rv
if residuals is True:
try:
return model_param, rv, c_array, c_dict, res
except:
return model_param, rv, res
# phase folded plot
[docs]def phase_plot(time, rv, hparam, kernel_name, model_list, model_param, rv_err = None, keplerian_number = 0, flags = None, xpred = None, residuals=False, xlabel='Time [BJD]', ylabel='RV [m/s]', legend = True, save_folder=None, savefilename=None):
"""
Function to plot the phase folded rv curve of the chosen keplerian model along with the data representing that model
Parameters
----------
time: array of floats
x-axis array of the observation times
rv: array of floats
y-axis array of the RVs
hparam: dictionary
dictionary of the hyperparameters
kernel_name: string
name of the kernel
model_list: list of strings
list of the models in use
model_param: dictionary
dictionary of model parameters
rv_err: array of floats, optional
array of rv errors, defaults to None
keplerian_number: int, optional
number representing the chosen keplerian to plot, 0 to plot first keplerian, 1 to plot second keplerian etc... defaults to None
flags: array of floats, optional
array of flags representing offsets generated by the combine_data function, defaults to None
xpred: array of floats, optional
array of prediced times to be used for the smooth model, defaults to intervals of 0.1 between the max and min times
residuals: bool, optional
enable the plotting of residuals, defaults to false
xlabel: string, optional
label of x-axis, defaults to "time [BJD]"
ylabel: string, optional
label of y-axis, defaults to "RV [m/s]"
legend: bool, optional
enable the plotting of a legend, defaults to True
save_folder: string, optional
folder to save the plot, defaults to None
savefilename: string, optional
name of the save file, defaults to None
Returns
-------
phase-folded plot of the chosen keplerian along with the subtracted data
"""
# get essential data from keplerian only function depending on whether residuals and offsets are present
if residuals is False:
try:
model_param, rv, c_array, c_dict = keplerian_only_plot(time, rv, hparam, kernel_name, model_list, model_param, rv_err, keplerian_number, flags, xpred, residuals, xlabel, ylabel, legend, save_folder, savefilename, plot = False)
except:
model_param, rv = keplerian_only_plot(time, rv, hparam, kernel_name, model_list, model_param, rv_err, keplerian_number, flags, xpred, residuals, xlabel, ylabel, legend, save_folder, savefilename, plot = False)
if residuals is True:
try:
model_param, rv, c_array, c_dict, res = keplerian_only_plot(time, rv, hparam, kernel_name, model_list, model_param, rv_err, keplerian_number, flags, xpred, residuals, xlabel, ylabel, legend, save_folder, savefilename, plot = False)
except:
model_param, rv, res = keplerian_only_plot(time, rv, hparam, kernel_name, model_list, model_param, rv_err, keplerian_number, flags, xpred, residuals, xlabel, ylabel, legend, save_folder, savefilename, plot = False)
# only one keplerian will be present in the model params due to how the keplerian only function works
model_list = ["Keplerian"]
# obtain period and t0 values to phase fold the time
period = model_param["P"].value
t0 = model_param["t0"].value
# phase fold the times
phase = (time - t0)/period
# Want phase to be between 0 and 1
epoch = np.floor(phase)
true_phase = phase - epoch
# set the center at zero
end = np.where(true_phase >= 0.5)[0]
true_phase[end] -= 1.0
# create an xpred array in order to make a smooth model
phase_xpred = np.arange(-0.5, 0.5, 0.001)
# phase of the keplerian has effectively been set to 1 by the phase folding, set it to 1 to run the model
new_model_param = {"P":0, "K":model_param["K"], "ecc":model_param["ecc"], "omega":model_param["omega"], "t0":0}
new_model_param["P"] = par.parameter(1., 0., True)
# similarly with t0
new_model_param["t0"] = par.parameter(0., 0., True)
# generate the smooth model
smooth_model_y = get_model(model_list, phase_xpred, new_model_param, to_ecc = False)
# put the new phase folded times in order and order the RVs, c_array, and residuals in the same order
if residuals is False:
try:
true_phase, rv, c_array = zip(*sorted(zip(true_phase, rv, c_array)))
except:
true_phase, rv = zip(*sorted(zip(true_phase, rv)))
if residuals is True:
try:
true_phase, rv, c_array, res = zip(*sorted(zip(true_phase, rv, c_array, res)))
except:
true_phase, rv , res = zip(*sorted(zip(true_phase, rv, res)))
# generate empty lists to hold information needed to extend the plot
end_rv_num = []
start_rv_num = []
start_time = []
end_time = []
for N,i in enumerate(true_phase):
if i < -0.4:
# append the indices of all times less than -0.4 to an start list
start_rv_num.append(N)
# append all times less than -0.4 to an start list and add 1 to them so they extend the data
start_time.append(i+1)
if i > 0.4:
# append the indices of all times larger than 0.4 to an end list
end_rv_num.append(N)
# append all times larger than 0.4 to an end list and minus 1 to them so they extend the data
end_time.append(i-1)
else:
pass
# start and end rv is a list of RVs correlating to those start and end time positions
start_rv = rv[:start_rv_num[-1]+1]
end_rv = rv[end_rv_num[0]:]
# similarly for the rv_err
start_yerr = rv_err[:start_rv_num[-1]+1]
end_yerr = rv_err[end_rv_num[0]:]
# create xpreds for extending the model
end_xpred = np.arange(0.5, 0.6, 0.001)
start_xpred = np.arange(-0.6, -0.5, 0.001)
# generate extended models for the start and the end
end_model = get_model(model_list, end_xpred, new_model_param, to_ecc = False)
start_model = get_model(model_list, start_xpred, new_model_param, to_ecc = False)
if residuals is False:
fig = plt.figure(figsize = (10,7))
ax = fig.add_subplot(1,1,1)
try:
# try plotting multiple colours for if offsets are present
rv_err = np.array(rv_err)
true_phase = np.array(true_phase)
rv = np.array(rv)
c_array = np.array(c_array)
for g in np.unique(c_array):
ix = np.where(c_array == g)
ax.errorbar(true_phase[ix], rv[ix], yerr = rv_err[ix], fmt = '.', c = g, label = c_dict[g])
except:
# plot data if no offsets are present
ax.errorbar(true_phase, rv, yerr = rv_err, fmt = '.', color = 'tab:blue', label = 'Subtracted Data')
# plot the model for the data in blue
ax.plot(phase_xpred, smooth_model_y, c = 'blue', label = 'Keplerian Model')
# plot the extended data points and model in grey
ax.errorbar(start_time, start_rv, yerr = start_yerr, fmt = '.', color = 'gray')
ax.errorbar(end_time, end_rv, yerr = end_yerr, fmt = '.', color = 'gray')
ax.plot(start_xpred, start_model, c = 'gray')
ax.plot(end_xpred, end_model, c = 'gray')
ax.set_ylabel(ylabel)
ax.set_xlabel(xlabel)
if legend is True:
ax.legend(loc='upper left', borderaxespad=0, fontsize = 10)
if residuals is True:
# set up two plots for residuals
fig, axs = plt.subplots(ncols=1, nrows=2, sharex=True, figsize=(10,7), gridspec_kw={'height_ratios': [3,1]})
fig.subplots_adjust(hspace=0)
try:
# try plotting multiple colours for if offsets are present
rv_err = np.array(rv_err)
true_phase = np.array(true_phase)
rv = np.array(rv)
c_array = np.array(c_array)
for g in np.unique(c_array):
ix = np.where(c_array == g)
axs[0].errorbar(true_phase[ix], rv[ix], yerr = rv_err[ix], fmt = '.', c = g, label = c_dict[g])
except:
# plot data if no offsets are present
axs[0].errorbar(true_phase, rv, yerr = rv_err, fmt = '.', color = 'tab:blue', label = 'Subtracted Data')
# plot the model for the data in blue
axs[0].plot(phase_xpred, smooth_model_y, c = 'blue', label = 'Keplerian Model')
# plot the extended data points and model in grey
axs[0].errorbar(start_time, start_rv, yerr = start_yerr, fmt = '.', color = 'gray')
axs[0].errorbar(end_time, end_rv, yerr = end_yerr, fmt = '.', color = 'gray')
axs[0].plot(start_xpred, start_model, c = 'gray')
axs[0].plot(end_xpred, end_model, c = 'gray')
axs[0].set_ylabel(ylabel)
if legend is True:
axs[0].legend(loc='upper left', borderaxespad=0, fontsize = 10)
try:
# try plotting multiple colours for offsets on the second plot
res = np.array(res)
for g in np.unique(c_array):
ix = np.where(c_array == g)
axs[1].errorbar(true_phase[ix], res[ix], yerr = rv_err[ix], fmt = '.', c = g)
except:
# plotting for no offsets on the second plot
axs[1].errorbar(true_phase, res, yerr = rv_err, fmt = '.', color = 'tab:blue')
start_res = res[:start_rv_num[-1]+1]
end_res = res[end_rv_num[0]:]
# plotting extended residuals
axs[1].scatter(start_time, start_res, c = 'gray', s = 10)
axs[1].scatter(end_time, end_res, c = 'gray', s = 10)
axs[1].set_ylabel("Residuals")
axs[1].set_xlabel(xlabel)
if save_folder is not None:
assert savefilename is not None, "You need to give both save_folder and savefilename to save the figure"
plt.savefig(str(save_folder)+"/"+str(savefilename)+".png", bbox_inches='tight')
if savefilename is not None and save_folder is None:
print("Figure not saved, you need to provide savefilename and save_folder both")
plt.show()