Source code for src.magpy_rv.saving

"""
Function to allow the user to save any or all outputs from the mcmc
"""

# Contains:
#     saving function
#
#    
# Author: Federica Rescigno, Bryce Dixon
# Version: 22.08.2023    

import os
import pickle as pk

import numpy as np
import pandas as pd

import magpy_rv.parameters as par
import magpy_rv.models as mod
import magpy_rv.gp_likelihood as gp
from magpy_rv.mcmc_aux import get_model
from magpy_rv.plotting import offset_subtract
import magpy_rv.auxiliary as aux


# saving function


[docs]def save(folder_name, rv, time, rv_err, model_list = None, init_hparam = None, kernel = None, init_param = None, prior_list = [], fin_hparam_post = None, fin_param_post = None, logl_chain = None, masses = None, fin_param_values = None, fin_param_erru = None, fin_param_errd = None, flags = None, Mstar = None, fin_to_skck = False, burnin = 0):
    """
    Saves offset subtracted and combined RVs and times, rv_error, kernel name, model list, initial hyperparameters and parameters, initial LogL, priors, final hyperparameter and parameter posteriors as txt and pickle files, final LogL posterior as txt and pickle files, mass posteriors as txt and pickle, final hyperparameter, parameter and mass values along with errors in a readable form and as a latex table, final logL value
    
    Parameters
    ----------
    folder_name: string
        file destination of the folder to save to
    rv: array of floats
        array of the rv values
    time: array of floats
        array of the time values
    rv_err: array of floats
        array of the rv error values
    model_list: list of strings, optional
        list of the names of the model in use, defaults to None
    init_hparam: dictionary, optional
        dictionary of the initial hyperparameters, defaults to None
    kernel: string, optional
        name of the chosen kernel, defaults to None
    init_param: dictionary, optional
        dictionary of the initial model parameters, defaults to None
    prior_list: dictionary, optional
        dictionary of the priors, defaults to empty list
    fin_hparam_post: array of floats, optional
        3d array of the hypermarater posteriors, defaults to None
    fin_param_post: array of floats, optional
        3d array of the model parameter posteriors, defaults to None
    logl_chain: array of floats, optional
        3d array of the logL posteriors, defaults to None
    masses: array of floats, optional
        3d array of the mass posteriors, defaults to None
    fin_param_values: list of floats, optional
        list of the final hyperparameter, parameter, and mass values from the posteriors, defaults to None
    fin_param_erru: list of floats, optional
        list of the final hyperparameter, parameter, and mass upper errors from the posteriors, defaults to None
    fin_param_errd: list of floats, optional
        list of the final hyperparameter, parameter, and mass lower errors form the posteriors, defaults to None
    flags: array of floats, optional
        array of floats representing the offsets, defaults to None
    Mstar: float, optional
        mass of the host star in solar masses
    fin_to_skck: bool, optional
        if True, returns final keplerian parameters with Sk and Ck, if False returns final keplerian parameters with ecc and omega, defaults to False
    burnin: integer, optional
        integer value to specify the length of the burn in, defaults to 0 for no burn in
    """
    
    # create new folder titled current run, at the moment the code will not run if the folder already exists
    if os.path.exists(folder_name):
        pass
    else:
        os.mkdir(folder_name)
    
    # create files for rv, time, and rv error
    rv_file = os.path.join(folder_name, "rv_data.txt")
    time_file = os.path.join(folder_name, "time_data.txt")
    rv_err_file = os.path.join(folder_name, "rv_err.txt")
    
    if flags is not None:
        
        # if offsets exist append them to a list
        offsets = []
        try:
            off = init_param['offset'].value
            offsets.append(off)
        except:
            for i in range(len(model_list)):
                try:
                    off = init_param['offset_'+str(i)].value
                    offsets.append(off)
                except:
                    continue
        
        # subtract the offsets and save the offset rv to a file        
        off_rv = offset_subtract(rv, flags, offsets)
        rv_off_file = os.path.join(folder_name, "rv_data_offset_sub.txt")
        np.savetxt(rv_off_file, off_rv)
    
    # save the times, rvs, and rv error to their files
    np.savetxt(time_file, time)
    np.savetxt(rv_file, rv)
    np.savetxt(rv_err_file, rv_err)
    
    # if a kernel has been entered, save all the hyperparameter information to an initial conditions file
    if kernel is not None:
        initial_file = os.path.join(folder_name, "initial_conditions.txt")
        initial_cond_file = open(initial_file, "w+")
        initial_cond_file.write("\nKernel Name:\n")
        initial_cond_file.write(kernel)
        initial_cond_file.write("\nInitial Hyperparameters:\n")
        initial_cond_file.write(init_hparam.__str__())
    # if a model has been entered, add the model parameters to this file
    if model_list is not None:
        initial_cond_file.write("\nModel List:\n")
        initial_cond_file.write(model_list.__str__())
        initial_cond_file.write("\nInitial Parameters:\n")
        initial_cond_file.write(init_param.__str__())
    # if Mstar is given, save that aswell
    if Mstar is not None:
        initial_cond_file.write("\nHost Star Mass:\n")
        initial_cond_file.write(Mstar.__str__())
        initial_cond_file.write(" Solar Masses")
        
        # generate the model to get the log likelihood
        model_y = get_model(model_list, time, init_param, to_ecc = False, flags = flags)    
        loglik = gp.GPLikelihood(time, rv, rv_err, init_hparam, kernel, model_y, init_param)
        logL = loglik.LogL(prior_list)
        
        # add the initial log likelihood to the file
        initial_cond_file.write("\nInitial LogL:\n")
        initial_cond_file.write(logL.__str__())
    initial_cond_file.close()
    
    # create a prior file and save the prior list to the file
    prior_file = os.path.join(folder_name, "priors.txt")
    prior_file = open(prior_file, "w+")
    prior_file.write("\nPriors:\n")
    prior_file.write(prior_list.__str__())
    prior_file.close()

    assert type(burnin) == int, "burnin should be an integer"
    # if a hyperparameter posterior has been entered, save these to files based on hyperparameter as arrays with ncolumns = chains, and nrows = iterations
    if fin_hparam_post is not None:
        hparams = aux.hparam_names(kernel, plotting = False)
        # first save hparam posteriors as a pickle file then save each posterior seperately as a txt file
        hparam_pkl = os.path.join(folder_name, "hparam_posteriors.pkl")
        hparam_pkl = open(hparam_pkl, "wb")
        pk.dump(fin_hparam_post[burnin:], hparam_pkl)
        hparam_pkl.close()
        for N,i in enumerate(hparams):
            hparam_post = os.path.join(folder_name, "{}_posteriors.txt".format(i))
            np.savetxt(hparam_post, fin_hparam_post[burnin:,:,N])
    
    # do the same with model parameters    
    if fin_param_post is not None:
        params = aux.model_param_names(model_list, SkCk = True, plotting = False)
        param_pkl = os.path.join(folder_name, "param_posteriors.pkl")
        param_pkl = open(param_pkl, "wb")
        pk.dump(fin_param_post[burnin:], param_pkl)
        param_pkl.close()
        for N,i in enumerate(params):
            param_post = os.path.join(folder_name, "{}_posteriors.txt".format(i))
            np.savetxt(param_post, fin_param_post[burnin:,:,N])
    
    # do the same with the logL chain
    if logl_chain is not None:
        logl_post = os.path.join(folder_name, "logL_posteriors.txt")
        np.savetxt(logl_post, logl_chain[burnin:,:,0])
        logl_pkl = os.path.join(folder_name, "logl_posteriors.pkl")
        logl_pkl = open(logl_pkl, "wb")
        pk.dump(logl_chain[burnin:], logl_pkl)
        logl_pkl.close()
    
    # do the same with the masses
    if masses is not None:
        mass_list = []
        if len(masses[0,0,:]) == 1 and len(model_list) == 1:
            name = "mass"
            mass_list.append(name)
        else:
            for i in range(len(masses[0,0,:])):
                name = "mass_{}".format(i)
                mass_list.append(name)
        
        mass_pkl = os.path.join(folder_name, "mass_posteriors.pkl")
        mass_pkl = open(mass_pkl, "wb")
        pk.dump(masses[burnin:], mass_pkl)
        mass_pkl.close()
        for N,i in enumerate(mass_list):
            mass_post = os.path.join(folder_name, "{}_posteriors.txt".format(i))
            np.savetxt(mass_post, masses[burnin:,:,N])
    
    # if final parameter values have been entered, start by getting a list of all the existing parameters, should match the length of the final parameter values list        
    if fin_param_values is not None:
        try:
            hparams = aux.hparam_names(kernel, plotting = False)
            # SkCk will acocunt for whether the user wants ecc and omega returned or Sk and Ck returned
            params = aux.model_param_names(model_list, SkCk = fin_to_skck, plotting = False)
            mass_list = []
            if len(fin_param_values) > (len(hparams)+len(params)):
                try:
                    init_param['P'].value
                    mass_list.append('mass')
                except:
                    for i in range(len(model_list)):
                        try:
                            init_param['P_'+str(i)].value
                            mass_list.append('mass_'+str(i))
                        except:
                            continue
            # mass list will just be empty if there are no masses
            param_list = hparams + params + mass_list
        except:
            # if there is no model the list will just be hyperparameters
            param_list = hparams

        # create the final parameter file
        fin_param_file = os.path.join(folder_name, "final_parameter_values.txt")
        fin_param_file = open(fin_param_file, "w+")
        fin_param_table = os.path.join(folder_name, "final_parameter_table.txt")
        fin_param_table = open(fin_param_table, "w+")
        value_list = []
        erru_list = []
        errd_list = []
        
        # if the user wants to return Sk and Ck, read the values as they are form the fin_param_values list, naming the files after the parameter in param_list
        if fin_to_skck is True:
            for N,i in enumerate(param_list):
                value_list.append(fin_param_values[N])
                fin_param_file.write("\n{}:\n".format(i))
                fin_param_file.write(fin_param_values[N].__str__())
                try:
                    # if errors are given try to include the errors
                    fin_param_file.write("\n+")
                    # noting the errors given in fin_param_err and fin_param_errd do not have the value subtracted from them so this must be done
                    erru = fin_param_erru[N] - fin_param_values[N]
                    erru_list.append(erru)
                    fin_param_file.write(erru.__str__())
                except:
                    continue
                try:
                    fin_param_file.write("\n")
                    errd = fin_param_errd[N] - fin_param_values[N]
                    errd_list.append(errd)
                    fin_param_file.write(errd.__str__())
                except:
                    continue
            
        if fin_to_skck is False:
            # if the user wants to return ecc and omega, Sk and Ck in fin_param_values will need converting
            for N,i in enumerate(param_list):
                if i.startswith('ecc'):
                    # if we are on ecc in the list, this corresponds to ck in fin_param_values, pull that out along with the next value, sk
                    sk = fin_param_values[N]
                    ck = fin_param_values[N+1]
                    # run them through to_ecc to get ecc and omega
                    ecc, omega = aux.to_ecc(sk, ck)
                    # add ecc to the file
                    value_list.append(ecc)
                    fin_param_file.write("\n{}:\n".format(i))
                    fin_param_file.write(ecc.__str__())
                elif i.startswith('omega'):
                    # omega is checked next so can just be added to the file
                    value_list.append(omega)
                    fin_param_file.write("\n{}:\n".format(i))
                    fin_param_file.write(omega.__str__())
                else:
                    # the rest added as normal
                    value_list.append(fin_param_values[N])
                    fin_param_file.write("\n{}:\n".format(i))
                    fin_param_file.write(fin_param_values[N].__str__())
                try:
                    if i.startswith('ecc'):
                        # perform similar steps for errors
                        sk_erru = fin_param_erru[N]
                        ck_erru = fin_param_erru[N+1]
                        ecc_erru, omega_erru = aux.to_ecc(sk_erru, ck_erru)
                        fin_param_file.write("\n+")
                        ecc_erru = ecc_erru - ecc
                        erru_list.append(ecc_erru)
                        fin_param_file.write(ecc_erru.__str__())
                    elif i.startswith('omega'):
                        fin_param_file.write("\n+")
                        omega_erru = omega_erru - omega
                        erru_list.append(omega_erru)
                        fin_param_file.write(omega_erru.__str__())
                    else:
                        fin_param_file.write("\n+")
                        erru = fin_param_erru[N] - fin_param_values[N]
                        erru_list.append(erru)
                        fin_param_file.write(erru.__str__())
                except:
                    continue
                try:
                    if i.startswith('ecc'):
                        sk_errd = fin_param_errd[N]
                        ck_errd = fin_param_errd[N+1]
                        ecc_errd, omega_errd = aux.to_ecc(sk_errd, ck_errd)
                        fin_param_file.write("\n")
                        ecc_errd = ecc_errd - ecc
                        errd_list.append(ecc_errd)
                        fin_param_file.write(ecc_errd.__str__())
                    elif i.startswith('omega'):
                        fin_param_file.write("\n")
                        omega_errd = omega_errd - omega
                        errd_list.append(omega_errd)
                        fin_param_file.write(omega_errd.__str__())
                    else:
                        fin_param_file.write("\n")
                        errd = fin_param_errd[N] - fin_param_values[N]
                        errd_list.append(errd)
                        fin_param_file.write(errd.__str__())
                except:
                    continue
        fin_param_file.close()
        value_list = [ '%.3f' % elem for elem in value_list ]
        erru_list = [ '%.3f' % elem for elem in erru_list ]
        errd_list = [ '%.3f' % elem for elem in errd_list ]
            
        # try to create a final logl value if we have the correct inputs
        try:
            # require a new hparam and param list from the fin_param_values
            hparams_list = aux.hparam_names(kernel, plotting = False)
            params_list = aux.model_param_names(model_list, SkCk = False, plotting = False)
            new_hparam = par.par_create(kernel)
            for N,i in enumerate(hparams_list):
                # new hparam list can be created by taking the first few values from fin_param_values (based on the length of hparams_list)
                value = fin_param_values[N]
                error = fin_param_erru[N] - fin_param_values[N]
                new_hparam[i] = par.parameter(value, error, True)
            new_param = mod.mod_create(model_list)
            for N,i in enumerate(params_list):
                # new param list can be made similarly but requires ck and sk to be converted into ecc and omega, done in a similar way to above
                if i.startswith('ecc'):
                    # read the correct fin_param_values value by adding the length of hparams
                    sk = fin_param_values[N + len(hparams)]
                    ck = fin_param_values[N + len(hparams) +1]
                    sk_err = fin_param_erru[N + len(hparams)]
                    ck_err = fin_param_erru[N + len(hparams) +1]
                    # convert to ecc and omega
                    ecc, omega = aux.to_ecc(sk, ck)
                    ecc_err, omega_err = aux.to_ecc(sk_err, ck_err)
                    ecc_err = ecc_err - ecc
                    new_param[i] = par.parameter(ecc, ecc_err, True)
                elif i.startswith('omega'):
                    omega_err = omega_err - omega
                    new_param[i] = par.parameter(omega, omega_err, True)
                else:
                    # create the rest as normal
                    value = fin_param_values[N + len(hparams)]
                    error = fin_param_erru[N + len(hparams)] - value
                    new_param[i] = par.parameter(value, error, True)

            # create a new model y based on the new parameters in order to get a new logL based on the new parameters
            model_y = get_model(model_list, time, new_param, to_ecc = False, flags = flags)    
            loglik = gp.GPLikelihood(time, rv, rv_err, new_hparam, kernel, model_y, new_param)
            logL = loglik.LogL(prior_list)
            # save the final logL to a final logL file
            fin_logl_file = os.path.join(folder_name, "final_info.txt")
            fin_logl_file = open(fin_logl_file, "w+")
            fin_logl_file.write("\nFinal LogL:\n")
            fin_logl_file.write(logL.__str__())
            try:
            # if a hyperparameter posterior chain has been entered, add the number of chains and iterations to the initial conditions file
                num_chains = len(fin_hparam_post[0,:,0])
                iterations = len(fin_hparam_post[:,0,0])-1
                fin_logl_file.write("\nNumber of Chains:\n")
                fin_logl_file.write(num_chains.__str__())
                fin_logl_file.write("\nCompleted Iterations:\n")
                fin_logl_file.write(iterations.__str__())
            except:
                pass
            fin_logl_file.close()
        except:
            pass
        
        try:
            hparams = aux.hparam_names(kernel, plotting = True)
            # SkCk will acocunt for whether the user wants ecc and omega returned or Sk and Ck returned
            params = aux.model_param_names(model_list, SkCk = fin_to_skck, plotting = True)
            mass_list = []
            if len(fin_param_values) > (len(hparams)+len(params)):
                try:
                    init_param['P'].value
                    mass_list.append('mass')
                except:
                    for i in range(len(model_list)):
                        try:
                            init_param['P_'+str(i)].value
                            mass_list.append('mass_'+str(i))
                        except:
                            continue
            # mass list will just be empty if there are no masses
            param_list = hparams + params + mass_list
        except:
            # if there is no model the list will just be hyperparameters
            param_list = hparams
            
        try:
            # build the latex tabel using the plot names
            logL = "%.3f" % logL
            param_list = param_list + ['Final LogL', 'Num Iterations', 'Num Chains']
            value_list = value_list + [logL, iterations, num_chains]
            erru_list = erru_list + [0,0,0]
            errd_list = errd_list + [0,0,0]
            param_tab = np.array([errd_list, erru_list, value_list, param_list])
            param_tab = np.rot90(param_tab, 1, axes = (1,0))
            param_tab = pd.DataFrame(param_tab)
            param_tab = param_tab.to_latex(index = False, header = ['Parameter', 'Value', 'Error Up', 'Error Down'])
            fin_param_table.write(param_tab.__str__())
            fin_param_table.close()
        except:
            try:
                param_tab = np.array([errd_list, erru_list, value_list, param_list])
                param_tab = np.rot90(param_tab, 1, axes = (1,0))
                param_tab = pd.DataFrame(param_tab)
                param_tab = param_tab.to_latex(index = False, header = ['Parameter', 'Value', 'Error Up', 'Error Down'])
                fin_param_table.write(param_tab.__str__())
                fin_param_table.close()
            except:
                pass