Source code for pacman.lib.mcmc

import pickle
from multiprocessing import Pool

import emcee
import numpy as np

from . import plots
from . import util
from . import read_fit_par


[docs]def mcmc_fit(data, model, params, file_name, meta, fit_par):
    """
    Calls the emcee package and does the sampling.
    """
    nvisit = int(meta.nvisit)

    # Create the mcmc_res directory
    util.create_res_dir(meta)

    # Setting up parameters for sampler
    theta = util.format_params_for_sampling(params, meta, fit_par)
    ndim, nwalkers = len(theta), meta.run_nwalkers

    fixed_array = np.array(fit_par['fixed'])
    tied_array = np.array(fit_par['tied'])
    free_array = util.return_free_array(nvisit, fixed_array, tied_array)
    untied_array = util.return_untied_array(nvisit, fixed_array, tied_array)
    l_args = [params, data, model, nvisit, fixed_array, tied_array, free_array, untied_array]

    # Setting up multiprocessing
    if hasattr(meta, 'ncpu') and meta.ncpu > 1:
        print('Using multiprocessing...')
        pool = Pool(meta.ncpu)
    else:
        meta.ncpu = 1
        pool = None

    print('Run emcee...')
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args = l_args, pool=pool)
    pos = read_fit_par.get_initial_walkers(data, theta, meta, fit_par)

    sampler.run_mcmc(pos, meta.run_nsteps, progress=True)

    # Dump the samples into a file using pickle
    with (meta.workdir / meta.fitdir / 'mcmc_res' /
          f'mcmc_out_bin{meta.s30_file_counter}_wvl{meta.wavelength:0.3f}.p')\
            .open("wb") as pickle_file:
        pickle.dump([data, params, sampler.get_chain()], pickle_file)
    nburn = meta.run_nburn

    labels = meta.labels

    # Thin out the Samples if a huge amount was used.
    # This was introduced as the run would break when saving the plot due to its big size
    if meta.run_nsteps * meta.run_nwalkers > 1000000:
        thin_corner = int((meta.run_nsteps - meta.run_nburn) * meta.run_nwalkers // 100000)
        print(f'Note: Big Corner plot with many steps. Thinning Plot by factor: {thin_corner}')
    else:
        thin_corner = 1

    samples = sampler.get_chain(discard=nburn, thin=thin_corner, flat=True)
    
    # Closing multiprocessing
    if meta.ncpu > 1:
        pool.close()
        pool.join()

    # Saving plots
    plots.mcmc_pairs(samples, params, meta, fit_par, data)
    plots.mcmc_chains(ndim, sampler, 0, labels, meta)
    plots.mcmc_chains(ndim, sampler, nburn, labels, meta)

    # Determine median and 16th and 84th percentiles
    p16_list = []
    p50_list = []
    p84_list = []
    errors_lower = []
    errors_upper = []
    for i in range(len(theta)):
        q = util.quantile(samples[:, i], [0.16, 0.5, 0.84])
        p16, p50, p84 = q
        p16_list.append(p16)
        p50_list.append(p50)
        p84_list.append(p84)
        errors_lower.append(p50 - p16)
        errors_upper.append(p84 - p50)
    medians = p50_list

    # Saving sampling results into txt files
    with (meta.workdir / meta.fitdir / 'mcmc_res' /
        f"mcmc_res_bin{meta.s30_file_counter}_wvl{meta.wavelength:0.3f}.txt").open('w', encoding='utf-8') as f_mcmc:
        print(f"{'parameter':<20} {'p50':>14} {'p16':>14} {'p84':>14} {'minus':>14} {'plus':>14}", file=f_mcmc)
        for label, p50, p16, p84, minus, plus in zip(
            labels, p50_list, p16_list, p84_list, errors_lower, errors_upper
        ):
            print(
                f"{label:<20} {p50:14.7g} {p16:14.7g} {p84:14.7g} {minus:14.7g} {plus:14.7g}",
                file=f_mcmc
            )

    updated_params = util.format_params_for_Model(medians, params, nvisit, fixed_array, tied_array, free_array, untied_array)
    if "uncmulti" in data.s30_myfuncs:
        util.apply_uncmulti(data, updated_params)
    fit = model.fit(data, updated_params)
    util.append_fit_output(fit, meta, fitter='mcmc', medians=medians)

    # Saving plots
    plots.plot_fit_lc2(data, fit, meta, mcmc=True)
    plots.rmsplot(model, data, meta, fitter='mcmc')
    plots.save_astrolc_data(data, model, meta, fitter='mcmc')

    if meta.s30_fit_white:
        with (meta.workdir / meta.fitdir / 'white_systematics_mcmc.txt')\
                .open("w", encoding='utf-8') as outfile:
            for i in range(len(fit.all_sys)):
                print(fit.all_sys[i], file=outfile)
            print('Saved white_systematics.txt file for mcmc run')

    #samples_auto1 = sampler.get_chain(discard=nburn, flat=True)
    #tau = emcee.autocorr.integrated_time(samples_auto1, quiet=True)
    #print(f"Autocorrelation time: {tau}")

    #tau = sampler.get_autocorr_time(discard=nburn)
    #print(f"Autocorrelation time: {tau}")

    # Print the Autocorrelation Time
    tau = sampler.get_autocorr_time(quiet=True)
    print("Autocorrelation time: ", tau)

    return medians, errors_lower, errors_upper, fit


[docs]def lnprior(theta, data):
    """Calculate the log-prior."""
    lnprior_prob = 0.0

    for i, prior in enumerate(data.prior):
        prior_type = str(prior[0]).upper()

        if prior_type == "U":
            lo, hi = prior[1], prior[2]
            if theta[i] < lo or theta[i] > hi:
                return -np.inf

        elif prior_type == "N":
            mean, sigma = prior[1], prior[2]
            if sigma <= 0:
                return -np.inf

            lnprior_prob -= 0.5 * (
                ((theta[i] - mean) / sigma) ** 2
                + np.log(2.0 * np.pi * sigma**2)
            )

        elif prior_type == "X":
            # Should have been caught before sampling.
            # Keep this explicit so X is not silently ignored.
            return -np.inf

        else:
            raise ValueError(f"Unknown prior type '{prior_type}'.")

    return lnprior_prob


[docs]def lnprob(theta, params, data, model, nvisit, fixed_array, tied_array, free_array, untied_array):
    """Calculates the log-likelihood."""
    updated_params = util.format_params_for_Model(theta, params, nvisit, fixed_array, tied_array, free_array, untied_array)
    if "uncmulti" in data.s30_myfuncs:
        util.apply_uncmulti(data, updated_params)
    lp = lnprior(theta, data)
    if lp == -np.inf:  # if the likelihood from the priors is already -inf, dont evaluate the function
        return lp
    fit = model.fit(data, updated_params)
    return fit.ln_like + lp