Source code for craft.abf

import numpy as np
import pandas as pd

from scipy.stats import norm

[docs]def calc_abf(pval, maf, n, n_controls, n_cases): """Calculate Approximate Bayes Factor. (Wakefield, 2009, Genet Epidemiol.) Based on Chris Wallace work Args: pval (float): GWAS p-value maf (float): Minor allele freq n (int): Sample size n_controls: Number of controls n_cases: Number of cases Returns: ABF **Usage** For a binary trait as no exact Bayes factor is calculable. For case-control studies only. **Limitations** Assumes causal variant is included in SNP population (i.e. requires relatively densely genotyped SNPs.) Assumes single causal variant only. """ # Assert/set types pval = float(pval) maf = float(maf) n = int(n) n_controls = int(n_controls) n_cases = int(n_cases) # Variant of ABF calculation that uses p-values # Calculate Z-score z = np.absolute(norm.ppf(pval / 2)) # Multiplicative model - assumes if 2 copies, 'twice as much' risk if have one copy. x0 = 0 x1 = 1 x2 = 2 # No idea what this does d1 = (1-maf)**2 * x0 + 2*maf*(1-maf)*x1 + maf**2 * x2 d2 = (1-maf)**2 * x0**2 + 2*maf*(1-maf)*x1 + maf**2 * x2**2 V = (n_controls + n_cases) / (n_controls * n_cases * (d2-d1**2)) # Compute W (no idea what this is either) # assumption: relative risk = 1.5 # null hypothesis is that relative risk = 1 # ppf = how many standard deviations is 99% W = (np.log(1.5) / norm.ppf(0.99))**2 VW = V + W # simplification for ABF equation # Wakefield's approximate Bayes factor calculation (2009) ABF = np.sqrt(VW/V) * np.exp(- z**2 * W / (2 * VW)) # Kass & Raftery (1995): 2 ln ABF allows comparison / rough interpretation of ABF meaning. # This version is not taken because it is not compatible #ln_2_ABF = -2 * np.log(ABF) return ABF
[docs]def calc_postprob(data): """ Calculate posterior probability for each SNP. Method taken from Maller et al. 2012 'Bayesian refinement of association signals for 14 loci in 3 common diseases.' (supplementary note). This assumes there is 1 causal SNP in the region, and that the prior probability is that any SNP in the region is equally likely to be that causal SNP. """ sum_ABF = data['ABF'].sum() for index, row in data.iterrows(): data['pp'] = data['ABF'] / sum_ABF return data
def abf(data_dfs, cred_threshold): data_list = [] for data in data_dfs: data['ABF'] = data.apply( lambda row: calc_abf(pval=row['pvalue'], maf=row['maf'], n=row['all_total'], n_controls=row['controls_total'], n_cases=row['cases_total']), axis=1) data = calc_postprob(data) data = data.sort_values('pp', ascending=False) data['cpp'] = data.pp.cumsum() # Trim credible SNPs based on posterior probability threshold if cred_threshold == '95': count = sum(data.cpp < 0.95) if cred_threshold =='99': count = sum(data.cpp < 0.99) data = data.head(count+1) data_list.append(data) return data_list