Source code for craft.read

import glob
import os

import pandas as pd
import numpy as np

def snptest(file):
    """Read SNPTEST association output into an internal dataframe.

    The input is parsed as space-separated text, and everything from a ``#``
    to the end of a line is ignored. Only the columns listed below are kept,
    and the SNPTEST-specific names are mapped onto the internal names.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        DataFrame with the columns ``chromosome, allele1, allele2, rsid,
        position, all_total, cases_total, controls_total, maf, pvalue,
        beta, se`` (in that order).

    Raises:
        KeyError: If a required SNPTEST column is missing from the input.
    """
    required = [
        'chromosome', 'alleleA', 'alleleB', 'rsid', 'position', 'all_total',
        'cases_total', 'controls_total', 'all_maf', 'frequentist_add_pvalue',
        'frequentist_add_beta_1', 'frequentist_add_se_1',
    ]
    internal_names = {
        'all_maf': 'maf',
        'frequentist_add_pvalue': 'pvalue',
        'frequentist_add_beta_1': 'beta',
        'frequentist_add_se_1': 'se',
        'alleleA': 'allele1',
        'alleleB': 'allele2',
    }
    table = pd.read_csv(file, sep=' ', comment='#')
    # Return a renamed copy rather than renaming the column subset in place.
    return table[required].rename(columns=internal_names)
def csv(file):
    """Read tab-separated summary statistics into an internal dataframe.

    The input must already use the internal column names; any additional
    columns are discarded.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        DataFrame with the columns ``chromosome, allele1, allele2, rsid,
        position, all_total, cases_total, controls_total, maf, pvalue,
        beta, se`` (in that order).

    Raises:
        KeyError: If a required column is missing from the input.
    """
    required = [
        'chromosome', 'allele1', 'allele2', 'rsid', 'position', 'all_total',
        'cases_total', 'controls_total', 'maf', 'pvalue', 'beta', 'se',
    ]
    return pd.read_csv(file, sep='\t')[required]
def maps(source_dir):
    """Read genetic map files from a directory into a dict of dataframes.

    Every file in ``source_dir`` matching ``*chr[0-9]*.txt`` is parsed as a
    tab-separated table. The chromosome label is taken from the first row of
    the ``Chromosome`` column, with a leading ``chr`` prefix removed.

    Args:
        source_dir: Directory containing the genetic map files.

    Returns:
        Dict mapping chromosome label (e.g. ``'1'``) to the DataFrame read
        from the corresponding file. Empty if no file matches.
    """
    pattern = os.path.join(source_dir, '*chr[0-9]*.txt')
    genetic_maps = {}
    for path in glob.glob(pattern):
        map_table = pd.read_csv(path, sep='\t')
        # ``.ix`` was removed from pandas; the first row is wanted here.
        label = map_table['Chromosome'].iloc[0]
        # Remove only the literal prefix; ``strip('chr')`` would also eat
        # any trailing 'c', 'h' or 'r' characters.
        if label.startswith('chr'):
            label = label[len('chr'):]
        genetic_maps[label] = map_table
    return genetic_maps
def annovar(file, file_exonic, colnames):
    """Read ANNOVAR output files into an internal dataframe.

    Gene annotation with ANNOVAR returns two different output files
    (variant_function and exonic_variant_function). Where exonic SNPs exist,
    the exonic variant function and the genes + transcript ID + protein-level
    change are merged into the dataframe on matching rsids.

    Args:
        file: Path to the tab-separated variant_function file (no header).
        file_exonic: Path to the tab-separated exonic_variant_function file
            (no header). Its first column is skipped and the next
            ``len(colnames)`` columns are read under ``colnames``.
        colnames: Column names to assign; must include ``var_effect``,
            ``genes`` and ``rsid`` for the exonic merge to add both columns.

    Returns:
        DataFrame with ``colnames`` as columns. When ``file_exonic`` is
        non-empty, the columns ``exonic_variant_function`` and
        ``genes_transcriptID`` are appended (missing for non-exonic rows).
        When ``file`` is empty the frame has no rows.
    """
    # An empty file cannot be parsed by read_csv, so fall back to an empty
    # frame with the expected columns. (``DataFrame.append`` no longer exists
    # in pandas 2.x, so the parsed frame is used directly.)
    if os.path.getsize(file) != 0:
        df = pd.read_csv(file, sep='\t', names=colnames)
    else:
        df = pd.DataFrame(columns=colnames)

    if os.path.getsize(file_exonic) != 0:
        exonic = pd.read_csv(
            file_exonic,
            sep='\t',
            names=colnames,
            # Skip the leading column of the exonic file.
            usecols=range(1, len(colnames) + 1),
        )
        exonic = exonic.filter(items=['var_effect', 'genes', 'rsid'], axis=1)
        exonic = exonic.rename(columns={
            'var_effect': 'exonic_variant_function',
            'genes': 'genes_transcriptID',
        })
        df = pd.merge(df, exonic, how='left', on='rsid')
    return df
def index(file):
    """Read a CRAFT .index output file into a dataframe.

    Args:
        file: Path or file-like object holding tab-separated data with a
            header row.

    Returns:
        DataFrame with one column per header field.
    """
    return pd.read_csv(file, sep='\t')
def abf_cred(file):
    """Read a CRAFT .abf.cred file into a dataframe.

    Args:
        file: Path or file-like object holding tab-separated data with a
            header row.

    Returns:
        DataFrame with one column per header field.
    """
    return pd.read_csv(file, sep='\t')
def finemap_cred(file):
    """Read a FINEMAP .cred file into a list of dataframes.

    The file is parsed as space-separated text. The first column is skipped
    and the remaining columns are consumed as consecutive
    (variant id, probability) pairs, one pair per credible set.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        List with one DataFrame per credible set, each having the columns
        ``rsid`` and ``pp``. Rows where either value is missing are dropped,
        because the sets are padded to a common length in the file.
    """
    cred_snps = pd.read_csv(file, sep=' ')
    # One leading column, then pairs. Floor division counts the pairs
    # correctly whether or not a trailing separator adds an extra empty
    # column at the end of every line.
    n_sets = (len(cred_snps.columns) - 1) // 2
    cred_dfs = []
    for set_no in range(n_sets):
        first = 2 * set_no + 1
        pair = cred_snps[cred_snps.columns[first:first + 2]]
        pair = pair.rename(columns={pair.columns[0]: "rsid",
                                    pair.columns[1]: "pp"})
        cred_dfs.append(pair.dropna(axis=0))
    return cred_dfs
def cred_annotated(file):
    """Read a CRAFT .cred.annotated file into a dataframe.

    Args:
        file: Path or file-like object holding tab-separated data with a
            header row.

    Returns:
        DataFrame with one column per header field.
    """
    return pd.read_csv(file, sep='\t')
def ld(file):
    """Read a CRAFT .ld output file into a numpy array.

    Args:
        file: Path or file-like object holding whitespace-separated numeric
            values, as accepted by ``numpy.loadtxt``.

    Returns:
        ``numpy.ndarray`` of floats with the values read from the file.
    """
    return np.loadtxt(file)
def variant_file(file):
    """Read a CRAFT variant_file into a dataframe.

    Note that a DataFrame is returned, not a plain list; callers that need
    a list must extract the relevant column themselves.

    Args:
        file: Path or file-like object holding space-separated data with a
            header row.

    Returns:
        DataFrame with one column per header field.
    """
    return pd.read_csv(file, sep=' ')
def snp(file):
    """Read a FINEMAP .snp file into a dataframe.

    The file is parsed as space-separated text, and a ``prob`` column, if
    present, is renamed to ``pp``. All other columns keep their names.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        DataFrame with the file's columns, ``prob`` renamed to ``pp``.
    """
    return pd.read_csv(file, sep=' ').rename(columns={'prob': 'pp'})