# Source code for cgr_gwas_qc.parsers.eigensoft

from pathlib import Path

import numpy as np
import pandas as pd


class Eigenvec:
    """Eigensoft eigenvec file parser.

    The file is read as whitespace-delimited text. The first line is taken
    to hold the eigenvalues (a leading label token followed by one value per
    component); every following line is one sample, with the ID in the first
    field and PC1..PC10 in the next ten. Any additional trailing fields on a
    line are ignored.

    Attributes:
        filename (Path): Path to the eigenvec file from ``smartpca``.
        components (pd.DataFrame): One row per sample, indexed by ``ID``,
            with ten principal component columns.

            .. csv-table::
                :header: name, dtype, description

                **ID** (*index*), string, Sample or Subject ID
                PC1, float, The first principal component (PC1).
                PC2, float, The second principal component.
                ..., ..., The third to ninth principal components.
                PC10, float, The tenth principal component.

            The PC dtypes are whatever pandas infers from the file; they are
            float for ordinary numeric output.

        values (np.ndarray): A vector of eigenvalues for PC1 to PC10.

    References:
        - https://www.hsph.harvard.edu/alkes-price/software/
        - https://www.cell.com/ajhg/fulltext/S0002-9297(16)00003-3
    """

    # Column labels shared by the eigenvalue header and the per-sample rows.
    _PC_NAMES = [f"PC{i}" for i in range(1, 11)]

    def __init__(self, filename: Path):
        """Parse ``filename`` eagerly into ``components`` and ``values``.

        Args:
            filename: Path to the eigenvec file to read.
        """
        self.filename = filename
        self.components = self.get_components()
        self.values = self.get_values()

    def get_components(self) -> pd.DataFrame:
        """Read the per-sample principal components.

        Returns:
            A DataFrame indexed by ``ID`` with columns ``PC1`` .. ``PC10``.
        """
        return pd.read_csv(
            self.filename,
            skiprows=1,  # the first line holds eigenvalues, not a sample
            # ``sep=r"\s+"`` is the documented replacement for the
            # deprecated ``delim_whitespace=True``.
            sep=r"\s+",
            header=None,
            usecols=range(11),  # ID + 10 PCs; ignore anything after that
            names=["ID", *self._PC_NAMES],
            dtype={"ID": "string"},  # keep numeric-looking IDs as text
        ).set_index("ID")

    def get_values(self) -> np.ndarray:
        """Read the eigenvalues from the first line of the file.

        Returns:
            A 1-D array with the ten eigenvalues, ordered PC1 to PC10.
        """
        header = pd.read_csv(
            self.filename,
            nrows=1,
            header=None,
            sep=r"\s+",
            usecols=range(1, 11),  # field 0 is the label token, skip it
            names=self._PC_NAMES,
        )
        return header.to_numpy()[0]