Source code for allel.stats.hw

# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division


import numpy as np


from allel.model.ndarray import GenotypeArray
from allel.util import ignore_invalid, asarray_ndim


[docs]def heterozygosity_observed(g, fill=np.nan): """Calculate the rate of observed heterozygosity for each variant. Parameters ---------- g : array_like, int, shape (n_variants, n_samples, ploidy) Genotype array. fill : float, optional Use this value for variants where all calls are missing. Returns ------- ho : ndarray, float, shape (n_variants,) Observed heterozygosity Examples -------- >>> import allel >>> g = allel.model.GenotypeArray([[[0, 0], [0, 0], [0, 0]], ... [[0, 0], [0, 1], [1, 1]], ... [[0, 0], [1, 1], [2, 2]], ... [[1, 1], [1, 2], [-1, -1]]]) >>> allel.stats.heterozygosity_observed(g) array([ 0. , 0.33333333, 0. , 0.5 ]) """ # check inputs if not hasattr(g, 'count_het') or not hasattr(g, 'count_called'): g = GenotypeArray(g, copy=False) # count hets n_het = np.asarray(g.count_het(axis=1)) n_called = np.asarray(g.count_called(axis=1)) # calculate rate of observed heterozygosity, accounting for variants # where all calls are missing with ignore_invalid(): ho = np.where(n_called > 0, n_het / n_called, fill) return ho
[docs]def heterozygosity_expected(af, ploidy, fill=np.nan): """Calculate the expected rate of heterozygosity for each variant under Hardy-Weinberg equilibrium. Parameters ---------- af : array_like, float, shape (n_variants, n_alleles) Allele frequencies array. fill : float, optional Use this value for variants where allele frequencies do not sum to 1. Returns ------- he : ndarray, float, shape (n_variants,) Expected heterozygosity Examples -------- >>> import allel >>> g = allel.model.GenotypeArray([[[0, 0], [0, 0], [0, 0]], ... [[0, 0], [0, 1], [1, 1]], ... [[0, 0], [1, 1], [2, 2]], ... [[1, 1], [1, 2], [-1, -1]]]) >>> af = g.count_alleles().to_frequencies() >>> allel.stats.heterozygosity_expected(af, ploidy=2) array([ 0. , 0.5 , 0.66666667, 0.375 ]) """ # check inputs af = asarray_ndim(af, 2) # calculate expected heterozygosity out = 1 - np.sum(np.power(af, ploidy), axis=1) # fill values where allele frequencies could not be calculated af_sum = np.sum(af, axis=1) with ignore_invalid(): out[(af_sum < 1) | np.isnan(af_sum)] = fill return out
[docs]def inbreeding_coefficient(g, fill=np.nan): """Calculate the inbreeding coefficient for each variant. Parameters ---------- g : array_like, int, shape (n_variants, n_samples, ploidy) Genotype array. fill : float, optional Use this value for variants where the expected heterozygosity is zero. Returns ------- f : ndarray, float, shape (n_variants,) Inbreeding coefficient. Notes ----- The inbreeding coefficient is calculated as *1 - (Ho/He)* where *Ho* is the observed heterozygosity and *He* is the expected heterozygosity. Examples -------- >>> import allel >>> g = allel.model.GenotypeArray([[[0, 0], [0, 0], [0, 0]], ... [[0, 0], [0, 1], [1, 1]], ... [[0, 0], [1, 1], [2, 2]], ... [[1, 1], [1, 2], [-1, -1]]]) >>> allel.stats.inbreeding_coefficient(g) array([ nan, 0.33333333, 1. , -0.33333333]) """ # check inputs if not hasattr(g, 'count_het') or not hasattr(g, 'count_called'): g = GenotypeArray(g, copy=False) # calculate observed and expected heterozygosity ho = heterozygosity_observed(g) af = g.count_alleles().to_frequencies() he = heterozygosity_expected(af, ploidy=g.shape[-1], fill=0) # calculate inbreeding coefficient, accounting for variants with no # expected heterozygosity with ignore_invalid(): f = np.where(he > 0, 1 - (ho / he), fill) return f