Source code for allel.stats.preprocessing

# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division


import numpy as np


from allel.util import asarray_ndim


def get_scaler(scaler, copy, ploidy):
    """Resolve a scaler specification into a scaler object.

    Parameters
    ----------
    scaler : str or object
        ``'patterson'`` selects a `PattersonScaler`, ``'standard'`` selects
        a `StandardScaler`. Any other object exposing a ``fit`` attribute is
        handed back untouched. Everything else (including ``None`` and
        unrecognised strings) falls back to a `CenterScaler`.
    copy : bool
        Forwarded as the ``copy`` argument of whichever scaler is built here.
    ploidy : int
        Forwarded to `PattersonScaler` only; ignored by the other branches.

    Returns
    -------
    object
        A newly constructed scaler, or `scaler` itself when it already
        provides ``fit``.

    """
    # The order of these checks matters: the two string names are tested
    # before the duck-typing check, and centering is the final fallback.
    if scaler == 'patterson':
        return PattersonScaler(copy=copy, ploidy=ploidy)
    if scaler == 'standard':
        return StandardScaler(copy=copy)
    if hasattr(scaler, 'fit'):
        # caller supplied a ready-made scaler object
        return scaler
    # anything unrecognised: always at least center the data
    return CenterScaler(copy=copy)
class StandardScaler(object):
    """Center each row on its mean and divide it by its standard deviation.

    Parameters
    ----------
    copy : bool, optional
        Default value for the ``copy`` argument of `transform`.

    Attributes
    ----------
    mean_ : ndarray or None
        Per-row mean computed by `fit` (axis 1, dimensions kept);
        ``None`` until `fit` has been called.
    std_ : ndarray or None
        Per-row standard deviation computed by `fit` (axis 1, dimensions
        kept); ``None`` until `fit` has been called.

    """

    def __init__(self, copy=True):
        self.copy = copy
        # populated by fit()
        self.mean_ = None
        self.std_ = None

    def fit(self, gn):
        """Compute the per-row mean and standard deviation of `gn`.

        Parameters
        ----------
        gn : array_like
            Input data, passed through ``asarray_ndim(gn, 2)``.

        Returns
        -------
        self

        """
        data = asarray_ndim(gn, 2)

        # keepdims=True keeps a trailing axis of length 1 so the statistics
        # broadcast against the rows of the data in transform()
        self.mean_ = np.mean(data, axis=1, keepdims=True)
        self.std_ = np.std(data, axis=1, keepdims=True)

        return self

    def transform(self, gn, copy=None):
        """Center and scale `gn` using the statistics found by `fit`.

        Parameters
        ----------
        gn : array_like
            Input data, passed through ``asarray_ndim(gn, 2, copy=...)``.
        copy : bool, optional
            Overrides the instance-level ``copy`` setting when not ``None``.

        Returns
        -------
        ndarray
            The centered and scaled array. Non-floating-point input is
            converted with ``astype('f2')`` before the arithmetic.

        Notes
        -----
        The arithmetic is performed in place on the array returned by
        ``asarray_ndim`` (or on its ``'f2'`` conversion). There is no guard
        against zero entries in ``std_``.

        """
        effective_copy = self.copy if copy is None else copy
        data = asarray_ndim(gn, 2, copy=effective_copy)

        # in-place float arithmetic needs a floating-point array
        if data.dtype.kind != 'f':
            data = data.astype('f2')

        data -= self.mean_
        data /= self.std_

        return data

    def fit_transform(self, gn, copy=None):
        """Call `fit` on `gn`, then return ``transform(gn, copy=copy)``."""
        self.fit(gn)
        return self.transform(gn, copy=copy)
class CenterScaler(object):
    """Center each row on its mean, without any scaling.

    Parameters
    ----------
    copy : bool, optional
        Default value for the ``copy`` argument of `transform`.

    Attributes
    ----------
    mean_ : ndarray or None
        Per-row mean computed by `fit` (axis 1, dimensions kept);
        ``None`` until `fit` has been called.
    std_ : None
        Initialised to ``None`` and never assigned by this class.

    """

    def __init__(self, copy=True):
        self.copy = copy
        # mean_ is populated by fit(); std_ is set here but not used below
        self.mean_ = None
        self.std_ = None

    def fit(self, gn):
        """Compute the per-row mean of `gn`.

        Parameters
        ----------
        gn : array_like
            Input data, passed through ``asarray_ndim(gn, 2)``.

        Returns
        -------
        self

        """
        data = asarray_ndim(gn, 2)

        # keepdims=True keeps a trailing axis of length 1 so the mean
        # broadcasts against the rows of the data in transform()
        self.mean_ = np.mean(data, axis=1, keepdims=True)

        return self

    def transform(self, gn, copy=None):
        """Subtract the mean found by `fit` from `gn`.

        Parameters
        ----------
        gn : array_like
            Input data, passed through ``asarray_ndim(gn, 2, copy=...)``.
        copy : bool, optional
            Overrides the instance-level ``copy`` setting when not ``None``.

        Returns
        -------
        ndarray
            The centered array. Non-floating-point input is converted with
            ``astype('f2')`` before the subtraction.

        Notes
        -----
        The subtraction is performed in place on the array returned by
        ``asarray_ndim`` (or on its ``'f2'`` conversion).

        """
        effective_copy = self.copy if copy is None else copy
        data = asarray_ndim(gn, 2, copy=effective_copy)

        # in-place float arithmetic needs a floating-point array
        if data.dtype.kind != 'f':
            data = data.astype('f2')

        data -= self.mean_

        return data

    def fit_transform(self, gn, copy=None):
        """Call `fit` on `gn`, then return ``transform(gn, copy=copy)``."""
        self.fit(gn)
        return self.transform(gn, copy=copy)
class PattersonScaler(object):
    """Center each row on its mean and scale it by ``sqrt(p * (1 - p))``.

    Here ``p`` is the row mean divided by `ploidy`.

    Parameters
    ----------
    copy : bool, optional
        Default value for the ``copy`` argument of `transform`.
    ploidy : int, optional
        Divisor applied to the row mean to obtain ``p``.

    Attributes
    ----------
    mean_ : ndarray or None
        Per-row mean computed by `fit` (axis 1, dimensions kept);
        ``None`` until `fit` has been called.
    std_ : ndarray or None
        Per-row scaling factor ``sqrt(p * (1 - p))`` computed by `fit`;
        ``None`` until `fit` has been called.

    """

    def __init__(self, copy=True, ploidy=2):
        self.copy = copy
        self.ploidy = ploidy
        # populated by fit()
        self.mean_ = None
        self.std_ = None

    def fit(self, gn):
        """Compute the per-row mean and scaling factor of `gn`.

        Parameters
        ----------
        gn : array_like
            Input data, passed through ``asarray_ndim(gn, 2)``.
            NOTE(review): presumably alternate allele counts per call, so
            that ``mean / ploidy`` is an allele frequency -- confirm
            against callers.

        Returns
        -------
        self

        """
        data = asarray_ndim(gn, 2)

        # keepdims=True keeps a trailing axis of length 1 so the statistics
        # broadcast against the rows of the data in transform()
        self.mean_ = np.mean(data, axis=1, keepdims=True)

        # scaling factor derived from the row mean expressed per ploidy
        freq = self.mean_ / self.ploidy
        self.std_ = np.sqrt(freq * (1 - freq))

        return self

    def transform(self, gn, copy=None):
        """Center and scale `gn` using the statistics found by `fit`.

        Parameters
        ----------
        gn : array_like
            Input data, passed through ``asarray_ndim(gn, 2, copy=...)``.
        copy : bool, optional
            Overrides the instance-level ``copy`` setting when not ``None``.

        Returns
        -------
        ndarray
            The centered and scaled array. Non-floating-point input is
            converted with ``astype('f2')`` before the arithmetic.

        Notes
        -----
        The arithmetic is performed in place on the array returned by
        ``asarray_ndim`` (or on its ``'f2'`` conversion). There is no guard
        against zero entries in ``std_``.

        """
        effective_copy = self.copy if copy is None else copy
        data = asarray_ndim(gn, 2, copy=effective_copy)

        # in-place float arithmetic needs a floating-point array
        if data.dtype.kind != 'f':
            data = data.astype('f2')

        data -= self.mean_
        data /= self.std_

        return data

    def fit_transform(self, gn, copy=None):
        """Call `fit` on `gn`, then return ``transform(gn, copy=copy)``."""
        self.fit(gn)
        return self.transform(gn, copy=copy)