Source code for allel.stats.preprocessing

# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division


import numpy as np


from allel.compat import text_type
from allel.util import asarray_ndim


[docs]def get_scaler(scaler, copy, ploidy): # normalise strings to lower case if isinstance(scaler, text_type): scaler = scaler.lower() if scaler == 'patterson': return PattersonScaler(copy=copy, ploidy=ploidy) elif scaler == 'standard': return StandardScaler(copy=copy) elif hasattr(scaler, 'fit'): return scaler elif scaler in ['center', 'centre'] or scaler is None: return CenterScaler(copy=copy) else: raise ValueError('unrecognised scaler: %s' % scaler)
[docs]class StandardScaler(object): def __init__(self, copy=True): self.copy = copy self.mean_ = None self.std_ = None def fit(self, gn): # check input gn = asarray_ndim(gn, 2) # find mean self.mean_ = np.mean(gn, axis=1, keepdims=True) # find scaling factor self.std_ = np.std(gn, axis=1, keepdims=True) return self def transform(self, gn, copy=None): # check inputs copy = copy if copy is not None else self.copy gn = asarray_ndim(gn, 2, copy=copy) if not gn.dtype.kind == 'f': gn = gn.astype('f2') # center gn -= self.mean_ # scale gn /= self.std_ return gn def fit_transform(self, gn, copy=None): self.fit(gn) return self.transform(gn, copy=copy)
[docs]class CenterScaler(object): def __init__(self, copy=True): self.copy = copy self.mean_ = None self.std_ = None def fit(self, gn): # check input gn = asarray_ndim(gn, 2) # find mean self.mean_ = np.mean(gn, axis=1, keepdims=True) return self def transform(self, gn, copy=None): # check inputs copy = copy if copy is not None else self.copy gn = asarray_ndim(gn, 2, copy=copy) if not gn.dtype.kind == 'f': gn = gn.astype('f2') # center gn -= self.mean_ return gn def fit_transform(self, gn, copy=None): self.fit(gn) return self.transform(gn, copy=copy)
[docs]class PattersonScaler(object): def __init__(self, copy=True, ploidy=2): self.copy = copy self.ploidy = ploidy self.mean_ = None self.std_ = None def fit(self, gn): # check input gn = asarray_ndim(gn, 2) # find mean self.mean_ = np.mean(gn, axis=1, keepdims=True) # find scaling factor p = self.mean_ / self.ploidy self.std_ = np.sqrt(p * (1 - p)) return self def transform(self, gn, copy=None): # check inputs copy = copy if copy is not None else self.copy gn = asarray_ndim(gn, 2, copy=copy) if not gn.dtype.kind == 'f': gn = gn.astype('f2') # center gn -= self.mean_ # scale gn /= self.std_ return gn def fit_transform(self, gn, copy=None): self.fit(gn) return self.transform(gn, copy=copy)