Source code for allel.chunked.core

# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division
import operator
from collections import namedtuple


import numpy as np


from allel.compat import string_types, integer_types, range
from allel.chunked import util as _util
from allel.abc import ArrayWrapper, DisplayAsTable
from allel.model.ndarray import subset as _numpy_subset, NumpyRecArrayWrapper


def store(data, arr, start=0, stop=None, offset=0, blen=None):
    """Copy `data` block-wise into `arr`."""

    # setup
    blen = _util.get_blen_array(data, blen)
    if stop is None:
        stop = len(data)
    else:
        stop = min(stop, len(data))
    length = stop - start
    if length < 0:
        raise ValueError('invalid stop/start')

    # copy block-wise
    for i in range(start, stop, blen):
        j = min(i+blen, stop)
        l = j-i
        arr[offset:offset+l] = data[i:j]
        offset += l


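# Usage sketch for store(): block-wise copy from any sliceable source into a
# pre-allocated destination; plain numpy arrays are enough to exercise it:
#
#     src = np.arange(10)
#     dst = np.empty(10, dtype=src.dtype)
#     store(src, dst, blen=3)
#     assert np.array_equal(src, dst)

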
def copy(data, start=0, stop=None, blen=None, storage=None, create='array',
         **kwargs):
    """Copy `data` block-wise into a new array."""

    # setup
    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    if stop is None:
        stop = len(data)
    else:
        stop = min(stop, len(data))
    length = stop - start
    if length < 0:
        raise ValueError('invalid stop/start')

    # copy block-wise
    out = None
    for i in range(start, stop, blen):
        j = min(i+blen, stop)
        block = data[i:j]
        if out is None:
            out = getattr(storage, create)(block, expectedlen=length,
                                           **kwargs)
        else:
            out.append(block)

    return out


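# Usage sketch for copy(), assuming the default chunked storage backend
# (bcolz or zarr, depending on the release) is installed; slicing with [:]
# normalizes the result to a numpy array:
#
#     data = np.arange(1000)
#     out = copy(data, start=100, stop=200, blen=50)
#     assert np.array_equal(out[:], np.arange(100, 200))

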
def copy_table(tbl, start=0, stop=None, blen=None, storage=None,
               create='table', **kwargs):
    """Copy `tbl` block-wise into a new table."""

    # setup
    names, columns = _util.check_table_like(tbl)
    storage = _util.get_storage(storage)
    blen = _util.get_blen_table(tbl, blen)
    if stop is None:
        stop = len(columns[0])
    else:
        stop = min(stop, len(columns[0]))
    length = stop - start
    if length < 0:
        raise ValueError('invalid stop/start')

    # copy block-wise
    out = None
    for i in range(start, stop, blen):
        j = min(i+blen, stop)
        res = [c[i:j] for c in columns]
        if out is None:
            out = getattr(storage, create)(res, names=names,
                                           expectedlen=length, **kwargs)
        else:
            out.append(res)

    return out


def map_blocks(data, f, blen=None, storage=None, create='array', **kwargs):
    """Apply function `f` block-wise over `data`."""

    # setup
    storage = _util.get_storage(storage)
    if isinstance(data, tuple):
        blen = max(_util.get_blen_array(d, blen) for d in data)
    else:
        blen = _util.get_blen_array(data, blen)
    if isinstance(data, tuple):
        _util.check_equal_length(*data)
        length = len(data[0])
    else:
        length = len(data)

    # block-wise iteration
    out = None
    for i in range(0, length, blen):
        j = min(i+blen, length)

        # obtain blocks
        if isinstance(data, tuple):
            blocks = [d[i:j] for d in data]
        else:
            blocks = [data[i:j]]

        # map
        res = f(*blocks)

        # store
        if out is None:
            out = getattr(storage, create)(res, expectedlen=length, **kwargs)
        else:
            out.append(res)

    return out


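# Usage sketch for map_blocks(): `f` receives one block per input, so a
# tuple of equal-length inputs is zipped block-wise (default storage
# assumed, as above):
#
#     x = np.arange(10)
#     y = np.ones(10, dtype=int)
#     out = map_blocks((x, y), np.add, blen=4)
#     assert np.array_equal(out[:], np.arange(1, 11))

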
def reduce_axis(data, reducer, block_reducer, mapper=None, axis=None,
                blen=None, storage=None, create='array', **kwargs):
    """Apply an operation to `data` that reduces over one or more axes."""

    # setup
    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    length = len(data)
    # normalise axis arg
    if isinstance(axis, int):
        axis = (axis,)

    # deal with 'out' kwarg if supplied, can arise if a chunked array is
    # passed as an argument to numpy.sum(), see also
    # https://github.com/cggh/scikit-allel/issues/66
    kwarg_out = kwargs.pop('out', None)
    if kwarg_out is not None:
        raise ValueError('keyword argument "out" is not supported')

    if axis is None or 0 in axis:
        # two-step reduction
        out = None
        for i in range(0, length, blen):
            j = min(i+blen, length)
            block = data[i:j]
            if mapper:
                block = mapper(block)
            res = reducer(block, axis=axis)
            if out is None:
                out = res
            else:
                out = block_reducer(out, res)
        if np.isscalar(out):
            return out
        elif len(out.shape) == 0:
            return out[()]
        else:
            return getattr(storage, create)(out, **kwargs)

    else:
        # first dimension is preserved, no need to reduce blocks
        out = None
        for i in range(0, length, blen):
            j = min(i+blen, length)
            block = data[i:j]
            if mapper:
                block = mapper(block)
            r = reducer(block, axis=axis)
            if out is None:
                out = getattr(storage, create)(r, expectedlen=length,
                                               **kwargs)
            else:
                out.append(r)
        return out


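# Usage sketch for reduce_axis(): a reduction over axis 0 (or over all axes)
# is computed in two steps, applying `reducer` to each block and combining
# the partial results with `block_reducer`; a full sum needs no storage
# because it collapses to a scalar:
#
#     data = np.arange(12).reshape(4, 3)
#     total = reduce_axis(data, reducer=np.sum, block_reducer=np.add, blen=2)
#     assert total == 66

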
def amax(data, axis=None, mapper=None, blen=None, storage=None,
         create='array', **kwargs):
    """Compute the maximum value."""
    return reduce_axis(data, axis=axis, reducer=np.amax,
                       block_reducer=np.maximum, mapper=mapper,
                       blen=blen, storage=storage, create=create, **kwargs)


def amin(data, axis=None, mapper=None, blen=None, storage=None,
         create='array', **kwargs):
    """Compute the minimum value."""
    return reduce_axis(data, axis=axis, reducer=np.amin,
                       block_reducer=np.minimum, mapper=mapper,
                       blen=blen, storage=storage, create=create, **kwargs)


# noinspection PyShadowingBuiltins
def asum(data, axis=None, mapper=None, blen=None, storage=None,
         create='array', **kwargs):
    """Compute the sum."""
    return reduce_axis(data, axis=axis, reducer=np.sum,
                       block_reducer=np.add, mapper=mapper,
                       blen=blen, storage=storage, create=create, **kwargs)


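# Sketch contrasting the two reduce_axis() code paths via asum(): a full sum
# collapses to a scalar, while axis=1 preserves the first dimension and
# yields a chunked result (default storage assumed):
#
#     data = np.arange(12).reshape(4, 3)
#     assert asum(data, blen=2) == 66
#     row_sums = asum(data, axis=1, blen=2)
#     assert np.array_equal(row_sums[:], data.sum(axis=1))

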
def count_nonzero(data, mapper=None, blen=None, storage=None,
                  create='array', **kwargs):
    """Count the number of non-zero elements."""
    return reduce_axis(data, reducer=np.count_nonzero,
                       block_reducer=np.add, mapper=mapper,
                       blen=blen, storage=storage, create=create, **kwargs)


def compress(condition, data, axis=0, out=None, blen=None, storage=None,
             create='array', **kwargs):
    """Return selected slices of an array along given axis."""

    # setup
    if out is not None:
        # argument is only there for numpy API compatibility
        raise NotImplementedError('out argument is not supported')
    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    length = len(data)
    nnz = count_nonzero(condition)

    if axis == 0:
        _util.check_equal_length(data, condition)

        # block iteration
        out = None
        for i in range(0, length, blen):
            j = min(i+blen, length)
            bcond = np.asarray(condition[i:j])
            # don't access any data unless we have to
            if np.any(bcond):
                block = np.asarray(data[i:j])
                res = np.compress(bcond, block, axis=0)
                if out is None:
                    out = getattr(storage, create)(res, expectedlen=nnz,
                                                   **kwargs)
                else:
                    out.append(res)
        return out

    elif axis == 1:

        # block iteration
        out = None
        condition = np.asanyarray(condition)
        for i in range(0, length, blen):
            j = min(i+blen, length)
            block = np.asarray(data[i:j])
            res = np.compress(condition, block, axis=1)
            if out is None:
                out = getattr(storage, create)(res, expectedlen=length,
                                               **kwargs)
            else:
                out.append(res)
        return out

    else:
        raise NotImplementedError('axis not supported: %s' % axis)


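# Usage sketch for compress() along axis 0; blocks whose condition slice is
# all False are never read from `data` (default storage assumed):
#
#     data = np.arange(10)
#     cond = data % 2 == 0
#     out = compress(cond, data, blen=4)
#     assert np.array_equal(out[:], np.array([0, 2, 4, 6, 8]))

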
def take(data, indices, axis=0, out=None, mode='raise', blen=None,
         storage=None, create='array', **kwargs):
    """Take elements from an array along an axis."""

    # setup
    if out is not None:
        # argument is only there for numpy API compatibility
        raise NotImplementedError('out argument is not supported')
    length = len(data)

    if axis == 0:

        # check that indices are strictly increasing
        indices = np.asanyarray(indices)
        if np.any(indices[1:] <= indices[:-1]):
            raise NotImplementedError('indices must be strictly increasing')

        # implement via compress()
        condition = np.zeros((length,), dtype=bool)
        condition[indices] = True
        return compress(condition, data, axis=0, blen=blen, storage=storage,
                        create=create, **kwargs)

    elif axis == 1:

        # setup
        storage = _util.get_storage(storage)
        blen = _util.get_blen_array(data, blen)

        # block iteration
        out = None
        for i in range(0, length, blen):
            j = min(i+blen, length)
            block = data[i:j]
            res = np.take(block, indices, axis=1, mode=mode)
            if out is None:
                out = getattr(storage, create)(res, expectedlen=length,
                                               **kwargs)
            else:
                out.append(res)
        return out

    else:
        raise NotImplementedError('axis not supported: %s' % axis)


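# Usage sketch for take(): along axis 0 the indices must be strictly
# increasing, because the selection is rewritten as a boolean condition and
# routed through compress():
#
#     data = np.arange(10) * 10
#     out = take(data, [1, 5, 7], blen=4)
#     assert np.array_equal(out[:], np.array([10, 50, 70]))

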
def compress_table(condition, tbl, axis=None, out=None, blen=None,
                   storage=None, create='table', **kwargs):
    """Return selected rows of a table."""

    # setup
    if axis is not None and axis != 0:
        raise NotImplementedError('only axis 0 is supported')
    if out is not None:
        # argument is only there for numpy API compatibility
        raise NotImplementedError('out argument is not supported')
    storage = _util.get_storage(storage)
    names, columns = _util.check_table_like(tbl)
    blen = _util.get_blen_table(tbl, blen)
    _util.check_equal_length(columns[0], condition)
    length = len(columns[0])
    nnz = count_nonzero(condition)

    # block iteration
    out = None
    for i in range(0, length, blen):
        j = min(i+blen, length)
        bcond = condition[i:j]
        # don't access any data unless we have to
        if np.any(bcond):
            bcolumns = [c[i:j] for c in columns]
            res = [np.compress(bcond, c, axis=0) for c in bcolumns]
            if out is None:
                out = getattr(storage, create)(res, names=names,
                                               expectedlen=nnz, **kwargs)
            else:
                out.append(res)

    return out


def take_table(tbl, indices, axis=None, out=None, mode='raise', blen=None,
               storage=None, create='table', **kwargs):
    """Return selected rows of a table."""

    # setup
    if axis is not None and axis != 0:
        raise NotImplementedError('only axis 0 is supported')
    if out is not None:
        # argument is only there for numpy API compatibility
        raise NotImplementedError('out argument is not supported')
    if mode is not None and mode != 'raise':
        raise NotImplementedError('only mode=raise is supported')
    names, columns = _util.check_table_like(tbl)
    length = len(columns[0])

    # check that indices are strictly increasing
    indices = np.asanyarray(indices)
    if np.any(indices[1:] <= indices[:-1]):
        raise NotImplementedError('indices must be strictly increasing')

    # implement via compress()
    condition = np.zeros((length,), dtype=bool)
    condition[indices] = True
    return compress_table(condition, tbl, blen=blen, storage=storage,
                          create=create, **kwargs)


def subset(data, sel0=None, sel1=None, blen=None, storage=None,
           create='array', **kwargs):
    """Return selected rows and columns of an array."""

    # TODO refactor sel0 and sel1 normalization with ndarray.subset

    # setup
    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    length = len(data)
    if sel0 is not None:
        sel0 = np.asanyarray(sel0)
    if sel1 is not None:
        sel1 = np.asanyarray(sel1)

    # ensure boolean array for dim 0
    if sel0 is not None and sel0.dtype.kind != 'b':
        # assume indices, convert to boolean condition
        tmp = np.zeros(length, dtype=bool)
        tmp[sel0] = True
        sel0 = tmp

    # ensure indices for dim 1
    if sel1 is not None and sel1.dtype.kind == 'b':
        # assume boolean condition, convert to indices
        sel1, = np.nonzero(sel1)

    # shortcuts
    if sel0 is None and sel1 is None:
        return copy(data, blen=blen, storage=storage, create=create,
                    **kwargs)
    elif sel1 is None:
        return compress(sel0, data, axis=0, blen=blen, storage=storage,
                        create=create, **kwargs)
    elif sel0 is None:
        return take(data, sel1, axis=1, blen=blen, storage=storage,
                    create=create, **kwargs)

    # build output
    sel0_nnz = count_nonzero(sel0)
    out = None
    for i in range(0, length, blen):
        j = min(i+blen, length)
        bsel0 = sel0[i:j]
        # don't access data unless we have to
        if np.any(bsel0):
            block = data[i:j]
            res = _numpy_subset(block, bsel0, sel1)
            if out is None:
                out = getattr(storage, create)(res, expectedlen=sel0_nnz,
                                               **kwargs)
            else:
                out.append(res)

    return out


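# Usage sketch for subset(): sel0 selects rows (boolean mask or indices),
# sel1 selects columns (likewise); both together give the cross-product
# selection (default storage assumed):
#
#     data = np.arange(12).reshape(4, 3)
#     out = subset(data, sel0=[0, 2], sel1=[1, 2], blen=2)
#     assert np.array_equal(out[:], data[[0, 2]][:, [1, 2]])

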
def concatenate_table(tup, blen=None, storage=None, create='table',
                      **kwargs):
    """Stack tables in sequence vertically (row-wise)."""

    # setup
    storage = _util.get_storage(storage)
    if not isinstance(tup, (tuple, list)):
        raise ValueError('expected tuple or list, found %r' % tup)
    if len(tup) < 2:
        raise ValueError('expected two or more tables to stack')

    # build output
    expectedlen = sum(len(t) for t in tup)
    out = None
    tnames = None
    for tdata in tup:
        tblen = _util.get_blen_table(tdata, blen)
        tnames, tcolumns = _util.check_table_like(tdata, names=tnames)
        tlen = len(tcolumns[0])
        for i in range(0, tlen, tblen):
            j = min(i+tblen, tlen)
            bcolumns = [c[i:j] for c in tcolumns]
            if out is None:
                out = getattr(storage, create)(bcolumns, names=tnames,
                                               expectedlen=expectedlen,
                                               **kwargs)
            else:
                out.append(bcolumns)

    return out


def concatenate(tup, axis=0, blen=None, storage=None, create='array',
                **kwargs):
    """Concatenate arrays."""

    # setup
    storage = _util.get_storage(storage)
    if not isinstance(tup, (tuple, list)):
        raise ValueError('expected tuple or list, found %r' % tup)
    if len(tup) < 2:
        raise ValueError('expected two or more arrays')

    if axis == 0:

        # build output
        expectedlen = sum(len(a) for a in tup)
        out = None
        for a in tup:
            ablen = _util.get_blen_array(a, blen)
            for i in range(0, len(a), ablen):
                j = min(i+ablen, len(a))
                block = a[i:j]
                if out is None:
                    out = getattr(storage, create)(block,
                                                   expectedlen=expectedlen,
                                                   **kwargs)
                else:
                    out.append(block)

    else:

        def f(*blocks):
            return np.concatenate(blocks, axis=axis)

        out = map_blocks(tup, f, blen=blen, storage=storage, create=create,
                         **kwargs)

    return out


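# Usage sketch for concatenate() along axis 0; for any other axis the work
# is delegated to map_blocks() with np.concatenate (default storage
# assumed):
#
#     a = np.arange(5)
#     b = np.arange(5, 10)
#     out = concatenate([a, b], blen=3)
#     assert np.array_equal(out[:], np.arange(10))

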
def binary_op(data, op, other, blen=None, storage=None, create='array',
              **kwargs):
    """Compute a binary operation block-wise over `data`."""

    # normalise scalars
    if hasattr(other, 'shape') and len(other.shape) == 0:
        other = other[()]

    if np.isscalar(other):

        def f(block):
            return op(block, other)

        return map_blocks(data, f, blen=blen, storage=storage,
                          create=create, **kwargs)

    elif len(data) == len(other):

        def f(a, b):
            return op(a, b)

        return map_blocks((data, other), f, blen=blen, storage=storage,
                          create=create, **kwargs)

    else:
        raise NotImplementedError('argument type not supported')


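# Usage sketch for binary_op(): scalars are broadcast to every block, while
# equal-length array arguments are paired block-by-block (default storage
# assumed):
#
#     data = np.arange(5)
#     out = binary_op(data, operator.add, 10)
#     assert np.array_equal(out[:], np.arange(10, 15))

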
# based on bcolz.chunked_eval
def _get_expression_variables(expression, vm):
    cexpr = compile(expression, '<string>', 'eval')
    if vm == 'numexpr':
        # Check that var is not a numexpr function here. This is useful for
        # detecting unbound variables in expressions. This is not necessary
        # for the 'python' engine.
        from numexpr.expressions import functions as numexpr_functions
        return [var for var in cexpr.co_names
                if var not in ['None', 'False', 'True'] and
                var not in numexpr_functions]
    else:
        return [var for var in cexpr.co_names
                if var not in ['None', 'False', 'True']]


# based on bcolz.chunked_eval
def eval_table(tbl, expression, vm='python', blen=None, storage=None,
               create='array', vm_kwargs=None, **kwargs):
    """Evaluate `expression` against columns of a table."""

    # setup
    storage = _util.get_storage(storage)
    names, columns = _util.check_table_like(tbl)
    length = len(columns[0])
    if vm_kwargs is None:
        vm_kwargs = dict()

    # setup vm
    if vm == 'numexpr':
        import numexpr
        evaluate = numexpr.evaluate
    elif vm == 'python':
        # noinspection PyUnusedLocal
        def evaluate(expr, local_dict=None, **kw):
            # takes no keyword arguments
            return eval(expr, dict(), local_dict)
    else:
        raise ValueError('expected vm to be either "numexpr" or "python"')

    # compile expression and get required columns
    variables = _get_expression_variables(expression, vm)
    required_columns = {v: columns[names.index(v)] for v in variables}

    # determine block size for evaluation
    blen = _util.get_blen_table(required_columns, blen=blen)

    # build output
    out = None
    for i in range(0, length, blen):
        j = min(i+blen, length)
        blocals = {v: c[i:j] for v, c in required_columns.items()}
        res = evaluate(expression, local_dict=blocals, **vm_kwargs)
        if out is None:
            out = getattr(storage, create)(res, expectedlen=length, **kwargs)
        else:
            out.append(res)

    return out


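# Usage sketch for eval_table() with the 'python' vm, so numexpr is not
# required; a dict of equal-length columns is accepted as table-like
# (default storage assumed):
#
#     tbl = {'x': np.arange(5), 'y': np.arange(5) * 2}
#     out = eval_table(tbl, 'x + y', vm='python')
#     assert np.array_equal(out[:], np.arange(5) * 3)

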
class ChunkedArrayWrapper(ArrayWrapper):
    """Wrapper class for chunked array-like data.

    Parameters
    ----------
    data : array_like
        Data to be wrapped. May be a bcolz carray, h5py dataset, or
        anything providing a similar interface.

    """

    def __init__(self, data):
        data = _util.ensure_array_like(data)
        super(ChunkedArrayWrapper, self).__init__(data)

    @property
    def caption(self):
        r = '<%s' % type(self).__name__
        r += ' shape=%s' % str(self.shape)
        r += ' dtype=%s' % str(self.dtype)
        if self.chunks is not None:
            r += ' chunks=%s' % str(self.chunks)
        if self.nbytes:
            r += '\n nbytes=%s' % _util.human_readable_size(self.nbytes)
        if self.cbytes:
            r += ' cbytes=%s' % _util.human_readable_size(self.cbytes)
        if self.cratio:
            r += ' cratio=%.1f' % self.cratio
        if self.compression:
            r += '\n compression=%s' % self.compression
        if self.compression_opts is not None:
            r += ' compression_opts=%s' % self.compression_opts
        values_cls = type(self.values)
        r += '\n values=%s.%s' % (values_cls.__module__,
                                  values_cls.__name__)
        r += '>'
        return r

    def __repr__(self):
        return self.caption

    @property
    def nbytes(self):
        return _util.get_nbytes(self.values)

    @property
    def cbytes(self):
        return _util.get_cbytes(self.values)

    @property
    def compression(self):
        return _util.get_compression(self.values)

    @property
    def compression_opts(self):
        return _util.get_compression_opts(self.values)

    @property
    def shuffle(self):
        return _util.get_shuffle(self.values)

    @property
    def chunks(self):
        return _util.get_chunks(self.values)

    @property
    def cratio(self):
        nbytes = self.nbytes
        cbytes = self.cbytes
        if nbytes and cbytes:
            return nbytes / cbytes
        return None

    # outputs from these methods are not wrapped
    store = store
    count_nonzero = count_nonzero

    def map_blocks(self, f, blen=None, storage=None, create='array',
                   **kwargs):
        out = map_blocks(self, f, blen=blen, storage=storage, create=create,
                         **kwargs)
        return ChunkedArrayWrapper(out)

    def map_blocks_method(self, method_name, kwargs=None, **storage_kwargs):
        if kwargs is None:
            kwargs = dict()

        def f(block):
            method = getattr(block, method_name)
            return method(**kwargs)

        out = self.map_blocks(f, **storage_kwargs)
        return out

    def copy(self, start=0, stop=None, blen=None, storage=None,
             create='array', **kwargs):
        out = copy(self, start=start, stop=stop, blen=blen, storage=storage,
                   create=create, **kwargs)
        # can always wrap this as sub-class
        return type(self)(out)

    def binary_op(self, op, other, blen=None, storage=None, create='array',
                  **kwargs):
        out = binary_op(self, op, other, blen=blen, storage=storage,
                        create=create, **kwargs)
        return ChunkedArrayWrapper(out)

    def __eq__(self, other, **kwargs):
        return self.binary_op(operator.eq, other, **kwargs)

    def __ne__(self, other, **kwargs):
        return self.binary_op(operator.ne, other, **kwargs)

    def __lt__(self, other, **kwargs):
        return self.binary_op(operator.lt, other, **kwargs)

    def __gt__(self, other, **kwargs):
        return self.binary_op(operator.gt, other, **kwargs)

    def __le__(self, other, **kwargs):
        return self.binary_op(operator.le, other, **kwargs)

    def __ge__(self, other, **kwargs):
        return self.binary_op(operator.ge, other, **kwargs)

    def __add__(self, other, **kwargs):
        return self.binary_op(operator.add, other, **kwargs)

    def __floordiv__(self, other, **kwargs):
        return self.binary_op(operator.floordiv, other, **kwargs)

    def __mod__(self, other, **kwargs):
        return self.binary_op(operator.mod, other, **kwargs)

    def __mul__(self, other, **kwargs):
        return self.binary_op(operator.mul, other, **kwargs)

    def __pow__(self, other, **kwargs):
        return self.binary_op(operator.pow, other, **kwargs)

    def __sub__(self, other, **kwargs):
        return self.binary_op(operator.sub, other, **kwargs)

    def __truediv__(self, other, **kwargs):
        return self.binary_op(operator.truediv, other, **kwargs)

    def __and__(self, other, **kwargs):
        return self.binary_op(operator.and_, other, **kwargs)

    def __lshift__(self, other, **kwargs):
        return self.binary_op(operator.lshift, other, **kwargs)

    def __or__(self, other, **kwargs):
        return self.binary_op(operator.or_, other, **kwargs)

    def __rshift__(self, other, **kwargs):
        return self.binary_op(operator.rshift, other, **kwargs)

    def __xor__(self, other, **kwargs):
        return self.binary_op(operator.xor, other, **kwargs)

    def compress(self, condition, axis=0, out=None, **kwargs):
        out = compress(condition, self.values, axis=axis, out=out, **kwargs)
        return ChunkedArrayWrapper(out)

    def take(self, indices, axis=0, out=None, **kwargs):
        out = take(self.values, indices, axis=axis, out=out, **kwargs)
        return ChunkedArrayWrapper(out)

    def subset(self, sel0=None, sel1=None, **kwargs):
        out = subset(self.values, sel0, sel1, **kwargs)
        return ChunkedArrayWrapper(out)

    def concatenate(self, others, axis=0, **kwargs):
        if not isinstance(others, (tuple, list)):
            others = others,
        tup = (self,) + tuple(others)
        out = concatenate(tup, axis=axis, **kwargs)
        return ChunkedArrayWrapper(out)

    def max(self, axis=None, **kwargs):
        out = amax(self, axis=axis, **kwargs)
        if np.isscalar(out):
            return out
        else:
            return ChunkedArrayWrapper(out)

    def min(self, axis=None, **kwargs):
        out = amin(self, axis=axis, **kwargs)
        if np.isscalar(out):
            return out
        else:
            return ChunkedArrayWrapper(out)

    def sum(self, axis=None, **kwargs):
        out = asum(self, axis=axis, **kwargs)
        if np.isscalar(out):
            return out
        else:
            return ChunkedArrayWrapper(out)


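# Usage sketch for ChunkedArrayWrapper: wrap anything array-like (numpy
# array, h5py dataset, bcolz carray, ...) and operate on it block-wise;
# reductions return plain scalars, selections return wrapped arrays
# (default storage assumed for the latter):
#
#     wrapped = ChunkedArrayWrapper(np.arange(10))
#     assert wrapped.sum() == 45
#     evens = wrapped.compress(np.arange(10) % 2 == 0)
#     assert np.array_equal(evens[:], np.arange(0, 10, 2))

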
class ChunkedTableWrapper(DisplayAsTable):
    """Wrapper class for chunked table-like data.

    Parameters
    ----------
    data : table_like
        Data to be wrapped. May be a tuple or list of columns (array-like),
        a dict mapping names to columns, a bcolz ctable, h5py group, numpy
        recarray, or anything providing a similar interface.
    names : sequence of strings
        Column names.

    """

    array_cls = NumpyRecArrayWrapper

    # noinspection PyMissingConstructor
    def __init__(self, data, names=None):
        names, columns = _util.check_table_like(data, names=names)
        # skip super-class constructor because we are more flexible about
        # the type of values here
        self._values = data
        self._names = names
        self._columns = columns
        self.rowcls = namedtuple('row', names)

    @property
    def names(self):
        return self._names

    @property
    def columns(self):
        return self._columns

    def __getitem__(self, item):
        if isinstance(item, string_types):
            # item is column name, return column
            idx = self._names.index(item)
            return ChunkedArrayWrapper(self._columns[idx])
        elif isinstance(item, integer_types):
            # item is row index, return row
            return self.rowcls(*(col[item] for col in self._columns))
        elif isinstance(item, slice):
            # item is row slice, return numpy recarray
            start = 0 if item.start is None else item.start
            if start < 0:
                raise ValueError('negative indices not supported')
            stop = len(self) if item.stop is None else item.stop
            stop = min(stop, len(self))
            step = 1 if item.step is None else item.step
            outshape = (stop - start) // step
            out = np.empty(outshape, dtype=self.dtype)
            for n, c in zip(self._names, self._columns):
                out[n] = c[start:stop:step]
            out = out.view(np.recarray)
            if self.array_cls is not None:
                out = self.array_cls(out)
            return out
        elif isinstance(item, (list, tuple)) and \
                all(isinstance(i, string_types) for i in item):
            # item is sequence of column names, return table
            columns = [self._columns[self._names.index(n)] for n in item]
            return type(self)(columns, names=item)
        else:
            raise IndexError('item not supported for indexing: %s'
                             % repr(item))

    def __array__(self, *args):
        a = np.asanyarray(self[:])
        if args:
            a = a.astype(args[0])
        return a

    def __getattr__(self, item):
        if item in self._names:
            idx = self._names.index(item)
            return ChunkedArrayWrapper(self._columns[idx])
        else:
            return super(ChunkedTableWrapper, self).__getattr__(item)

    @property
    def caption(self):
        r = '<%s' % type(self).__name__
        r += ' shape=%s' % str(self.shape)
        r += ' dtype=%s' % str(self.dtype)
        if self.nbytes:
            r += '\n nbytes=%s' % _util.human_readable_size(self.nbytes)
        if self.cbytes:
            r += ' cbytes=%s' % _util.human_readable_size(self.cbytes)
        if self.cratio:
            r += ' cratio=%.1f' % self.cratio
        values_cls = type(self.values)
        r += '\n values=%s.%s' % (values_cls.__module__,
                                  values_cls.__name__)
        r += '>'
        return r

    def __repr__(self):
        return self.caption

    def __len__(self):
        return len(self._columns[0])

    @property
    def shape(self):
        return len(self),

    @property
    def ndim(self):
        return len(self.shape)

    @property
    def dtype(self):
        l = []
        for n, c in zip(self._names, self._columns):
            # need to account for multidimensional columns
            t = (n, c.dtype) if len(c.shape) == 1 else \
                (n, c.dtype, c.shape[1:])
            l.append(t)
        return np.dtype(l)

    @property
    def nbytes(self):
        cols_nbytes = [_util.get_nbytes(c) for c in self._columns]
        if all(cols_nbytes):
            return sum(cols_nbytes)
        return None

    @property
    def cbytes(self):
        cols_cbytes = [_util.get_cbytes(c) for c in self._columns]
        if all(cols_cbytes):
            return sum(cols_cbytes)
        return None

    @property
    def cratio(self):
        nbytes = self.nbytes
        cbytes = self.cbytes
        if nbytes and cbytes:
            return nbytes / cbytes
        return None

    def copy(self, start=0, stop=None, blen=None, storage=None,
             create='table', **kwargs):
        out = copy_table(self, start=start, stop=stop, blen=blen,
                         storage=storage, create=create, **kwargs)
        # can always wrap this
        return type(self)(out, names=self._names)

    def eval(self, expression, **kwargs):
        out = eval_table(self, expression, **kwargs)
        return ChunkedArrayWrapper(out)

    def query(self, expression, vm='python', blen=None, storage=None,
              create='table', vm_kwargs=None, **kwargs):
        condition = self.eval(expression, vm=vm, blen=blen, storage=storage,
                              create='array', vm_kwargs=vm_kwargs)
        out = self.compress(condition, blen=blen, storage=storage,
                            create=create, **kwargs)
        # should already be wrapped
        return out

    def compress(self, condition, axis=None, out=None, blen=None,
                 storage=None, create='table', **kwargs):
        out = compress_table(condition, self, axis=axis, out=out, blen=blen,
                             storage=storage, create=create, **kwargs)
        return type(self)(out)

    def take(self, indices, axis=None, out=None, mode='raise', blen=None,
             storage=None, create='table', **kwargs):
        out = take_table(self, indices, axis=axis, out=out, mode=mode,
                         blen=blen, storage=storage, create=create, **kwargs)
        return type(self)(out)


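# Usage sketch for ChunkedTableWrapper: wrap a dict of equal-length columns;
# a string key returns the column wrapped as a ChunkedArrayWrapper, an
# integer key returns one row as a namedtuple:
#
#     tbl = ChunkedTableWrapper({'x': np.arange(5), 'y': np.arange(5) * 2})
#     assert tbl['x'].sum() == 10
#     row = tbl[1]  # namedtuple with fields 'x' and 'y'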