aboutsummaryrefslogtreecommitdiff
# -*- coding: utf-8 -*-

# Python 2.7 workaround
try:
    import pathlib
except ImportError:
    pass


_NUM_SIGNATURE_BYTES = 8192


def get_signature_bytes(path):
    """
    Reads file from disk and returns the first 8192 bytes
    of data representing the magic number header signature.

    Args:
        path: path string to file.

    Returns:
        First 8192 bytes of the file content as bytearray type.
    """
    with open(path, 'rb') as fp:
        return bytearray(fp.read(_NUM_SIGNATURE_BYTES))


def signature(array):
    """
    Returns the first 8192 bytes of the given bytearray
    as part of the file header signature.

    Args:
        array: bytearray to extract the header signature.

    Returns:
        First 8192 bytes of the file content as bytearray type.
    """
    length = len(array)
    index = _NUM_SIGNATURE_BYTES if length > _NUM_SIGNATURE_BYTES else length

    return array[:index]


def get_bytes(obj):
    """
    Infers the input type and reads the first 8192 bytes,
    returning a sliced bytearray.

    Args:
        obj: path to readable, file-like object(with read() method), bytes,
        bytearray or memoryview

    Returns:
        First 8192 bytes of the file content as bytearray type.

    Raises:
        TypeError: if obj is not a supported type.
    """
    if isinstance(obj, bytearray):
        return signature(obj)

    if isinstance(obj, str):
        return get_signature_bytes(obj)

    if isinstance(obj, bytes):
        return signature(obj)

    if isinstance(obj, memoryview):
        return bytearray(signature(obj).tolist())

    if isinstance(obj, pathlib.PurePath):
        return get_signature_bytes(obj)

    if hasattr(obj, 'read'):
        if hasattr(obj, 'tell') and hasattr(obj, 'seek'):
            start_pos = obj.tell()
            obj.seek(0)
            magic_bytes = obj.read(_NUM_SIGNATURE_BYTES)
            obj.seek(start_pos)
            return get_bytes(magic_bytes)
        return get_bytes(obj.read(_NUM_SIGNATURE_BYTES))

    raise TypeError('Unsupported type as file input: %s' % type(obj))