diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/numpy/core/defchararray.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/numpy/core/defchararray.py | 2914 |
1 files changed, 2914 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/numpy/core/defchararray.py b/.venv/lib/python3.12/site-packages/numpy/core/defchararray.py new file mode 100644 index 00000000..11c5a30b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/numpy/core/defchararray.py @@ -0,0 +1,2914 @@ +""" +This module contains a set of functions for vectorized string +operations and methods. + +.. note:: + The `chararray` class exists for backwards compatibility with + Numarray, it is not recommended for new development. Starting from numpy + 1.4, if one needs arrays of strings, it is recommended to use arrays of + `dtype` `object_`, `bytes_` or `str_`, and use the free functions + in the `numpy.char` module for fast vectorized string operations. + +Some methods will only be available if the corresponding string method is +available in your version of Python. + +The preferred alias for `defchararray` is `numpy.char`. + +""" +import functools + +from .._utils import set_module +from .numerictypes import ( + bytes_, str_, integer, int_, object_, bool_, character) +from .numeric import ndarray, compare_chararrays +from .numeric import array as narray +from numpy.core.multiarray import _vec_string +from numpy.core import overrides +from numpy.compat import asbytes +import numpy + +__all__ = [ + 'equal', 'not_equal', 'greater_equal', 'less_equal', + 'greater', 'less', 'str_len', 'add', 'multiply', 'mod', 'capitalize', + 'center', 'count', 'decode', 'encode', 'endswith', 'expandtabs', + 'find', 'index', 'isalnum', 'isalpha', 'isdigit', 'islower', 'isspace', + 'istitle', 'isupper', 'join', 'ljust', 'lower', 'lstrip', 'partition', + 'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit', + 'rstrip', 'split', 'splitlines', 'startswith', 'strip', 'swapcase', + 'title', 'translate', 'upper', 'zfill', 'isnumeric', 'isdecimal', + 'array', 'asarray' + ] + + +_globalvar = 0 + +array_function_dispatch = functools.partial( + overrides.array_function_dispatch, module='numpy.char') + + +def 
_is_unicode(arr): + """Returns True if arr is a string or a string array with a dtype that + represents a unicode string, otherwise returns False. + + """ + if (isinstance(arr, str) or + issubclass(numpy.asarray(arr).dtype.type, str)): + return True + return False + + +def _to_bytes_or_str_array(result, output_dtype_like=None): + """ + Helper function to cast a result back into an array + with the appropriate dtype if an object array must be used + as an intermediary. + """ + ret = numpy.asarray(result.tolist()) + dtype = getattr(output_dtype_like, 'dtype', None) + if dtype is not None: + return ret.astype(type(dtype)(_get_num_chars(ret)), copy=False) + return ret + + +def _clean_args(*args): + """ + Helper function for delegating arguments to Python string + functions. + + Many of the Python string operations that have optional arguments + do not use 'None' to indicate a default value. In these cases, + we need to remove all None arguments, and those following them. + """ + newargs = [] + for chk in args: + if chk is None: + break + newargs.append(chk) + return newargs + +def _get_num_chars(a): + """ + Helper function that returns the number of characters per field in + a string or unicode array. This is to abstract out the fact that + for a unicode array this is itemsize / 4. + """ + if issubclass(a.dtype.type, str_): + return a.itemsize // 4 + return a.itemsize + + +def _binary_op_dispatcher(x1, x2): + return (x1, x2) + + +@array_function_dispatch(_binary_op_dispatcher) +def equal(x1, x2): + """ + Return (x1 == x2) element-wise. + + Unlike `numpy.equal`, this comparison is performed by first + stripping whitespace characters from the end of the string. This + behavior is provided for backward-compatibility with numarray. + + Parameters + ---------- + x1, x2 : array_like of str or unicode + Input arrays of the same shape. + + Returns + ------- + out : ndarray + Output array of bools. 
@array_function_dispatch(_binary_op_dispatcher)
def not_equal(x1, x2):
    """
    Test ``x1 != x2`` element-wise.

    In contrast to `numpy.not_equal`, whitespace characters at the end
    of the strings are stripped before the comparison is made.  This is
    kept for backward compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    equal, greater_equal, less_equal, greater, less
    """
    strip_trailing = True
    return compare_chararrays(x1, x2, '!=', strip_trailing)


@array_function_dispatch(_binary_op_dispatcher)
def greater_equal(x1, x2):
    """
    Test ``x1 >= x2`` element-wise.

    In contrast to `numpy.greater_equal`, whitespace characters at the
    end of the strings are stripped before the comparison is made.  This
    is kept for backward compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    equal, not_equal, less_equal, greater, less
    """
    strip_trailing = True
    return compare_chararrays(x1, x2, '>=', strip_trailing)
@array_function_dispatch(_binary_op_dispatcher)
def greater(x1, x2):
    """
    Return (x1 > x2) element-wise.

    Unlike `numpy.greater`, this comparison is performed by first
    stripping whitespace characters from the end of the string.  This
    behavior is provided for backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    equal, not_equal, greater_equal, less_equal, less
    """
    return compare_chararrays(x1, x2, '>', True)


@array_function_dispatch(_binary_op_dispatcher)
def less(x1, x2):
    """
    Return (x1 < x2) element-wise.

    Unlike `numpy.less`, this comparison is performed by first
    stripping whitespace characters from the end of the string.  This
    behavior is provided for backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    equal, not_equal, greater_equal, less_equal, greater
    """
    return compare_chararrays(x1, x2, '<', True)


def _unary_op_dispatcher(a):
    # Only the single operand is handed to ``array_function_dispatch``.
    return (a,)
+ + Parameters + ---------- + a : array_like of str or unicode + + Returns + ------- + out : ndarray + Output array of integers + + See Also + -------- + len + + Examples + -------- + >>> a = np.array(['Grace Hopper Conference', 'Open Source Day']) + >>> np.char.str_len(a) + array([23, 15]) + >>> a = np.array([u'\u0420', u'\u043e']) + >>> np.char.str_len(a) + array([1, 1]) + >>> a = np.array([['hello', 'world'], [u'\u0420', u'\u043e']]) + >>> np.char.str_len(a) + array([[5, 5], [1, 1]]) + """ + # Note: __len__, etc. currently return ints, which are not C-integers. + # Generally intp would be expected for lengths, although int is sufficient + # due to the dtype itemsize limitation. + return _vec_string(a, int_, '__len__') + + +@array_function_dispatch(_binary_op_dispatcher) +def add(x1, x2): + """ + Return element-wise string concatenation for two arrays of str or unicode. + + Arrays `x1` and `x2` must have the same shape. + + Parameters + ---------- + x1 : array_like of str or unicode + Input array. + x2 : array_like of str or unicode + Input array. + + Returns + ------- + add : ndarray + Output array of `bytes_` or `str_`, depending on input types + of the same shape as `x1` and `x2`. + + """ + arr1 = numpy.asarray(x1) + arr2 = numpy.asarray(x2) + out_size = _get_num_chars(arr1) + _get_num_chars(arr2) + + if type(arr1.dtype) != type(arr2.dtype): + # Enforce this for now. The solution to it will be implement add + # as a ufunc. It never worked right on Python 3: bytes + unicode gave + # nonsense unicode + bytes errored, and unicode + object used the + # object dtype itemsize as num chars (worked on short strings). + # bytes + void worked but promoting void->bytes is dubious also. 
def _multiply_dispatcher(a, i):
    # Only the string operand is handed to ``array_function_dispatch``.
    return (a,)


@array_function_dispatch(_multiply_dispatcher)
def multiply(a, i):
    """
    Return (a * i), that is string multiple concatenation,
    element-wise.

    Values in `i` of less than 0 are treated as 0 (which yields an
    empty string).

    Parameters
    ----------
    a : array_like of str or unicode

    i : array_like of ints

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types

    Raises
    ------
    ValueError
        If `i` does not have an integer dtype.

    Examples
    --------
    >>> a = np.array(["a", "b", "c"])
    >>> np.char.multiply(a, 3)
    array(['aaa', 'bbb', 'ccc'], dtype='<U3')
    >>> i = np.array([1, 2, 3])
    >>> np.char.multiply(a, i)
    array(['a', 'bb', 'ccc'], dtype='<U3')
    >>> np.char.multiply(np.array(['a']), i)
    array(['a', 'aa', 'aaa'], dtype='<U3')
    >>> a = np.array(['a', 'b', 'c', 'd', 'e', 'f']).reshape((2, 3))
    >>> np.char.multiply(a, 3)
    array([['aaa', 'bbb', 'ccc'],
           ['ddd', 'eee', 'fff']], dtype='<U3')
    >>> np.char.multiply(a, i)
    array([['a', 'bb', 'ccc'],
           ['d', 'ee', 'fff']], dtype='<U3')
    """
    a_arr = numpy.asarray(a)
    i_arr = numpy.asarray(i)
    if not issubclass(i_arr.dtype.type, integer):
        raise ValueError("Can only multiply by integers")
    # ``initial=0`` clamps negative repeat counts to zero and also gives
    # the reduction an identity, so an empty `i` no longer raises.
    max_repeat = int(i_arr.max(initial=0))
    out_size = _get_num_chars(a_arr) * max_repeat
    return _vec_string(
        a_arr, type(a_arr.dtype)(out_size), '__mul__', (i_arr,))


def _mod_dispatcher(a, values):
    # Both the format strings and the values take part in dispatch.
    return (a, values)
@array_function_dispatch(_unary_op_dispatcher)
def capitalize(a):
    """
    Capitalize only the first character of every element of `a`.

    Applies `str.capitalize` to each element and returns the results
    as a new array.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array of strings to capitalize.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input
        types

    See Also
    --------
    str.capitalize

    Examples
    --------
    >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4'); c
    array([b'a1b2', b'1b2a', b'b2a1', b'2a1b'], dtype='|S4')
    >>> np.char.capitalize(c)
    array([b'A1b2', b'1b2a', b'B2a1', b'2a1b'], dtype='|S4')

    """
    strings = numpy.asarray(a)
    # The result reuses the dtype of the input.
    out_dtype = strings.dtype
    return _vec_string(strings, out_dtype, 'capitalize')


def _center_dispatcher(a, width, fillchar=None):
    # Only the string operand is handed to ``array_function_dispatch``.
    return (a,)
+ + Examples + -------- + >>> c = np.array(['a1b2','1b2a','b2a1','2a1b']); c + array(['a1b2', '1b2a', 'b2a1', '2a1b'], dtype='<U4') + >>> np.char.center(c, width=9) + array([' a1b2 ', ' 1b2a ', ' b2a1 ', ' 2a1b '], dtype='<U9') + >>> np.char.center(c, width=9, fillchar='*') + array(['***a1b2**', '***1b2a**', '***b2a1**', '***2a1b**'], dtype='<U9') + >>> np.char.center(c, width=1) + array(['a', '1', 'b', '2'], dtype='<U1') + + """ + a_arr = numpy.asarray(a) + width_arr = numpy.asarray(width) + size = int(numpy.max(width_arr.flat)) + if numpy.issubdtype(a_arr.dtype, numpy.bytes_): + fillchar = asbytes(fillchar) + return _vec_string( + a_arr, type(a_arr.dtype)(size), 'center', (width_arr, fillchar)) + + +def _count_dispatcher(a, sub, start=None, end=None): + return (a,) + + +@array_function_dispatch(_count_dispatcher) +def count(a, sub, start=0, end=None): + """ + Returns an array with the number of non-overlapping occurrences of + substring `sub` in the range [`start`, `end`]. + + Calls `str.count` element-wise. + + Parameters + ---------- + a : array_like of str or unicode + + sub : str or unicode + The substring to search for. + + start, end : int, optional + Optional arguments `start` and `end` are interpreted as slice + notation to specify the range in which to count. + + Returns + ------- + out : ndarray + Output array of ints. 
def _code_dispatcher(a, encoding=None, errors=None):
    # Shared by `decode` and `encode`; only `a` takes part in dispatch.
    return (a,)


@array_function_dispatch(_code_dispatcher)
def decode(a, encoding=None, errors=None):
    r"""
    Apply ``bytes.decode`` to every element of `a`.

    The set of available codecs comes from the Python standard library,
    and may be extended at runtime.  For more information, see the
    :mod:`codecs` module.

    Parameters
    ----------
    a : array_like
        Input array whose elements are decoded.

    encoding : str, optional
        The name of an encoding

    errors : str, optional
        Specifies how to handle encoding errors

    Returns
    -------
    out : ndarray

    See Also
    --------
    :py:meth:`bytes.decode`

    Notes
    -----
    The type of the result will depend on the encoding specified.

    Examples
    --------
    >>> c = np.array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
    ...               b'\x81\x82\xc2\xc1\xc2\x82\x81'])
    >>> c
    array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
    ...    b'\x81\x82\xc2\xc1\xc2\x82\x81'], dtype='|S7')
    >>> np.char.decode(c, encoding='cp037')
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')

    """
    # Arguments left as None (and everything after them) are dropped so
    # that the underlying method falls back to its own defaults.
    codec_args = _clean_args(encoding, errors)
    decoded = _vec_string(a, object_, 'decode', codec_args)
    return _to_bytes_or_str_array(decoded)
def _endswith_dispatcher(a, suffix, start=None, end=None):
    # Only the string operand is handed to ``array_function_dispatch``.
    return (a,)


@array_function_dispatch(_endswith_dispatcher)
def endswith(a, suffix, start=0, end=None):
    """
    Test element-wise whether each string in `a` ends with `suffix`.

    Calls `str.endswith` element-wise; the result is `True` where the
    element ends with `suffix` and `False` otherwise.

    Parameters
    ----------
    a : array_like of str or unicode

    suffix : str

    start, end : int, optional
        With optional `start`, test beginning at that position. With
        optional `end`, stop comparing at that position.

    Returns
    -------
    out : ndarray
        Outputs an array of bools.

    See Also
    --------
    str.endswith

    Examples
    --------
    >>> s = np.array(['foo', 'bar'])
    >>> s[0] = 'foo'
    >>> s[1] = 'bar'
    >>> s
    array(['foo', 'bar'], dtype='<U3')
    >>> np.char.endswith(s, 'ar')
    array([False,  True])
    >>> np.char.endswith(s, 'a', start=1, end=2)
    array([False,  True])

    """
    call_args = [suffix, start]
    # `end` is forwarded only when the caller supplied it.
    call_args += _clean_args(end)
    return _vec_string(a, bool_, 'endswith', call_args)


def _expandtabs_dispatcher(a, tabsize=None):
    # Only the string operand is handed to ``array_function_dispatch``.
    return (a,)
@array_function_dispatch(_count_dispatcher)
def find(a, sub, start=0, end=None):
    """
    Locate the first occurrence of `sub` in each element of `a`.

    Calls `str.find` element-wise.

    For each element, the lowest index in the string is returned at
    which substring `sub` is found, such that `sub` is contained in
    the range [`start`, `end`].

    Parameters
    ----------
    a : array_like of str or unicode

    sub : str or unicode

    start, end : int, optional
        Optional arguments `start` and `end` are interpreted as in
        slice notation.

    Returns
    -------
    out : ndarray or int
        Output array of ints.  Returns -1 if `sub` is not found.

    See Also
    --------
    str.find

    Examples
    --------
    >>> a = np.array(["NumPy is a Python library"])
    >>> np.char.find(a, "Python", start=0, end=None)
    array([11])

    """
    call_args = [sub, start]
    # `end` is forwarded only when the caller supplied it.
    call_args += _clean_args(end)
    return _vec_string(a, int_, 'find', call_args)
@array_function_dispatch(_unary_op_dispatcher)
def isalnum(a):
    """
    Returns true for each element if all characters in the string are
    alphanumeric and there is at least one character, false otherwise.

    Calls `str.isalnum` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isalnum
    """
    return _vec_string(a, bool_, 'isalnum')


@array_function_dispatch(_unary_op_dispatcher)
def isalpha(a):
    """
    Returns true for each element if all characters in the string are
    alphabetic and there is at least one character, false otherwise.

    Calls `str.isalpha` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isalpha
    """
    return _vec_string(a, bool_, 'isalpha')
@array_function_dispatch(_unary_op_dispatcher)
def islower(a):
    """
    Test element-wise whether the strings in `a` are lowercase.

    An element yields true if all of its cased characters are
    lowercase and it has at least one cased character, and false
    otherwise.

    Calls `str.islower` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.islower
    """
    flags = _vec_string(a, bool_, 'islower')
    return flags


@array_function_dispatch(_unary_op_dispatcher)
def isspace(a):
    """
    Test element-wise whether the strings in `a` are all whitespace.

    An element yields true if it contains only whitespace characters
    and has at least one character, and false otherwise.

    Calls `str.isspace` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isspace
    """
    flags = _vec_string(a, bool_, 'isspace')
    return flags
@array_function_dispatch(_unary_op_dispatcher)
def isupper(a):
    """
    Test element-wise whether the strings in `a` are uppercase.

    An element yields true if all of its cased characters are
    uppercase and it has at least one character, and false otherwise.

    Call `str.isupper` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isupper

    Examples
    --------
    >>> s = "GHC"
    >>> np.char.isupper(s)
    array(True)
    >>> a = np.array(["hello", "HELLO", "Hello"])
    >>> np.char.isupper(a)
    array([False,  True, False])

    """
    flags = _vec_string(a, bool_, 'isupper')
    return flags


def _join_dispatcher(sep, seq):
    # Separator and sequence both take part in dispatch.
    return (sep, seq)


@array_function_dispatch(_join_dispatcher)
def join(sep, seq):
    """
    Concatenate the strings in the sequence `seq`, separated by `sep`.

    Calls `str.join` element-wise.

    Parameters
    ----------
    sep : array_like of str or unicode
    seq : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types

    See Also
    --------
    str.join

    Examples
    --------
    >>> np.char.join('-', 'osd')
    array('o-s-d', dtype='<U5')

    >>> np.char.join(['-', '.'], ['ghc', 'osd'])
    array(['g-h-c', 'o.s.d'], dtype='<U5')

    """
    # The output width is not known up front, so the method runs through
    # an object array that is converted back afterwards.
    joined = _vec_string(sep, object_, 'join', (seq,))
    return _to_bytes_or_str_array(joined, seq)


def _just_dispatcher(a, width, fillchar=None):
    # Shared by `ljust` and `rjust`; only `a` takes part in dispatch.
    return (a,)
@array_function_dispatch(_unary_op_dispatcher)
def lower(a):
    """
    Convert every element of `a` to lowercase.

    Call `str.lower` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.lower

    Examples
    --------
    >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c
    array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
    >>> np.char.lower(c)
    array(['a1b c', '1bca', 'bca1'], dtype='<U5')

    """
    strings = numpy.asarray(a)
    # The result reuses the dtype of the input.
    out_dtype = strings.dtype
    return _vec_string(strings, out_dtype, 'lower')


def _strip_dispatcher(a, chars=None):
    # Only the string operand is handed to ``array_function_dispatch``.
    return (a,)
+ + Returns + ------- + out : ndarray, {str, unicode} + Output array of str or unicode, depending on input type + + See Also + -------- + str.lstrip + + Examples + -------- + >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']) + >>> c + array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7') + + The 'a' variable is unstripped from c[1] because whitespace leading. + + >>> np.char.lstrip(c, 'a') + array(['AaAaA', ' aA ', 'bBABba'], dtype='<U7') + + + >>> np.char.lstrip(c, 'A') # leaves c unchanged + array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7') + >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, '')).all() + ... # XXX: is this a regression? This used to return True + ... # np.char.lstrip(c,'') does not modify c at all. + False + >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, None)).all() + True + + """ + a_arr = numpy.asarray(a) + return _vec_string(a_arr, a_arr.dtype, 'lstrip', (chars,)) + + +def _partition_dispatcher(a, sep): + return (a,) + + +@array_function_dispatch(_partition_dispatcher) +def partition(a, sep): + """ + Partition each element in `a` around `sep`. + + Calls `str.partition` element-wise. + + For each element in `a`, split the element as the first + occurrence of `sep`, and return 3 strings containing the part + before the separator, the separator itself, and the part after + the separator. If the separator is not found, return 3 strings + containing the string itself, followed by two empty strings. + + Parameters + ---------- + a : array_like, {str, unicode} + Input array + sep : {str, unicode} + Separator to split each string element in `a`. + + Returns + ------- + out : ndarray, {str, unicode} + Output array of str or unicode, depending on input type. + The output array will have an extra dimension with 3 + elements per input element. 
def _replace_dispatcher(a, old, new, count=None):
    # Only the string operand is handed to ``array_function_dispatch``.
    return (a,)


@array_function_dispatch(_replace_dispatcher)
def replace(a, old, new, count=None):
    """
    Replace occurrences of `old` by `new` in each element of `a`.

    For each element, a copy of the string is returned in which all
    occurrences of substring `old` are replaced by `new`.

    Calls `str.replace` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    old, new : str or unicode

    count : int, optional
        If the optional argument `count` is given, only the first
        `count` occurrences are replaced.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.replace

    Examples
    --------
    >>> a = np.array(["That is a mango", "Monkeys eat mangos"])
    >>> np.char.replace(a, 'mango', 'banana')
    array(['That is a banana', 'Monkeys eat bananas'], dtype='<U19')

    >>> a = np.array(["The dish is fresh", "This is it"])
    >>> np.char.replace(a, 'is', 'was')
    array(['The dwash was fresh', 'Thwas was it'], dtype='<U19')
    """
    call_args = [old, new]
    # `count` is forwarded only when the caller supplied it.
    call_args += _clean_args(count)
    replaced = _vec_string(a, object_, 'replace', call_args)
    return _to_bytes_or_str_array(replaced, a)
@array_function_dispatch(_count_dispatcher)
def rindex(a, sub, start=0, end=None):
    """
    Locate the last occurrence of `sub` in each element of `a`.

    Like `rfind`, but raises `ValueError` when the substring `sub` is
    not found.

    Calls `str.rindex` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    sub : str or unicode

    start, end : int, optional

    Returns
    -------
    out : ndarray
        Output array of ints.

    See Also
    --------
    rfind, str.rindex

    """
    call_args = [sub, start]
    # `end` is forwarded only when the caller supplied it.
    call_args += _clean_args(end)
    return _vec_string(a, int_, 'rindex', call_args)


@array_function_dispatch(_just_dispatcher)
def rjust(a, width, fillchar=' '):
    """
    Right-justify the elements of `a` in strings of length `width`.

    Calls `str.rjust` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    width : int
        The length of the resulting strings
    fillchar : str or unicode, optional
        The character to use for padding

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.rjust

    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    # The output dtype is sized to the largest requested width.
    out_chars = int(numpy.max(widths.flat))
    if numpy.issubdtype(strings.dtype, numpy.bytes_):
        # Byte-string input needs a bytes fill character.
        fillchar = asbytes(fillchar)
    out_dtype = type(strings.dtype)(out_chars)
    return _vec_string(strings, out_dtype, 'rjust', (widths, fillchar))
@array_function_dispatch(_partition_dispatcher)
def rpartition(a, sep):
    """
    Partition (split) each element around the right-most separator.

    Calls `str.rpartition` element-wise.

    Each element of `a` is split at the last occurrence of `sep` into
    three strings: the part before the separator, the separator itself,
    and the part after it.  When the separator is not found, the three
    strings are the string itself followed by two empty strings.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array
    sep : str or unicode
        Right-most separator to split each element in array.

    Returns
    -------
    out : ndarray
        Output array of string or unicode, depending on input
        type.  The output array will have an extra dimension with
        3 elements per input element.

    See Also
    --------
    str.rpartition

    """
    pieces = _vec_string(a, object_, 'rpartition', (sep,))
    return _to_bytes_or_str_array(pieces, a)


def _split_dispatcher(a, sep=None, maxsplit=None):
    # Only `a` is handed to the dispatch machinery.
    return (a,)


@array_function_dispatch(_split_dispatcher)
def rsplit(a, sep=None, maxsplit=None):
    """
    For each element in `a`, return a list of the words in the
    string, using `sep` as the delimiter string.

    Calls `str.rsplit` element-wise.

    Apart from splitting from the right, `rsplit` behaves like `split`.

    Parameters
    ----------
    a : array_like of str or unicode

    sep : str or unicode, optional
        If `sep` is not specified or None, any whitespace string
        is a separator.
    maxsplit : int, optional
        If `maxsplit` is given, at most `maxsplit` splits are done,
        the rightmost ones.

    Returns
    -------
    out : ndarray
       Array of list objects

    See Also
    --------
    str.rsplit, split

    """
    # The per-element lists may differ in length, so the result stays an
    # object array.
    call_args = [sep] + _clean_args(maxsplit)
    return _vec_string(a, object_, 'rsplit', call_args)


def _strip_dispatcher(a, chars=None):
    # Only `a` is handed to the dispatch machinery.
    return (a,)
@array_function_dispatch(_strip_dispatcher)
def rstrip(a, chars=None):
    """
    For each element in `a`, return a copy with the trailing
    characters removed.

    Calls `str.rstrip` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    chars : str or unicode, optional
       The set of characters to be removed.  If omitted or None,
       whitespace is removed.  `chars` is not a suffix; rather, all
       combinations of its values are stripped.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.rstrip

    Examples
    --------
    >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7'); c
    array([b'aAaAaA', b'abBABba'], dtype='|S7')
    >>> np.char.rstrip(c, b'a')
    array([b'aAaAaA', b'abBABb'], dtype='|S7')
    >>> np.char.rstrip(c, b'A')
    array([b'aAaAa', b'abBABba'], dtype='|S7')

    """
    source = numpy.asarray(a)
    # Stripping never lengthens a string, so the input dtype is reused.
    return _vec_string(source, source.dtype, 'rstrip', (chars,))


@array_function_dispatch(_split_dispatcher)
def split(a, sep=None, maxsplit=None):
    """
    For each element in `a`, return a list of the words in the
    string, using `sep` as the delimiter string.

    Calls `str.split` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    sep : str or unicode, optional
       If `sep` is not specified or None, any whitespace string is a
       separator.

    maxsplit : int, optional
        If `maxsplit` is given, at most `maxsplit` splits are done.

    Returns
    -------
    out : ndarray
        Array of list objects

    See Also
    --------
    str.split, rsplit

    """
    # The per-element lists may differ in length, so the result stays an
    # object array.
    call_args = [sep] + _clean_args(maxsplit)
    return _vec_string(a, object_, 'split', call_args)


def _splitlines_dispatcher(a, keepends=None):
    # Only `a` is handed to the dispatch machinery.
    return (a,)
@array_function_dispatch(_splitlines_dispatcher)
def splitlines(a, keepends=None):
    """
    For each element in `a`, return a list of the lines in the
    element, breaking at line boundaries.

    Calls `str.splitlines` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    keepends : bool, optional
        Line breaks are not included in the resulting list unless
        keepends is given and true.

    Returns
    -------
    out : ndarray
        Array of list objects

    See Also
    --------
    str.splitlines

    """
    call_args = _clean_args(keepends)
    return _vec_string(a, object_, 'splitlines', call_args)


def _startswith_dispatcher(a, prefix, start=None, end=None):
    # Only `a` is handed to the dispatch machinery.
    return (a,)


@array_function_dispatch(_startswith_dispatcher)
def startswith(a, prefix, start=0, end=None):
    """
    Returns a boolean array which is `True` where the string element
    in `a` starts with `prefix`, otherwise `False`.

    Calls `str.startswith` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    prefix : str

    start, end : int, optional
        With optional `start`, test beginning at that position. With
        optional `end`, stop comparing at that position.

    Returns
    -------
    out : ndarray
        Array of booleans

    See Also
    --------
    str.startswith

    """
    call_args = [prefix, start] + _clean_args(end)
    return _vec_string(a, bool_, 'startswith', call_args)
@array_function_dispatch(_strip_dispatcher)
def strip(a, chars=None):
    """
    For each element in `a`, return a copy with the leading and
    trailing characters removed.

    Calls `str.strip` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    chars : str or unicode, optional
       The set of characters to be removed.  If omitted or None,
       whitespace is removed.  `chars` is not a prefix or suffix;
       rather, all combinations of its values are stripped.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.strip

    Examples
    --------
    >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
    >>> c
    array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
    >>> np.char.strip(c)
    array(['aAaAaA', 'aA', 'abBABba'], dtype='<U7')
    >>> np.char.strip(c, 'a') # 'a' unstripped from c[1] because whitespace leads
    array(['AaAaA', ' aA ', 'bBABb'], dtype='<U7')
    >>> np.char.strip(c, 'A') # 'A' unstripped from c[1] because (unprinted) ws trails
    array(['aAaAa', ' aA ', 'abBABba'], dtype='<U7')

    """
    source = numpy.asarray(a)
    call_args = _clean_args(chars)
    return _vec_string(source, source.dtype, 'strip', call_args)


@array_function_dispatch(_unary_op_dispatcher)
def swapcase(a):
    """
    Return element-wise a copy of the string with
    uppercase characters converted to lowercase and vice versa.

    Calls `str.swapcase` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.swapcase

    Examples
    --------
    >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5'); c
    array([b'a1B c', b'1b Ca', b'b Ca1', b'cA1b'], dtype='|S5')
    >>> np.char.swapcase(c)
    array([b'A1b C', b'1B cA', b'B cA1', b'Ca1B'], dtype='|S5')

    """
    source = numpy.asarray(a)
    return _vec_string(source, source.dtype, 'swapcase')
@array_function_dispatch(_unary_op_dispatcher)
def title(a):
    """
    Return element-wise title cased version of string or unicode.

    Title case words start with uppercase characters, all remaining cased
    characters are lowercase.

    Calls `str.title` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.title

    Examples
    --------
    >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5'); c
    array([b'a1b c', b'1b ca', b'b ca1', b'ca1b'], dtype='|S5')
    >>> np.char.title(c)
    array([b'A1B C', b'1B Ca', b'B Ca1', b'Ca1B'], dtype='|S5')

    """
    source = numpy.asarray(a)
    return _vec_string(source, source.dtype, 'title')


def _translate_dispatcher(a, table, deletechars=None):
    # Only `a` is handed to the dispatch machinery.
    return (a,)


@array_function_dispatch(_translate_dispatcher)
def translate(a, table, deletechars=None):
    """
    For each element in `a`, return a copy of the string where all
    characters occurring in the optional argument `deletechars` are
    removed, and the remaining characters have been mapped through the
    given translation table.

    Calls `str.translate` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    table : str of length 256

    deletechars : str
        Only forwarded when `a` is not a `str_` array.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.translate

    """
    source = numpy.asarray(a)
    if issubclass(source.dtype.type, str_):
        # For str_ input only the table is passed on; `deletechars`
        # is ignored.
        call_args = (table,)
    else:
        call_args = [table] + _clean_args(deletechars)
    return _vec_string(source, source.dtype, 'translate', call_args)
@array_function_dispatch(_unary_op_dispatcher)
def upper(a):
    """
    Return an array with the elements converted to uppercase.

    Calls `str.upper` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.upper

    Examples
    --------
    >>> c = np.array(['a1b c', '1bca', 'bca1']); c
    array(['a1b c', '1bca', 'bca1'], dtype='<U5')
    >>> np.char.upper(c)
    array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')

    """
    a_arr = numpy.asarray(a)
    return _vec_string(a_arr, a_arr.dtype, 'upper')


def _zfill_dispatcher(a, width):
    # Only `a` is handed to the dispatch machinery.
    return (a,)


@array_function_dispatch(_zfill_dispatcher)
def zfill(a, width):
    """
    Return the numeric string left-filled with zeros

    Calls `str.zfill` element-wise.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.
    width : int
        Width of string to left-fill elements in `a`.  An element that
        is already longer than `width` is returned unchanged, as with
        `str.zfill`.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.zfill

    """
    a_arr = numpy.asarray(a)
    width_arr = numpy.asarray(width)
    size = int(numpy.max(width_arr.flat))
    # str.zfill never shortens its input.  Size the output dtype for the
    # longest existing element as well, otherwise elements longer than
    # `width` would be silently truncated when stored in the result.
    lengths = _vec_string(a_arr, int_, '__len__')
    if lengths.size:
        size = max(size, int(numpy.max(lengths)))
    return _vec_string(
        a_arr, type(a_arr.dtype)(size), 'zfill', (width_arr,))
@array_function_dispatch(_unary_op_dispatcher)
def isnumeric(a):
    """
    For each element, return True if there are only numeric
    characters in the element.

    Calls `str.isnumeric` element-wise.

    Numeric characters include digit characters, and all characters
    that have the Unicode numeric value property, e.g. ``U+2155,
    VULGAR FRACTION ONE FIFTH``.

    Parameters
    ----------
    a : array_like, unicode
        Input array.

    Returns
    -------
    out : ndarray, bool
        Array of booleans of same shape as `a`.

    Raises
    ------
    TypeError
        If `a` is not Unicode according to ``_is_unicode``.

    See Also
    --------
    str.isnumeric

    Examples
    --------
    >>> np.char.isnumeric(['123', '123abc', '9.0', '1/4', 'VIII'])
    array([ True, False, False, False, False])

    """
    if _is_unicode(a):
        return _vec_string(a, bool_, 'isnumeric')
    raise TypeError("isnumeric is only available for Unicode strings and arrays")


@array_function_dispatch(_unary_op_dispatcher)
def isdecimal(a):
    """
    For each element, return True if there are only decimal
    characters in the element.

    Calls `str.isdecimal` element-wise.

    Decimal characters include digit characters, and all characters
    that can be used to form decimal-radix numbers,
    e.g. ``U+0660, ARABIC-INDIC DIGIT ZERO``.

    Parameters
    ----------
    a : array_like, unicode
        Input array.

    Returns
    -------
    out : ndarray, bool
        Array of booleans identical in shape to `a`.

    Raises
    ------
    TypeError
        If `a` is not Unicode according to ``_is_unicode``.

    See Also
    --------
    str.isdecimal

    Examples
    --------
    >>> np.char.isdecimal(['12345', '4.99', '123ABC', ''])
    array([ True, False, False, False])

    """
    if _is_unicode(a):
        return _vec_string(a, bool_, 'isdecimal')
    raise TypeError(
        "isdecimal is only available for Unicode strings and arrays")
@set_module('numpy')
class chararray(ndarray):
    """
    chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0,
              strides=None, order=None)

    Provides a convenient view on arrays of string and unicode values.

    .. note::
       The `chararray` class exists for backwards compatibility with
       Numarray, it is not recommended for new development. Starting from numpy
       1.4, if one needs arrays of strings, it is recommended to use arrays of
       `dtype` `object_`, `bytes_` or `str_`, and use the free functions
       in the `numpy.char` module for fast vectorized string operations.

    Versus a regular NumPy array of type `str` or `unicode`, this
    class adds the following functionality:

      1) values automatically have whitespace removed from the end
         when indexed

      2) comparison operators automatically remove whitespace from the
         end when comparing values

      3) vectorized string operations are provided as methods
         (e.g. `.endswith`) and infix operators (e.g. ``"+", "*", "%"``)

    chararrays should be created using `numpy.char.array` or
    `numpy.char.asarray`, rather than this constructor directly.

    This constructor creates the array, using `buffer` (with `offset`
    and `strides`) if it is not ``None``. If `buffer` is ``None``, then
    constructs a new array with `strides` in "C order", unless both
    ``len(shape) >= 2`` and ``order='F'``, in which case `strides`
    is in "Fortran order".

    Methods
    -------
    astype
    argsort
    copy
    count
    decode
    dump
    dumps
    encode
    endswith
    expandtabs
    fill
    find
    flatten
    getfield
    index
    isalnum
    isalpha
    isdecimal
    isdigit
    islower
    isnumeric
    isspace
    istitle
    isupper
    item
    join
    ljust
    lower
    lstrip
    nonzero
    put
    ravel
    repeat
    replace
    reshape
    resize
    rfind
    rindex
    rjust
    rsplit
    rstrip
    searchsorted
    setfield
    setflags
    sort
    split
    splitlines
    squeeze
    startswith
    strip
    swapaxes
    swapcase
    take
    title
    tofile
    tolist
    tostring
    translate
    transpose
    upper
    view
    zfill

    Parameters
    ----------
    shape : tuple
        Shape of the array.
    itemsize : int, optional
        Length of each array element, in number of characters. Default is 1.
    unicode : bool, optional
        Are the array elements of type unicode (True) or string (False).
        Default is False.
    buffer : object exposing the buffer interface or str, optional
        Memory address of the start of the array data.  Default is None,
        in which case a new array is created.  A `str` is not used as a
        buffer; it is assigned to every element of a new array instead.
    offset : int, optional
        Offset forwarded to `ndarray.__new__` together with `buffer`;
        only used when a buffer is given.  Default is 0. Needs to be >=0.
    strides : array_like of ints, optional
        Strides for the array (see `ndarray.strides` for full description).
        Default is None.
    order : {'C', 'F'}, optional
        The order in which the array data is stored in memory: 'C' ->
        "row major" order (the default), 'F' -> "column major"
        (Fortran) order.

    Examples
    --------
    >>> charar = np.chararray((3, 3))
    >>> charar[:] = 'a'
    >>> charar
    chararray([[b'a', b'a', b'a'],
               [b'a', b'a', b'a'],
               [b'a', b'a', b'a']], dtype='|S1')

    >>> charar = np.chararray(charar.shape, itemsize=5)
    >>> charar[:] = 'abc'
    >>> charar
    chararray([[b'abc', b'abc', b'abc'],
               [b'abc', b'abc', b'abc'],
               [b'abc', b'abc', b'abc']], dtype='|S5')

    """
    def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None,
                offset=0, strides=None, order='C'):
        global _globalvar

        if unicode:
            dtype = str_
        else:
            dtype = bytes_

        # force itemsize to be a Python int, since using NumPy integer
        # types results in itemsize.itemsize being used as the size of
        # strings in the new array.
        itemsize = int(itemsize)

        if isinstance(buffer, str):
            # unicode objects do not have the buffer interface
            filler = buffer
            buffer = None
        else:
            filler = None

        # While the flag is set, __array_finalize__ skips its dtype check.
        # Reset it in a `finally` so that a failing construction cannot
        # leave the check disabled for every later chararray.
        _globalvar = 1
        try:
            if buffer is None:
                self = ndarray.__new__(subtype, shape, (dtype, itemsize),
                                       order=order)
            else:
                self = ndarray.__new__(subtype, shape, (dtype, itemsize),
                                       buffer=buffer,
                                       offset=offset, strides=strides,
                                       order=order)
            if filler is not None:
                self[...] = filler
        finally:
            _globalvar = 0
        return self

    def __array_finalize__(self, obj):
        # The b is a special case because it is used for reconstructing.
        if not _globalvar and self.dtype.char not in 'SUbc':
            raise ValueError("Can only create a chararray from string data.")

    def __getitem__(self, obj):
        """
        Index like `ndarray`, stripping trailing whitespace from scalars.

        A scalar that is empty after stripping is returned as the `str`
        ``''``, whatever the dtype of the array.
        """
        val = ndarray.__getitem__(self, obj)

        if isinstance(val, character):
            temp = val.rstrip()
            if len(temp) == 0:
                val = ''
            else:
                val = temp

        return val

    # IMPLEMENTATION NOTE: Most of the methods of this class are
    # direct delegations to the free functions in this module.
    # However, those that return an array of strings should instead
    # return a chararray, so some extra wrapping is required.

    def __eq__(self, other):
        """
        Return (self == other) element-wise.

        See Also
        --------
        equal
        """
        return equal(self, other)

    def __ne__(self, other):
        """
        Return (self != other) element-wise.

        See Also
        --------
        not_equal
        """
        return not_equal(self, other)

    def __ge__(self, other):
        """
        Return (self >= other) element-wise.

        See Also
        --------
        greater_equal
        """
        return greater_equal(self, other)

    def __le__(self, other):
        """
        Return (self <= other) element-wise.

        See Also
        --------
        less_equal
        """
        return less_equal(self, other)

    def __gt__(self, other):
        """
        Return (self > other) element-wise.

        See Also
        --------
        greater
        """
        return greater(self, other)

    def __lt__(self, other):
        """
        Return (self < other) element-wise.

        See Also
        --------
        less
        """
        return less(self, other)

    def __add__(self, other):
        """
        Return (self + other), that is string concatenation,
        element-wise for a pair of array_likes of str or unicode.

        See Also
        --------
        add
        """
        return asarray(add(self, other))

    def __radd__(self, other):
        """
        Return (other + self), that is string concatenation,
        element-wise for a pair of array_likes of `bytes_` or `str_`.

        See Also
        --------
        add
        """
        return asarray(add(numpy.asarray(other), self))

    def __mul__(self, i):
        """
        Return (self * i), that is string multiple concatenation,
        element-wise.

        See Also
        --------
        multiply
        """
        return asarray(multiply(self, i))

    def __rmul__(self, i):
        """
        Return (i * self), that is string multiple concatenation,
        element-wise.

        See Also
        --------
        multiply
        """
        return asarray(multiply(self, i))

    def __mod__(self, i):
        """
        Return (self % i), that is pre-Python 2.6 string formatting
        (interpolation), element-wise for a pair of array_likes of `bytes_`
        or `str_`.

        See Also
        --------
        mod
        """
        return asarray(mod(self, i))

    def __rmod__(self, other):
        # Reflected `%` is not supported.
        return NotImplemented

    def argsort(self, axis=-1, kind=None, order=None):
        """
        Return the indices that sort the array lexicographically.

        For full documentation see `numpy.argsort`, for which this method is
        in fact merely a "thin wrapper."

        Examples
        --------
        >>> c = np.array(['a1b c', '1b ca', 'b ca1', 'Ca1b'], 'S5')
        >>> c = c.view(np.chararray); c
        chararray(['a1b c', '1b ca', 'b ca1', 'Ca1b'],
              dtype='|S5')
        >>> c[c.argsort()]
        chararray(['1b ca', 'Ca1b', 'a1b c', 'b ca1'],
              dtype='|S5')

        """
        return self.__array__().argsort(axis, kind, order)
    # The docstring above is replaced by the generic ndarray one.
    argsort.__doc__ = ndarray.argsort.__doc__

    def capitalize(self):
        """
        Return a copy of `self` with only the first character of each element
        capitalized.

        See Also
        --------
        char.capitalize

        """
        return asarray(capitalize(self))

    def center(self, width, fillchar=' '):
        """
        Return a copy of `self` with its elements centered in a
        string of length `width`.

        See Also
        --------
        center
        """
        return asarray(center(self, width, fillchar))

    def count(self, sub, start=0, end=None):
        """
        Returns an array with the number of non-overlapping occurrences of
        substring `sub` in the range [`start`, `end`].

        See Also
        --------
        char.count

        """
        return count(self, sub, start, end)

    def decode(self, encoding=None, errors=None):
        """
        Calls ``bytes.decode`` element-wise.

        See Also
        --------
        char.decode

        """
        return decode(self, encoding, errors)

    def encode(self, encoding=None, errors=None):
        """
        Calls `str.encode` element-wise.

        See Also
        --------
        char.encode

        """
        return encode(self, encoding, errors)

    def endswith(self, suffix, start=0, end=None):
        """
        Returns a boolean array which is `True` where the string element
        in `self` ends with `suffix`, otherwise `False`.

        See Also
        --------
        char.endswith

        """
        return endswith(self, suffix, start, end)

    def expandtabs(self, tabsize=8):
        """
        Return a copy of each string element where all tab characters are
        replaced by one or more spaces.

        See Also
        --------
        char.expandtabs

        """
        return asarray(expandtabs(self, tabsize))

    def find(self, sub, start=0, end=None):
        """
        For each element, return the lowest index in the string where
        substring `sub` is found.

        See Also
        --------
        char.find

        """
        return find(self, sub, start, end)

    def index(self, sub, start=0, end=None):
        """
        Like `find`, but raises `ValueError` when the substring is not found.

        See Also
        --------
        char.index

        """
        return index(self, sub, start, end)

    def isalnum(self):
        """
        Returns true for each element if all characters in the string
        are alphanumeric and there is at least one character, false
        otherwise.

        See Also
        --------
        char.isalnum

        """
        return isalnum(self)

    def isalpha(self):
        """
        Returns true for each element if all characters in the string
        are alphabetic and there is at least one character, false
        otherwise.

        See Also
        --------
        char.isalpha

        """
        return isalpha(self)

    def isdigit(self):
        """
        Returns true for each element if all characters in the string are
        digits and there is at least one character, false otherwise.

        See Also
        --------
        char.isdigit

        """
        return isdigit(self)

    def islower(self):
        """
        Returns true for each element if all cased characters in the
        string are lowercase and there is at least one cased character,
        false otherwise.

        See Also
        --------
        char.islower

        """
        return islower(self)

    def isspace(self):
        """
        Returns true for each element if there are only whitespace
        characters in the string and there is at least one character,
        false otherwise.

        See Also
        --------
        char.isspace

        """
        return isspace(self)

    def istitle(self):
        """
        Returns true for each element if the element is a titlecased
        string and there is at least one character, false otherwise.

        See Also
        --------
        char.istitle

        """
        return istitle(self)

    def isupper(self):
        """
        Returns true for each element if all cased characters in the
        string are uppercase and there is at least one character, false
        otherwise.

        See Also
        --------
        char.isupper

        """
        return isupper(self)

    def join(self, seq):
        """
        Return a string which is the concatenation of the strings in the
        sequence `seq`.

        See Also
        --------
        char.join

        """
        return join(self, seq)

    def ljust(self, width, fillchar=' '):
        """
        Return an array with the elements of `self` left-justified in a
        string of length `width`.

        See Also
        --------
        char.ljust

        """
        return asarray(ljust(self, width, fillchar))

    def lower(self):
        """
        Return an array with the elements of `self` converted to
        lowercase.

        See Also
        --------
        char.lower

        """
        return asarray(lower(self))

    def lstrip(self, chars=None):
        """
        For each element in `self`, return a copy with the leading characters
        removed.

        See Also
        --------
        char.lstrip

        """
        return asarray(lstrip(self, chars))

    def partition(self, sep):
        """
        Partition each element in `self` around `sep`.

        See Also
        --------
        partition
        """
        return asarray(partition(self, sep))

    def replace(self, old, new, count=None):
        """
        For each element in `self`, return a copy of the string with all
        occurrences of substring `old` replaced by `new`.

        See Also
        --------
        char.replace

        """
        return asarray(replace(self, old, new, count))

    def rfind(self, sub, start=0, end=None):
        """
        For each element in `self`, return the highest index in the string
        where substring `sub` is found, such that `sub` is contained
        within [`start`, `end`].

        See Also
        --------
        char.rfind

        """
        return rfind(self, sub, start, end)

    def rindex(self, sub, start=0, end=None):
        """
        Like `rfind`, but raises `ValueError` when the substring `sub` is
        not found.

        See Also
        --------
        char.rindex

        """
        return rindex(self, sub, start, end)

    def rjust(self, width, fillchar=' '):
        """
        Return an array with the elements of `self`
        right-justified in a string of length `width`.

        See Also
        --------
        char.rjust

        """
        return asarray(rjust(self, width, fillchar))

    def rpartition(self, sep):
        """
        Partition each element in `self` around the right-most `sep`.

        See Also
        --------
        rpartition
        """
        return asarray(rpartition(self, sep))

    def rsplit(self, sep=None, maxsplit=None):
        """
        For each element in `self`, return a list of the words in
        the string, using `sep` as the delimiter string.

        See Also
        --------
        char.rsplit

        """
        return rsplit(self, sep, maxsplit)

    def rstrip(self, chars=None):
        """
        For each element in `self`, return a copy with the trailing
        characters removed.

        See Also
        --------
        char.rstrip

        """
        return asarray(rstrip(self, chars))

    def split(self, sep=None, maxsplit=None):
        """
        For each element in `self`, return a list of the words in the
        string, using `sep` as the delimiter string.

        See Also
        --------
        char.split

        """
        return split(self, sep, maxsplit)

    def splitlines(self, keepends=None):
        """
        For each element in `self`, return a list of the lines in the
        element, breaking at line boundaries.

        See Also
        --------
        char.splitlines

        """
        return splitlines(self, keepends)

    def startswith(self, prefix, start=0, end=None):
        """
        Returns a boolean array which is `True` where the string element
        in `self` starts with `prefix`, otherwise `False`.

        See Also
        --------
        char.startswith

        """
        return startswith(self, prefix, start, end)

    def strip(self, chars=None):
        """
        For each element in `self`, return a copy with the leading and
        trailing characters removed.

        See Also
        --------
        char.strip

        """
        return asarray(strip(self, chars))

    def swapcase(self):
        """
        For each element in `self`, return a copy of the string with
        uppercase characters converted to lowercase and vice versa.

        See Also
        --------
        char.swapcase

        """
        return asarray(swapcase(self))

    def title(self):
        """
        For each element in `self`, return a titlecased version of the
        string: words start with uppercase characters, all remaining cased
        characters are lowercase.

        See Also
        --------
        char.title

        """
        return asarray(title(self))

    def translate(self, table, deletechars=None):
        """
        For each element in `self`, return a copy of the string where
        all characters occurring in the optional argument
        `deletechars` are removed, and the remaining characters have
        been mapped through the given translation table.

        See Also
        --------
        char.translate

        """
        return asarray(translate(self, table, deletechars))

    def upper(self):
        """
        Return an array with the elements of `self` converted to
        uppercase.

        See Also
        --------
        char.upper

        """
        return asarray(upper(self))

    def zfill(self, width):
        """
        Return the numeric string left-filled with zeros in a string of
        length `width`.

        See Also
        --------
        char.zfill

        """
        return asarray(zfill(self, width))

    def isnumeric(self):
        """
        For each element in `self`, return True if there are only
        numeric characters in the element.

        See Also
        --------
        char.isnumeric

        """
        return isnumeric(self)

    def isdecimal(self):
        """
        For each element in `self`, return True if there are only
        decimal characters in the element.

        See Also
        --------
        char.isdecimal

        """
        return isdecimal(self)
@set_module("numpy.char")
def array(obj, itemsize=None, copy=True, unicode=None, order=None):
    """
    Create a `chararray`.

    .. note::
       This class is provided for numarray backward-compatibility.
       New code (not concerned with numarray compatibility) should use
       arrays of type `bytes_` or `str_` and use the free functions
       in :mod:`numpy.char <numpy.core.defchararray>` for fast
       vectorized string operations instead.

    Versus a regular NumPy array of type `str` or `unicode`, this
    class adds the following functionality:

      1) values automatically have whitespace removed from the end
         when indexed

      2) comparison operators automatically remove whitespace from the
         end when comparing values

      3) vectorized string operations are provided as methods
         (e.g. `str.endswith`) and infix operators (e.g. ``+, *, %``)

    Parameters
    ----------
    obj : array of str or unicode-like

    itemsize : int, optional
        `itemsize` is the number of characters per scalar in the
        resulting array.  If `itemsize` is None, and `obj` is an
        object array or a Python list, the `itemsize` will be
        automatically determined.  If `itemsize` is provided and `obj`
        is of type str or unicode, then the `obj` string will be
        chunked into `itemsize` pieces.

    copy : bool, optional
        If true (default), then the object is copied.  Otherwise, a copy
        will only be made if __array__ returns a copy, if obj is a
        nested sequence, or if a copy is needed to satisfy any of the other
        requirements (`itemsize`, unicode, `order`, etc.).

    unicode : bool, optional
        When true, the resulting `chararray` can contain Unicode
        characters, when false only 8-bit characters.  If unicode is
        None and `obj` is one of the following:

          - a `chararray`,
          - an ndarray of type `str` or `unicode`
          - a Python str or unicode object,

        then the unicode setting of the output array will be
        automatically determined.

    order : {'C', 'F', 'A'}, optional
        Specify the order of the array.  If order is 'C' (default), then the
        array will be in C-contiguous order (last-index varies the
        fastest).  If order is 'F', then the returned array
        will be in Fortran-contiguous order (first-index varies the
        fastest).  If order is 'A', then the returned array may
        be in any order (either C-, Fortran-contiguous, or even
        discontiguous).

    Returns
    -------
    out : chararray
    """
    if isinstance(obj, (bytes, str)):
        if unicode is None:
            unicode = isinstance(obj, str)

        if itemsize is None:
            itemsize = len(obj)
        # The scalar is chunked into `itemsize`-long pieces.
        shape = len(obj) // itemsize

        return chararray(shape, itemsize=itemsize, unicode=unicode,
                         buffer=obj, order=order)

    if isinstance(obj, (list, tuple)):
        obj = numpy.asarray(obj)

    if isinstance(obj, ndarray) and issubclass(obj.dtype.type, character):
        # If we just have a vanilla chararray, create a chararray
        # view around it.
        if not isinstance(obj, chararray):
            obj = obj.view(chararray)

        # `obj.itemsize` is in bytes; NumPy stores each Unicode character
        # in 4 bytes, so convert to a character count before comparing
        # with the requested `itemsize` (which is in characters).
        input_is_unicode = issubclass(obj.dtype.type, str_)
        if input_is_unicode:
            current_chars = obj.itemsize // 4
        else:
            current_chars = obj.itemsize

        if itemsize is None:
            itemsize = current_chars

        if unicode is None:
            unicode = input_is_unicode

        if unicode:
            dtype = str_
        else:
            dtype = bytes_

        if order is not None:
            # asanyarray keeps the chararray subclass; asarray would
            # hand back a plain ndarray.
            obj = numpy.asanyarray(obj, order=order)
        # Cast when a copy is requested, the character width changes, or
        # the bytes/unicode kind of the data differs from the request.
        if (copy or
                (itemsize != current_chars) or
                (bool(unicode) != input_is_unicode)):
            obj = obj.astype((dtype, int(itemsize)))
        return obj

    # NOTE(review): every dtype type is a subclass of the builtin `object`,
    # so this test is true for any ndarray reaching this point; `object_`
    # may have been intended.  Confirm before changing, since it affects
    # how the itemsize of non-string arrays is determined.
    if isinstance(obj, ndarray) and issubclass(obj.dtype.type, object):
        if itemsize is None:
            # Since no itemsize was specified, convert the input array to
            # a list so the ndarray constructor will automatically
            # determine the itemsize for us.
            obj = obj.tolist()
        # Fall through to the default case

    if unicode:
        dtype = str_
    else:
        dtype = bytes_

    if itemsize is None:
        val = narray(obj, dtype=dtype, order=order, subok=True)
    else:
        val = narray(obj, dtype=(dtype, itemsize), order=order, subok=True)
    return val.view(chararray)
@set_module("numpy.char")
def asarray(obj, itemsize=None, unicode=None, order=None):
    """
    Convert the input to a `chararray`, copying the data only if
    necessary.

    Versus a regular NumPy array of type `str` or `unicode`, this
    class adds the following functionality:

      1) values automatically have whitespace removed from the end
         when indexed

      2) comparison operators automatically remove whitespace from the
         end when comparing values

      3) vectorized string operations are provided as methods
         (e.g. `str.endswith`) and infix operators (e.g. ``+``, ``*``,
         ``%``)

    Parameters
    ----------
    obj : array of str or unicode-like

    itemsize : int, optional
        `itemsize` is the number of characters per scalar in the
        resulting array.  If `itemsize` is None, and `obj` is an
        object array or a Python list, the `itemsize` will be
        automatically determined.  If `itemsize` is provided and `obj`
        is of type str or unicode, then the `obj` string will be
        chunked into `itemsize` pieces.

    unicode : bool, optional
        When true, the resulting `chararray` can contain Unicode
        characters, when false only 8-bit characters.  If unicode is
        None and `obj` is one of the following:

          - a `chararray`,
          - an ndarray of type `str` or `unicode`
          - a Python str or unicode object,

        then the unicode setting of the output array will be
        automatically determined.

    order : {'C', 'F'}, optional
        Specify the order of the array.  If order is 'C' (default), then the
        array will be in C-contiguous order (last-index varies the
        fastest).  If order is 'F', then the returned array
        will be in Fortran-contiguous order (first-index varies the
        fastest).
    """
    # Same as `array`, but without forcing a copy.
    return array(
        obj, itemsize=itemsize, copy=False, unicode=unicode, order=order)