From 2ca47dc4e8422e5b0bc9290d1ec371199829432b Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Tue, 12 Nov 2019 10:57:21 +0300
Subject: [PATCH 01/68] Implement str.capitalize() based on CPython

---
 docs/source/reference/pysupported.rst |  1 +
 numba/tests/test_unicode.py           | 17 +++++++++
 numba/unicode.py                      | 50 +++++++++++++++++++++++++++
 3 files changed, 68 insertions(+)

diff --git a/docs/source/reference/pysupported.rst b/docs/source/reference/pysupported.rst
index f19d3da7a64..183e6bf2196 100644
--- a/docs/source/reference/pysupported.rst
+++ b/docs/source/reference/pysupported.rst
@@ -180,6 +180,7 @@ The following functions, attributes and methods are currently supported:
 * ``.lstrip()``
 * ``.rstrip()``
 * ``.strip()``
+* ``.capitalize()``
 * ``.isupper()``
 * ``.upper()``
 * ``.islower()``
diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index c9f49623dbd..4d70872a684 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -1248,6 +1248,23 @@ def pyfunc(x):
             self.assertEqual(pyfunc(*args), cfunc(*args),
                              msg='failed on {}'.format(args))
 
+    def test_capitalize(self):
+        def pyfunc(x):
+            return x.capitalize()
+
+        cfunc = njit(pyfunc)
+        # Samples taken from CPython testing:
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L800-L815    # noqa: E501
+        cpython = ['\U0001044F', '\U0001044F\U0001044F', '\U00010427\U0001044F',
+                   '\U0001044F\U00010427', 'X\U00010427x\U0001044F', 'h\u0130',
+                   '\u1fd2\u0130', 'ﬁnnish', 'A\u0345\u03a3']
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L926    # noqa: E501
+        cpython_extras = ['\U00010000\U00100000']
+
+        msg = 'Results of "{}".capitalize() must be equal'
+        for s in UNICODE_EXAMPLES + [''] + cpython + cpython_extras:
+            self.assertEqual(pyfunc(s), cfunc(s), msg=msg.format(s))
+
     def test_isupper(self):
         def pyfunc(x):
             return x.isupper()
diff --git a/numba/unicode.py b/numba/unicode.py
index 673b437ec5d..d7d6ba0689d 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1266,6 +1266,56 @@ def impl(a):
             return len(a) == 0
         return impl
 
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9737-L9759    # noqa: E501
+@register_jitable
+def _do_capitalize(data, length, res, maxchars):
+    """This is a translation of the function to capitalize a unicode string."""
+    k = 0
+    mapped = np.zeros(3, dtype=_Py_UCS4)
+
+    code_point = _get_code_point(data, 0)
+    n_res = _PyUnicode_ToUpperFull(code_point, mapped)
+    for m in mapped[:n_res]:
+        maxchar = maxchars[0]
+        maxchars[0] = max(maxchar, m)
+        _set_code_point(res, k, m)
+        k += 1
+
+    for idx in range(1, length):
+        code_point = _get_code_point(data, idx)
+        n_res = _lower_ucs4(code_point, data, length, idx, mapped)
+        for m in mapped[:n_res]:
+            maxchar = maxchars[0]
+            maxchars[0] = max(maxchar, m)
+            _set_code_point(res, k, m)
+            k += 1
+
+    return k
+
+
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10765-L10774    # noqa: E501
+@overload_method(types.UnicodeType, 'capitalize')
+def unicode_capitalize(data):
+    """Implements str.capitalize()"""
+    def impl(data):
+        length = len(data)
+        if length == 0:
+            return _empty_string(data._kind, length, data._is_ascii)
+
+        tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length, data._is_ascii)
+        # maxchar should be inside of a list to be pass as argument by reference
+        maxchars = [0]
+        newlength = _do_capitalize(data, length, tmp, maxchars)
+        maxchar = maxchars[0]
+        newkind = _codepoint_to_kind(maxchar)
+        res = _empty_string(newkind, newlength, _codepoint_is_ascii(maxchar))
+        for i in range(newlength):
+            _set_code_point(res, i, _get_code_point(tmp, i))
+
+        return res
+
+    return impl
+
 
 def _is_upper(is_lower, is_upper, is_title):
     # impl is an approximate translation of:

From 3a4d6d07b82f496e38314a9f91da6b6fbabc7290 Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Tue, 12 Nov 2019 12:57:20 +0300
Subject: [PATCH 02/68] Implement str.casefold() based on CPython

---
 docs/source/reference/pysupported.rst |  1 +
 numba/tests/test_unicode.py           | 15 +++++++++++
 numba/unicode.py                      | 39 ++++++++++++++++++++++++++-
 numba/unicode_support.py              | 12 +++++++--
 4 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/docs/source/reference/pysupported.rst b/docs/source/reference/pysupported.rst
index f19d3da7a64..c7f14d85029 100644
--- a/docs/source/reference/pysupported.rst
+++ b/docs/source/reference/pysupported.rst
@@ -169,6 +169,7 @@ The following functions, attributes and methods are currently supported:
 * ``*`` (repetition of strings)
 * ``in``, ``.contains()``
 * ``==``, ``<``, ``<=``, ``>``, ``>=`` (comparison)
+* ``.casefold()``
 * ``.startswith()``
 * ``.endswith()``
 * ``.find()``
diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index c9f49623dbd..eb620198466 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -1277,6 +1277,21 @@ def pyfunc(x):
             self.assertEqual(pyfunc(*args), cfunc(*args),
                              msg='failed on {}'.format(args))
 
+    def test_casefold(self):
+        def pyfunc(x):
+            return x.casefold()
+
+        cfunc = njit(pyfunc)
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L774-L781    # noqa: E501
+        cpython = ['hello', 'hELlo', 'ß', 'ﬁ', '\u03a3',
+                   'A\u0345\u03a3', '\u00b5']
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L924    # noqa: E501
+        cpython_extras = ['\U00010000\U00100000']
+
+        msg = 'Results of "{}".casefold() must be equal'
+        for s in UNICODE_EXAMPLES + [''] + cpython + cpython_extras:
+            self.assertEqual(pyfunc(s), cfunc(s), msg=msg.format(s))
+
     def test_title(self):
         pyfunc = title
         cfunc = njit(pyfunc)
diff --git a/numba/unicode.py b/numba/unicode.py
index 673b437ec5d..672fdfd0824 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -33,7 +33,7 @@
 from numba.errors import TypingError
 from .unicode_support import (_Py_TOUPPER, _Py_TOLOWER, _Py_UCS4,
                               _PyUnicode_ToUpperFull, _PyUnicode_ToLowerFull,
-                              _PyUnicode_ToTitleFull,
+                              _PyUnicode_ToFoldedFull, _PyUnicode_ToTitleFull,
                               _PyUnicode_IsCased, _PyUnicode_IsCaseIgnorable,
                               _PyUnicode_IsUppercase, _PyUnicode_IsLowercase,
                               _PyUnicode_IsTitlecase, _Py_ISLOWER, _Py_ISUPPER)
@@ -1361,6 +1361,43 @@ def impl(a):
     return impl
 
 
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9819-L9834    # noqa: E501
+@overload_method(types.UnicodeType, 'casefold')
+def unicode_casefold(data):
+    """Implements str.casefold()"""
+    def impl(data):
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10782-L10791    # noqa: E501
+        def _do_casefold(data, length, res, maxchars):
+            """Translation of the function to case fold a unicode string."""
+            k = 0
+            mapped = np.zeros(3, dtype=_Py_UCS4)
+            for idx in range(length):
+                mapped.fill(0)
+                code_point = _get_code_point(data, idx)
+                n_res = _PyUnicode_ToFoldedFull(code_point, mapped)
+                for m in mapped[:n_res]:
+                    maxchar = maxchars[0]
+                    maxchars[0] = max(maxchar, m)
+                    _set_code_point(res, k, m)
+                    k += 1
+            return k
+
+        length = len(data)
+        tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length, data._is_ascii)
+        # maxchar is kept inside a list so it can be passed by reference
+        maxchars = [0]
+        newlength = _do_casefold(data, length, tmp, maxchars)
+        maxchar = maxchars[0]
+        newkind = _codepoint_to_kind(maxchar)
+        res = _empty_string(newkind, newlength, _codepoint_is_ascii(maxchar))
+        for i in range(newlength):
+            _set_code_point(res, i, _get_code_point(tmp, i))
+
+        return res
+
+    return impl
+
+
 @overload_method(types.UnicodeType, 'istitle')
 def unicode_istitle(s):
     """
diff --git a/numba/unicode_support.py b/numba/unicode_support.py
index 021f36a8ec8..a8a88b268a9 100644
--- a/numba/unicode_support.py
+++ b/numba/unicode_support.py
@@ -292,10 +292,18 @@ def _PyUnicode_ToUpperFull(ch, res):
     return 1
 
 
+# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L259-L272    # noqa: E501
 @register_jitable
 def _PyUnicode_ToFoldedFull(ch, res):
-    raise NotImplementedError
-
+    ctype = _PyUnicode_gettyperecord(ch)
+    extended_case_mask = _PyUnicode_TyperecordMasks.EXTENDED_CASE_MASK
+    if ctype.flags & extended_case_mask and (ctype.lower >> 20) & 7:
+        index = (ctype.lower & 0xFFFF) + (ctype.lower >> 24)
+        n = (ctype.lower >> 20) & 7
+        for i in range(n):
+            res[i] = _PyUnicode_ExtendedCase(index + i)
+        return n
+    return _PyUnicode_ToLowerFull(ch, res)
 
 # From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L274-L279    # noqa: E501
 @register_jitable

From 8a2079b11af68ff0c8389a4792a2111417d932b9 Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Tue, 12 Nov 2019 13:02:21 +0300
Subject: [PATCH 03/68] Move _do_capitalize into overload

---
 numba/unicode.py | 48 ++++++++++++++++++++++--------------------------
 1 file changed, 22 insertions(+), 26 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index d7d6ba0689d..e83ef9988e6 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1266,38 +1266,34 @@ def impl(a):
             return len(a) == 0
         return impl
 
-# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9737-L9759    # noqa: E501
-@register_jitable
-def _do_capitalize(data, length, res, maxchars):
-    """This is a translation of the function to capitalize a unicode string."""
-    k = 0
-    mapped = np.zeros(3, dtype=_Py_UCS4)
-
-    code_point = _get_code_point(data, 0)
-    n_res = _PyUnicode_ToUpperFull(code_point, mapped)
-    for m in mapped[:n_res]:
-        maxchar = maxchars[0]
-        maxchars[0] = max(maxchar, m)
-        _set_code_point(res, k, m)
-        k += 1
-
-    for idx in range(1, length):
-        code_point = _get_code_point(data, idx)
-        n_res = _lower_ucs4(code_point, data, length, idx, mapped)
-        for m in mapped[:n_res]:
-            maxchar = maxchars[0]
-            maxchars[0] = max(maxchar, m)
-            _set_code_point(res, k, m)
-            k += 1
-
-    return k
-
 
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10765-L10774    # noqa: E501
 @overload_method(types.UnicodeType, 'capitalize')
 def unicode_capitalize(data):
     """Implements str.capitalize()"""
     def impl(data):
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9737-L9759    # noqa: E501
+        def _do_capitalize(data, length, res, maxchars):
+            """Translation of the function to capitalize a unicode string."""
+            k = 0
+            mapped = np.zeros(3, dtype=_Py_UCS4)
+            code_point = _get_code_point(data, 0)
+            n_res = _PyUnicode_ToUpperFull(code_point, mapped)
+            for m in mapped[:n_res]:
+                maxchar = maxchars[0]
+                maxchars[0] = max(maxchar, m)
+                _set_code_point(res, k, m)
+                k += 1
+            for idx in range(1, length):
+                code_point = _get_code_point(data, idx)
+                n_res = _lower_ucs4(code_point, data, length, idx, mapped)
+                for m in mapped[:n_res]:
+                    maxchar = maxchars[0]
+                    maxchars[0] = max(maxchar, m)
+                    _set_code_point(res, k, m)
+                    k += 1
+            return k
+
         length = len(data)
         if length == 0:
             return _empty_string(data._kind, length, data._is_ascii)

From 82e20bc4c4f26c56cd8ed27f485e38e8250286c0 Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Tue, 12 Nov 2019 13:47:43 +0300
Subject: [PATCH 04/68] Implement str.swapcase() based on CPython

---
 docs/source/reference/pysupported.rst |  1 +
 numba/tests/test_unicode.py           | 19 ++++++++++++
 numba/unicode.py                      | 43 +++++++++++++++++++++++++++
 3 files changed, 63 insertions(+)

diff --git a/docs/source/reference/pysupported.rst b/docs/source/reference/pysupported.rst
index f19d3da7a64..12965a171df 100644
--- a/docs/source/reference/pysupported.rst
+++ b/docs/source/reference/pysupported.rst
@@ -188,6 +188,7 @@ The following functions, attributes and methods are currently supported:
 * ``.count()``
 * ``.istitle()``
 * ``.rfind()``
+* ``.swapcase()``
 * ``.title()``
 
 Additional operations as well as support for Python 2 strings / Python 3 bytes
diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index c9f49623dbd..efa9798d367 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -1293,6 +1293,25 @@ def test_title(self):
         for s in UNICODE_EXAMPLES + [''] + cpython:
             self.assertEqual(pyfunc(s), cfunc(s), msg=msg.format(s))
 
+    def test_swapcase(self):
+        def pyfunc(x):
+            return x.swapcase()
+
+        cfunc = njit(pyfunc)
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L834-L858    # noqa: E501
+        cpython = ['\U0001044F', '\U00010427', '\U0001044F\U0001044F',
+                   '\U00010427\U0001044F', '\U0001044F\U00010427',
+                   'X\U00010427x\U0001044F', 'ﬁ', '\u0130', '\u03a3',
+                   '\u0345\u03a3', 'A\u0345\u03a3', 'A\u0345\u03a3a',
+                   'A\u0345\u03a3', 'A\u03a3\u0345', '\u03a3\u0345 ',
+                   '\u03a3', 'ß', '\u1fd2']
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L928    # noqa: E501
+        cpython_extras = ['\U00010000\U00100000']
+
+        msg = 'Results of "{}".swapcase() must be equal'
+        for s in UNICODE_EXAMPLES + [''] + cpython + cpython_extras:
+            self.assertEqual(pyfunc(s), cfunc(s), msg=msg.format(s))
+
     def test_islower(self):
         pyfunc = islower_usecase
         cfunc = njit(pyfunc)
diff --git a/numba/unicode.py b/numba/unicode.py
index 673b437ec5d..9089b3f232b 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1493,6 +1493,49 @@ def impl(data):
     return impl
 
 
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L13140-L13147    # noqa: E501
+@overload_method(types.UnicodeType, 'swapcase')
+def unicode_swapcase(data):
+    """Implements str.swapcase()"""
+    def impl(data):
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9761-L9784    # noqa: E501
+        def _do_swapcase(data, length, res, maxchars):
+            """Translation of the function to swap cases of a unicode string."""
+            k = 0
+            mapped = np.zeros(3, dtype=_Py_UCS4)
+            for idx in range(length):
+                mapped.fill(0)
+                code_point = _get_code_point(data, idx)
+                if _PyUnicode_IsUppercase(code_point):
+                    n_res = _lower_ucs4(code_point, data, length, idx, mapped)
+                elif _PyUnicode_IsLowercase(code_point):
+                    n_res = _PyUnicode_ToUpperFull(code_point, mapped)
+                else:
+                    n_res = 1
+                    mapped[0] = code_point
+                for m in mapped[:n_res]:
+                    maxchar = maxchars[0]
+                    maxchars[0] = max(maxchar, m)
+                    _set_code_point(res, k, m)
+                    k += 1
+            return k
+
+        length = len(data)
+        tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length, data._is_ascii)
+        # maxchar is kept inside a list so it can be passed by reference
+        maxchars = [0]
+        newlength = _do_swapcase(data, length, tmp, maxchars)
+        maxchar = maxchars[0]
+        newkind = _codepoint_to_kind(maxchar)
+        res = _empty_string(newkind, newlength, _codepoint_is_ascii(maxchar))
+        for i in range(newlength):
+            _set_code_point(res, i, _get_code_point(tmp, i))
+
+        return res
+
+    return impl
+
+
 # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9946-L9965    # noqa: E501
 @register_jitable
 def _do_upper_or_lower(data, length, res, maxchars, lower):

From 7602c6351a760647ca4fd6e6196702ac150d3e8b Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Wed, 13 Nov 2019 09:17:08 +0300
Subject: [PATCH 05/68] Implement str.rsplit() based on CPython

---
 docs/source/reference/pysupported.rst |   1 +
 numba/tests/test_unicode.py           | 113 ++++++++++++++++++++++++
 numba/unicode.py                      | 118 +++++++++++++++++++++++++-
 numba/unicode_support.py              |   6 ++
 4 files changed, 237 insertions(+), 1 deletion(-)

diff --git a/docs/source/reference/pysupported.rst b/docs/source/reference/pysupported.rst
index f19d3da7a64..f8000ed3fe6 100644
--- a/docs/source/reference/pysupported.rst
+++ b/docs/source/reference/pysupported.rst
@@ -176,6 +176,7 @@ The following functions, attributes and methods are currently supported:
 * ``.ljust()``
 * ``.rjust()``
 * ``.split()``
+* ``.rsplit()``
 * ``.join()``
 * ``.lstrip()``
 * ``.rstrip()``
diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index c9f49623dbd..195858b7025 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -130,6 +130,22 @@ def split_whitespace_usecase(x):
     return x.split()
 
 
+def rsplit_usecase(s, sep):
+    return s.rsplit(sep)
+
+
+def rsplit_with_maxsplit_usecase(s, sep, maxsplit):
+    return s.rsplit(sep, maxsplit)
+
+
+def rsplit_with_maxsplit_kwarg_usecase(s, sep, maxsplit):
+    return s.rsplit(sep, maxsplit=maxsplit)
+
+
+def rsplit_whitespace_usecase(s):
+    return s.rsplit()
+
+
 def lstrip_usecase(x):
     return x.lstrip()
 
@@ -864,6 +880,103 @@ def test_split_whitespace(self):
                              cfunc(test_str),
                              "'%s'.split()?" % (test_str,))
 
+    def test_rsplit_exception_empty_sep(self):
+        self.disable_leak_check()
+
+        pyfunc = rsplit_usecase
+        cfunc = njit(pyfunc)
+
+        # Handle empty separator exception
+        for func in [pyfunc, cfunc]:
+            with self.assertRaises(ValueError) as raises:
+                func('a', '')
+            self.assertIn('empty separator', str(raises.exception))
+
+    def test_rsplit_exception_noninteger_maxsplit(self):
+        pyfunc = rsplit_with_maxsplit_usecase
+        cfunc = njit(pyfunc)
+
+        accepted_types = (types.Integer, int)
+        for sep in [' ', None]:
+            with self.assertRaises(TypingError) as raises:
+                cfunc('a', sep, 2.4)
+            msg = '"maxsplit" must be {}, not float'.format(accepted_types)
+            self.assertIn(msg, str(raises.exception))
+
+    def test_rsplit(self):
+        pyfunc = rsplit_usecase
+        cfunc = njit(pyfunc)
+
+        CASES = [
+            (' a ', None),
+            ('', '⚡'),
+            ('abcabc', '⚡'),
+            ('🐍⚡', '⚡'),
+            ('🐍⚡🐍', '⚡'),
+            ('abababa', 'a'),
+            ('abababa', 'b'),
+            ('abababa', 'c'),
+            ('abababa', 'ab'),
+            ('abababa', 'aba'),
+        ]
+        msg = 'Results of "{}".rsplit("{}") must be equal'
+        for s, sep in CASES:
+            self.assertEqual(pyfunc(s, sep), cfunc(s, sep),
+                             msg=msg.format(s, sep))
+
+    def test_rsplit_with_maxsplit(self):
+        pyfuncs = [rsplit_with_maxsplit_usecase,
+                   rsplit_with_maxsplit_kwarg_usecase]
+        CASES = [
+            (' a ', None, 1),
+            ('', '⚡', 1),
+            ('abcabc', '⚡', 1),
+            ('🐍⚡', '⚡', 1),
+            ('🐍⚡🐍', '⚡', 1),
+            ('abababa', 'a', 2),
+            ('abababa', 'b', 1),
+            ('abababa', 'c', 2),
+            ('abababa', 'ab', 1),
+            ('abababa', 'aba', 5),
+        ]
+        messages = [
+            'Results of "{}".rsplit("{}", {}) must be equal',
+            'Results of "{}".rsplit("{}", maxsplit={}) must be equal'
+        ]
+
+        for pyfunc, msg in zip(pyfuncs, messages):
+            cfunc = njit(pyfunc)
+            for test_str, sep, maxsplit in CASES:
+                self.assertEqual(pyfunc(test_str, sep, maxsplit),
+                                 cfunc(test_str, sep, maxsplit),
+                                 msg=msg.format(test_str, sep, maxsplit))
+
+    def test_rsplit_whitespace(self):
+        pyfunc = rsplit_whitespace_usecase
+        cfunc = njit(pyfunc)
+
+        # list copied from
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodetype_db.h#L5996-L6031    # noqa: E501
+        all_whitespace = ''.join(map(chr, [
+            0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x001C, 0x001D, 0x001E,
+            0x001F, 0x0020, 0x0085, 0x00A0, 0x1680, 0x2000, 0x2001, 0x2002,
+            0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A,
+            0x2028, 0x2029, 0x202F, 0x205F, 0x3000
+        ]))
+
+        CASES = [
+            '',
+            'abcabc',
+            '🐍 ⚡',
+            '🐍 ⚡ 🐍',
+            '🐍   ⚡ 🐍  ',
+            '  🐍   ⚡ 🐍',
+            ' 🐍' + all_whitespace + '⚡ 🐍  ',
+        ]
+        msg = 'Results of "{}".rsplit() must be equal'
+        for s in CASES:
+            self.assertEqual(pyfunc(s), cfunc(s), msg.format(s))
+
     def test_join_empty(self):
         # Can't pass empty list to nopython mode, so we have to make a
         # separate test case
diff --git a/numba/unicode.py b/numba/unicode.py
index 673b437ec5d..1aa16b98df3 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1,4 +1,5 @@
 import operator
+import sys
 
 import numpy as np
 from llvmlite.ir import IntType, Constant
@@ -33,7 +34,7 @@
 from numba.errors import TypingError
 from .unicode_support import (_Py_TOUPPER, _Py_TOLOWER, _Py_UCS4,
                               _PyUnicode_ToUpperFull, _PyUnicode_ToLowerFull,
-                              _PyUnicode_ToTitleFull,
+                              _PyUnicode_ToTitleFull, _PyUnicode_IsSpace,
                               _PyUnicode_IsCased, _PyUnicode_IsCaseIgnorable,
                               _PyUnicode_IsUppercase, _PyUnicode_IsLowercase,
                               _PyUnicode_IsTitlecase, _Py_ISLOWER, _Py_ISUPPER)
@@ -757,6 +758,121 @@ def split_whitespace_impl(a, sep=None, maxsplit=-1):
             return parts
         return split_whitespace_impl
 
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L13095-L13108    # noqa: E501
+@overload_method(types.UnicodeType, 'rsplit')
+def unicode_rsplit(data, sep=None, maxsplit=-1):
+    """Implements str.rsplit()"""
+
+    def _unicode_rsplit_check_type(ty, name, accepted):
+        """Check object belongs to one of specified types"""
+        thety = ty
+        # if the type is omitted, the concrete type is the value
+        if isinstance(ty, types.Omitted):
+            thety = ty.value
+        # if the type is optional, the concrete type is the captured type
+        elif isinstance(ty, types.Optional):
+            thety = ty.type
+
+        if thety is not None and not isinstance(thety, accepted):
+            raise TypingError(
+                '"{}" must be {}, not {}'.format(name, accepted, ty))
+
+    _unicode_rsplit_check_type(sep, 'sep', (types.UnicodeType,
+                                            types.UnicodeCharSeq,
+                                            types.NoneType))
+    _unicode_rsplit_check_type(maxsplit, 'maxsplit', (types.Integer, int))
+
+    if sep is None or isinstance(sep, (types.NoneType, types.Omitted)):
+
+        def rsplit_whitespace_impl(data, sep=None, maxsplit=-1):
+            # https://github.com/python/cpython/blob/master/Objects/stringlib/split.h#L192-L235    # noqa: E501
+            if maxsplit < 0:
+                maxsplit = sys.maxsize
+
+            result = []
+            i = len(data) - 1
+            while maxsplit > 0:
+                while i >= 0:
+                    code_point = _get_code_point(data, i)
+                    if not _PyUnicode_IsSpace(code_point):
+                        break
+                    i -= 1
+                if i < 0:
+                    break
+                j = i
+                i -= 1
+                while i >= 0:
+                    code_point = _get_code_point(data, i)
+                    if _PyUnicode_IsSpace(code_point):
+                        break
+                    i -= 1
+                result.append(data[i+1:j+1])
+                maxsplit -= 1
+
+            if i >= 0:
+                # Only occurs when maxsplit was reached
+                # Skip any remaining whitespace and copy to beginning of string
+                while i >= 0:
+                    code_point = _get_code_point(data, i)
+                    if not _PyUnicode_IsSpace(code_point):
+                        break
+                    i -= 1
+                if i >= 0:
+                    result.append(data[0:i+1])
+
+            return result[::-1]
+
+        return rsplit_whitespace_impl
+
+    def rsplit_impl(data, sep=None, maxsplit=-1):
+        # https://github.com/python/cpython/blob/master/Objects/stringlib/split.h#L286-L333    # noqa: E501
+        if data._kind < sep._kind or len(data) < len(sep):
+            return [data]
+
+        def _rsplit_char(data, ch, maxsplit):
+            # https://github.com/python/cpython/blob/master/Objects/stringlib/split.h#L242-L284    # noqa: E501
+            result = []
+            i = j = len(data) - 1
+            while i >= 0 and maxsplit > 0:
+                while i >= 0:
+                    data_code_point = _get_code_point(data, i)
+                    ch_code_point = _get_code_point(ch, 0)
+                    if data_code_point == ch_code_point:
+                        result.append(data[i+1:j+1])
+                        j = i = i - 1
+                        break
+                    i -= 1
+                maxsplit -= 1
+            if j >= -1:
+                result.append(data[0:j+1])
+
+            return result[::-1]
+
+        if maxsplit < 0:
+            maxsplit = sys.maxsize
+        sep_length = len(sep)
+
+        if sep_length == 0:
+            raise ValueError('empty separator')
+        if sep_length == 1:
+            return _rsplit_char(data, sep, maxsplit)
+
+        result = []
+        j = len(data)
+        while maxsplit > 0:
+            pos = data.rfind(sep, start=0, end=j)
+            if pos < 0:
+                break
+            result.append(data[pos+sep_length:j])
+            j = pos
+            maxsplit -= 1
+
+        result.append(data[0:j])
+
+        return result[::-1]
+
+    return rsplit_impl
+
 
 @overload_method(types.UnicodeType, 'center')
 def unicode_center(string, width, fillchar=' '):
diff --git a/numba/unicode_support.py b/numba/unicode_support.py
index 021f36a8ec8..38f257f0ed0 100644
--- a/numba/unicode_support.py
+++ b/numba/unicode_support.py
@@ -311,6 +311,12 @@ def _PyUnicode_IsCaseIgnorable(ch):
     return ctype.flags & _PyUnicode_TyperecordMasks.CASE_IGNORABLE_MASK != 0
 
 
+@register_jitable
+def _PyUnicode_IsSpace(ch):
+    ctype = _PyUnicode_gettyperecord(ch)
+    return ctype.flags & _PyUnicode_TyperecordMasks.SPACE_MASK != 0
+
+
 @register_jitable
 def _PyUnicode_IsAlpha(ch):
     raise NotImplementedError

From 56a930cd5bc7857af4f3b9fe854970116a8314d7 Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Wed, 13 Nov 2019 09:47:42 +0300
Subject: [PATCH 06/68] Fix linter issues for str.rsplit()

---
 numba/unicode.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 1aa16b98df3..25455e9a9f8 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -806,7 +806,7 @@ def rsplit_whitespace_impl(data, sep=None, maxsplit=-1):
                     if _PyUnicode_IsSpace(code_point):
                         break
                     i -= 1
-                result.append(data[i+1:j+1])
+                result.append(data[i + 1:j + 1])
                 maxsplit -= 1
 
             if i >= 0:
@@ -818,7 +818,7 @@ def rsplit_whitespace_impl(data, sep=None, maxsplit=-1):
                         break
                     i -= 1
                 if i >= 0:
-                    result.append(data[0:i+1])
+                    result.append(data[0:i + 1])
 
             return result[::-1]
 
@@ -838,13 +838,13 @@ def _rsplit_char(data, ch, maxsplit):
                     data_code_point = _get_code_point(data, i)
                     ch_code_point = _get_code_point(ch, 0)
                     if data_code_point == ch_code_point:
-                        result.append(data[i+1:j+1])
+                        result.append(data[i + 1:j + 1])
                         j = i = i - 1
                         break
                     i -= 1
                 maxsplit -= 1
             if j >= -1:
-                result.append(data[0:j+1])
+                result.append(data[0:j + 1])
 
             return result[::-1]
 
@@ -863,7 +863,7 @@ def _rsplit_char(data, ch, maxsplit):
             pos = data.rfind(sep, start=0, end=j)
             if pos < 0:
                 break
-            result.append(data[pos+sep_length:j])
+            result.append(data[pos + sep_length:j])
             j = pos
             maxsplit -= 1
 

From 11f99f63fd5cd21fd4f9ad0a39d40c03f9fc1698 Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Wed, 13 Nov 2019 14:52:36 +0300
Subject: [PATCH 07/68] Add capitalization of ascii for str.capitalize()

---
 numba/unicode.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/numba/unicode.py b/numba/unicode.py
index e83ef9988e6..c684008fd2d 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1298,6 +1298,18 @@ def _do_capitalize(data, length, res, maxchars):
         if length == 0:
             return _empty_string(data._kind, length, data._is_ascii)
 
+        if data._is_ascii:
+            # This is an approximate translation of:
+            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Include/pyctype.h#L29-L30    # noqa: E501
+            res = _empty_string(data._kind, length, data._is_ascii)
+            code_point = _get_code_point(data, 0)
+            _set_code_point(res, 0, _Py_TOUPPER(code_point))
+            for idx in range(1, length):
+                code_point = _get_code_point(data, idx)
+                _set_code_point(res, idx, _Py_TOLOWER(code_point))
+
+            return res
+
         tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length, data._is_ascii)
         # maxchar should be inside of a list to be pass as argument by reference
         maxchars = [0]

From 22a0ee2b09db34d6d79221cd0e8a98de12aa36dd Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Wed, 13 Nov 2019 14:54:59 +0300
Subject: [PATCH 08/68] Remove incorrect code comment for str.capitalize()

---
 numba/unicode.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index c684008fd2d..a4c4d1f08a9 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1299,8 +1299,6 @@ def _do_capitalize(data, length, res, maxchars):
             return _empty_string(data._kind, length, data._is_ascii)
 
         if data._is_ascii:
-            # This is an approximate translation of:
-            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Include/pyctype.h#L29-L30    # noqa: E501
             res = _empty_string(data._kind, length, data._is_ascii)
             code_point = _get_code_point(data, 0)
             _set_code_point(res, 0, _Py_TOUPPER(code_point))

From 40069b5d97a98b8c94431dfac54dd8b5d6b51d6e Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Wed, 13 Nov 2019 15:52:22 +0300
Subject: [PATCH 09/68] Improve str.casefold()

Added case folding for ASCII strings, merged _do_casefold into the main
code and added handling of the empty input string.
---
 numba/unicode.py | 50 +++++++++++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 672fdfd0824..4298f9e481b 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1361,36 +1361,42 @@ def impl(a):
     return impl
 
 
-# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9819-L9834    # noqa: E501
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10782-L10791    # noqa: E501
 @overload_method(types.UnicodeType, 'casefold')
 def unicode_casefold(data):
     """Implements str.casefold()"""
     def impl(data):
-        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10782-L10791    # noqa: E501
-        def _do_casefold(data, length, res, maxchars):
-            """Translation of the function to case fold a unicode string."""
-            k = 0
-            mapped = np.zeros(3, dtype=_Py_UCS4)
+        length = len(data)
+        if length == 0:
+            return _empty_string(data._kind, length, data._is_ascii)
+
+        if data._is_ascii:
+            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9678-L9694    # noqa: E501
+            res = _empty_string(data._kind, length, 1)
             for idx in range(length):
-                mapped.fill(0)
                 code_point = _get_code_point(data, idx)
-                n_res = _PyUnicode_ToFoldedFull(code_point, mapped)
-                for m in mapped[:n_res]:
-                    maxchar = maxchars[0]
-                    maxchars[0] = max(maxchar, m)
-                    _set_code_point(res, k, m)
-                    k += 1
-            return k
+                _set_code_point(res, idx, _Py_TOLOWER(code_point))
 
-        length = len(data)
-        tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length, data._is_ascii)
-        # maxchar should be inside of a list to be pass as argument by reference
-        maxchars = [0]
-        newlength = _do_casefold(data, length, tmp, maxchars)
-        maxchar = maxchars[0]
+            return res
+
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9863-L9908    # noqa: E501
+        # mixed with:
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9819-L9834    # noqa: E501
+        k = 0
+        maxchar = 0
+        mapped = np.zeros(3, dtype=_Py_UCS4)
+        tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length)
+        for idx in range(length):
+            mapped.fill(0)
+            code_point = _get_code_point(data, idx)
+            n_res = _PyUnicode_ToFoldedFull(code_point, mapped)
+            for m in mapped[:n_res]:
+                maxchar = max(maxchar, m)
+                _set_code_point(tmp, k, m)
+                k += 1
         newkind = _codepoint_to_kind(maxchar)
-        res = _empty_string(newkind, newlength, _codepoint_is_ascii(maxchar))
-        for i in range(newlength):
+        res = _empty_string(newkind, k)
+        for i in range(k):
             _set_code_point(res, i, _get_code_point(tmp, i))
 
         return res

From a2b178091de1e80fb3a6fc303c1a915185a5ce0c Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Wed, 13 Nov 2019 16:08:32 +0300
Subject: [PATCH 10/68] Merge _do_capitalize() to the main code

---
 numba/unicode.py | 54 +++++++++++++++++++++---------------------------
 1 file changed, 24 insertions(+), 30 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index a4c4d1f08a9..88a966e975d 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1272,34 +1272,12 @@ def impl(a):
 def unicode_capitalize(data):
     """Implements str.capitalize()"""
     def impl(data):
-        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9737-L9759    # noqa: E501
-        def _do_capitalize(data, length, res, maxchars):
-            """Translation of the function to capitalize a unicode string."""
-            k = 0
-            mapped = np.zeros(3, dtype=_Py_UCS4)
-            code_point = _get_code_point(data, 0)
-            n_res = _PyUnicode_ToUpperFull(code_point, mapped)
-            for m in mapped[:n_res]:
-                maxchar = maxchars[0]
-                maxchars[0] = max(maxchar, m)
-                _set_code_point(res, k, m)
-                k += 1
-            for idx in range(1, length):
-                code_point = _get_code_point(data, idx)
-                n_res = _lower_ucs4(code_point, data, length, idx, mapped)
-                for m in mapped[:n_res]:
-                    maxchar = maxchars[0]
-                    maxchars[0] = max(maxchar, m)
-                    _set_code_point(res, k, m)
-                    k += 1
-            return k
-
         length = len(data)
         if length == 0:
             return _empty_string(data._kind, length, data._is_ascii)
 
         if data._is_ascii:
-            res = _empty_string(data._kind, length, data._is_ascii)
+            res = _empty_string(data._kind, length, 1)
             code_point = _get_code_point(data, 0)
             _set_code_point(res, 0, _Py_TOUPPER(code_point))
             for idx in range(1, length):
@@ -1308,14 +1286,30 @@ def _do_capitalize(data, length, res, maxchars):
 
             return res
 
-        tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length, data._is_ascii)
-        # maxchar should be inside of a list to be pass as argument by reference
-        maxchars = [0]
-        newlength = _do_capitalize(data, length, tmp, maxchars)
-        maxchar = maxchars[0]
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9863-L9908    # noqa: E501
+        # mixed with:
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9737-L9759    # noqa: E501
+        k = 0
+        maxchar = 0
+        mapped = np.zeros(3, dtype=_Py_UCS4)
+        tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length)
+        code_point = _get_code_point(data, 0)
+        n_res = _PyUnicode_ToUpperFull(code_point, mapped)
+        for m in mapped[:n_res]:
+            maxchar = max(maxchar, m)
+            _set_code_point(tmp, k, m)
+            k += 1
+        for idx in range(1, length):
+            mapped.fill(0)
+            code_point = _get_code_point(data, idx)
+            n_res = _lower_ucs4(code_point, data, length, idx, mapped)
+            for m in mapped[:n_res]:
+                maxchar = max(maxchar, m)
+                _set_code_point(tmp, k, m)
+                k += 1
         newkind = _codepoint_to_kind(maxchar)
-        res = _empty_string(newkind, newlength, _codepoint_is_ascii(maxchar))
-        for i in range(newlength):
+        res = _empty_string(newkind, k)
+        for i in range(k):
             _set_code_point(res, i, _get_code_point(tmp, i))
 
         return res

From d5191432d52586da4b3383b2abee0a58b1b6672a Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Wed, 13 Nov 2019 16:21:15 +0300
Subject: [PATCH 11/68] Improve str.swapcase()

Added case swapping for ASCII strings, merged _do_swapcase into the main
code and added handling of the empty input string.
---
 numba/unicode.py | 63 +++++++++++++++++++++++++++---------------------
 1 file changed, 36 insertions(+), 27 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 9089b3f232b..278fd39eb55 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1498,37 +1498,46 @@ def impl(data):
 def unicode_swapcase(data):
     """Implements str.swapcase()"""
     def impl(data):
-        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9761-L9784    # noqa: E501
-        def _do_swapcase(data, length, res, maxchars):
-            """Translation of the function to swap cases of a unicode string."""
-            k = 0
-            mapped = np.zeros(3, dtype=_Py_UCS4)
+        length = len(data)
+        if length == 0:
+            return _empty_string(data._kind, length, data._is_ascii)
+
+        if data._is_ascii:
+            res = _empty_string(data._kind, length, 1)
             for idx in range(length):
-                mapped.fill(0)
                 code_point = _get_code_point(data, idx)
-                if _PyUnicode_IsUppercase(code_point):
-                    n_res = _lower_ucs4(code_point, data, length, idx, mapped)
-                elif _PyUnicode_IsLowercase(code_point):
-                    n_res = _PyUnicode_ToUpperFull(code_point, mapped)
-                else:
-                    n_res = 1
-                    mapped[0] = code_point
-                for m in mapped[:n_res]:
-                    maxchar = maxchars[0]
-                    maxchars[0] = max(maxchar, m)
-                    _set_code_point(res, k, m)
-                    k += 1
-            return k
+                if _Py_ISUPPER(code_point):
+                    code_point = _Py_TOLOWER(code_point)
+                elif _Py_ISLOWER(code_point):
+                    code_point = _Py_TOUPPER(code_point)
+                _set_code_point(res, idx, code_point)
 
-        length = len(data)
-        tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length, data._is_ascii)
-        # maxchar should be inside of a list to be pass as argument by reference
-        maxchars = [0]
-        newlength = _do_swapcase(data, length, tmp, maxchars)
-        maxchar = maxchars[0]
+            return res
+
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9863-L9908    # noqa: E501
+        # mixed with:
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9761-L9784    # noqa: E501
+        k = 0
+        maxchar = 0
+        mapped = np.zeros(3, dtype=_Py_UCS4)
+        tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length)
+        for idx in range(length):
+            mapped.fill(0)
+            code_point = _get_code_point(data, idx)
+            if _PyUnicode_IsUppercase(code_point):
+                n_res = _lower_ucs4(code_point, data, length, idx, mapped)
+            elif _PyUnicode_IsLowercase(code_point):
+                n_res = _PyUnicode_ToUpperFull(code_point, mapped)
+            else:
+                n_res = 1
+                mapped[0] = code_point
+            for m in mapped[:n_res]:
+                maxchar = max(maxchar, m)
+                _set_code_point(tmp, k, m)
+                k += 1
         newkind = _codepoint_to_kind(maxchar)
-        res = _empty_string(newkind, newlength, _codepoint_is_ascii(maxchar))
-        for i in range(newlength):
+        res = _empty_string(newkind, k)
+        for i in range(k):
             _set_code_point(res, i, _get_code_point(tmp, i))
 
         return res

From 7cd87c82cf6c01e7bd931b263de853bb42146308 Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Wed, 13 Nov 2019 17:53:47 +0300
Subject: [PATCH 12/68] Move common code from str.casefold()

---
 numba/unicode.py | 60 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 39 insertions(+), 21 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 4298f9e481b..0e98598666a 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1361,6 +1361,44 @@ def impl(a):
     return impl
 
 
+@register_jitable
+def _do_casefold(data, length, res, maxchars):
+    """Translation of the function to case fold a unicode string."""
+    k = 0
+    mapped = np.zeros(3, dtype=_Py_UCS4)
+    for idx in range(length):
+        mapped.fill(0)
+        code_point = _get_code_point(data, idx)
+        n_res = _PyUnicode_ToFoldedFull(code_point, mapped)
+        for m in mapped[:n_res]:
+            maxchar = maxchars[0]
+            maxchars[0] = max(maxchar, m)
+            _set_code_point(res, k, m)
+            k += 1
+    return k
+
+
+def _case_operation(func):
+    def impl(data):
+        length = len(data)
+        tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length, data._is_ascii)
+        # maxchar should be inside of a list to be pass as argument by reference
+        maxchars = [0]
+        newlength = func(data, length, tmp, maxchars)
+        maxchar = maxchars[0]
+        newkind = _codepoint_to_kind(maxchar)
+        res = _empty_string(newkind, newlength, _codepoint_is_ascii(maxchar))
+        for i in range(newlength):
+            _set_code_point(res, i, _get_code_point(tmp, i))
+
+        return res
+
+    return impl
+
+
+_apply_do_casefold = register_jitable(_case_operation(_do_casefold))
+
+
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10782-L10791    # noqa: E501
 @overload_method(types.UnicodeType, 'casefold')
 def unicode_casefold(data):
@@ -1379,27 +1417,7 @@ def impl(data):
 
             return res
 
-        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9863-L9908    # noqa: E501
-        # mixed with:
-        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9819-L9834    # noqa: E501
-        k = 0
-        maxchar = 0
-        mapped = np.zeros(3, dtype=_Py_UCS4)
-        tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length)
-        for idx in range(length):
-            mapped.fill(0)
-            code_point = _get_code_point(data, idx)
-            n_res = _PyUnicode_ToFoldedFull(code_point, mapped)
-            for m in mapped[:n_res]:
-                maxchar = max(maxchar, m)
-                _set_code_point(tmp, k, m)
-                k += 1
-        newkind = _codepoint_to_kind(maxchar)
-        res = _empty_string(newkind, k)
-        for i in range(k):
-            _set_code_point(res, i, _get_code_point(tmp, i))
-
-        return res
+        return _apply_do_casefold(data)
 
     return impl
 

From e33fc47b5143bc15082e0b5284632f662edacf9b Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Wed, 13 Nov 2019 21:43:16 +0300
Subject: [PATCH 13/68] Add generating case operation performer

---
 numba/unicode.py | 88 ++++++++++++++++++++++++++++++------------------
 1 file changed, 56 insertions(+), 32 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 0e98598666a..beed5cf455a 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1361,30 +1361,54 @@ def impl(a):
     return impl
 
 
-@register_jitable
-def _do_casefold(data, length, res, maxchars):
-    """Translation of the function to case fold a unicode string."""
-    k = 0
-    mapped = np.zeros(3, dtype=_Py_UCS4)
-    for idx in range(length):
-        mapped.fill(0)
-        code_point = _get_code_point(data, idx)
-        n_res = _PyUnicode_ToFoldedFull(code_point, mapped)
-        for m in mapped[:n_res]:
-            maxchar = maxchars[0]
-            maxchars[0] = max(maxchar, m)
-            _set_code_point(res, k, m)
-            k += 1
-    return k
+def generate_unicode_operation_doer(operation_func):
+    """Generate unicode case operation performer."""
+    def impl(data, length, res, maxchars):
+        k = 0
+        mapped = np.zeros(3, dtype=_Py_UCS4)
+        for idx in range(length):
+            mapped.fill(0)
+            code_point = _get_code_point(data, idx)
+            n_res = operation_func(code_point, mapped)
+            for m in mapped[:n_res]:
+                maxchar = maxchars[0]
+                maxchars[0] = max(maxchar, m)
+                _set_code_point(res, k, m)
+                k += 1
+
+        return k
+
+    return impl
 
 
-def _case_operation(func):
+def generate_ascii_operation_doer(operation_func):
+    """Generate ascii case operation performer."""
+    def impl(data, res):
+        for idx in range(len(data)):
+            code_point = _get_code_point(data, idx)
+            _set_code_point(res, idx, operation_func(code_point))
+
+    return impl
+
+
+def generate_common_operation_doer(ascii_func, unicode_nres_func):
+    """Generate common case operation performer."""
     def impl(data):
         length = len(data)
+        if length == 0:
+            return _empty_string(data._kind, length, data._is_ascii)
+
+        if data._is_ascii:
+            res = _empty_string(data._kind, length, 1)
+            ascii_func(data, res)
+
+            return res
+
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9863-L9908    # noqa: E501
         tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length, data._is_ascii)
         # maxchar should be inside of a list to be pass as argument by reference
         maxchars = [0]
-        newlength = func(data, length, tmp, maxchars)
+        newlength = unicode_nres_func(data, length, tmp, maxchars)
         maxchar = maxchars[0]
         newkind = _codepoint_to_kind(maxchar)
         res = _empty_string(newkind, newlength, _codepoint_is_ascii(maxchar))
@@ -1396,7 +1420,20 @@ def impl(data):
     return impl
 
 
-_apply_do_casefold = register_jitable(_case_operation(_do_casefold))
+def generate_case_operation_func(ascii_func, unicode_nres_func):
+    """Generate function to perform case operation
+    on a string either ascii or unicode.
+    """
+    ascii_operation_doer = register_jitable(generate_ascii_operation_doer(
+        ascii_func))
+    unicode_operation_doer = register_jitable(generate_unicode_operation_doer(
+        unicode_nres_func))
+    return generate_common_operation_doer(ascii_operation_doer,
+                                          unicode_operation_doer)
+
+
+_do_casefold = register_jitable(generate_case_operation_func(
+    _Py_TOLOWER, _PyUnicode_ToFoldedFull))
 
 
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10782-L10791    # noqa: E501
@@ -1404,20 +1441,7 @@ def impl(data):
 def unicode_casefold(data):
     """Implements str.casefold()"""
     def impl(data):
-        length = len(data)
-        if length == 0:
-            return _empty_string(data._kind, length, data._is_ascii)
-
-        if data._is_ascii:
-            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9678-L9694    # noqa: E501
-            res = _empty_string(data._kind, length, 1)
-            for idx in range(length):
-                code_point = _get_code_point(data, idx)
-                _set_code_point(res, idx, _Py_TOLOWER(code_point))
-
-            return res
-
-        return _apply_do_casefold(data)
+        return _do_casefold(data)
 
     return impl
 

From 0246ad687e689757f197602e4c63ab0587546ab5 Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Thu, 14 Nov 2019 11:09:53 +0300
Subject: [PATCH 14/68] Add SHA references, fix nested loops for rsplit

---
 numba/unicode.py | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 25455e9a9f8..59e379baddc 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -785,7 +785,7 @@ def _unicode_rsplit_check_type(ty, name, accepted):
     if sep is None or isinstance(sep, (types.NoneType, types.Omitted)):
 
         def rsplit_whitespace_impl(data, sep=None, maxsplit=-1):
-            # https://github.com/python/cpython/blob/master/Objects/stringlib/split.h#L192-L235    # noqa: E501
+            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/split.h#L192-L240    # noqa: E501
             if maxsplit < 0:
                 maxsplit = sys.maxsize
 
@@ -825,24 +825,22 @@ def rsplit_whitespace_impl(data, sep=None, maxsplit=-1):
         return rsplit_whitespace_impl
 
     def rsplit_impl(data, sep=None, maxsplit=-1):
-        # https://github.com/python/cpython/blob/master/Objects/stringlib/split.h#L286-L333    # noqa: E501
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/split.h#L286-L333    # noqa: E501
         if data._kind < sep._kind or len(data) < len(sep):
             return [data]
 
         def _rsplit_char(data, ch, maxsplit):
-            # https://github.com/python/cpython/blob/master/Objects/stringlib/split.h#L242-L284    # noqa: E501
+            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/split.h#L242-L284    # noqa: E501
             result = []
             i = j = len(data) - 1
             while i >= 0 and maxsplit > 0:
-                while i >= 0:
-                    data_code_point = _get_code_point(data, i)
-                    ch_code_point = _get_code_point(ch, 0)
-                    if data_code_point == ch_code_point:
-                        result.append(data[i + 1:j + 1])
-                        j = i = i - 1
-                        break
-                    i -= 1
-                maxsplit -= 1
+                data_code_point = _get_code_point(data, i)
+                ch_code_point = _get_code_point(ch, 0)
+                if data_code_point == ch_code_point:
+                    result.append(data[i + 1:j + 1])
+                    j = i = i - 1
+                    maxsplit -= 1
+                i -= 1
             if j >= -1:
                 result.append(data[0:j + 1])
 

From b38a4b2f00e9440cca6aa74a045a8f4630924ae4 Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Thu, 14 Nov 2019 13:19:24 +0300
Subject: [PATCH 15/68] Implement str.partition() based on CPython

---
 docs/source/reference/pysupported.rst |  1 +
 numba/tests/test_unicode.py           | 42 +++++++++++++++++++++++++++
 numba/unicode.py                      | 36 +++++++++++++++++++++++
 3 files changed, 79 insertions(+)

diff --git a/docs/source/reference/pysupported.rst b/docs/source/reference/pysupported.rst
index f19d3da7a64..106ca074543 100644
--- a/docs/source/reference/pysupported.rst
+++ b/docs/source/reference/pysupported.rst
@@ -184,6 +184,7 @@ The following functions, attributes and methods are currently supported:
 * ``.upper()``
 * ``.islower()``
 * ``.lower()``
+* ``.partition()``
 * ``.zfill()``
 * ``.count()``
 * ``.istitle()``
diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index c9f49623dbd..75e8549026e 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -78,6 +78,10 @@ def ge_usecase(x, y):
     return x >= y
 
 
+def partition_usecase(s, sep):
+    return s.partition(sep)
+
+
 def find_usecase(x, y):
     return x.find(y)
 
@@ -404,6 +408,44 @@ def test_in(self, flags=no_pyobj_flags):
                                  cfunc(substr, a),
                                  "'%s' in '%s'?" % (substr, a))
 
+    def test_partition_exception_invalid_sep(self):
+        self.disable_leak_check()
+
+        pyfunc = partition_usecase
+        cfunc = njit(pyfunc)
+
+        # Handle empty separator exception
+        for func in [pyfunc, cfunc]:
+            with self.assertRaises(ValueError) as raises:
+                func('a', '')
+            self.assertIn('empty separator', str(raises.exception))
+
+        accepted_types = (types.UnicodeType, types.UnicodeCharSeq)
+        with self.assertRaises(TypingError) as raises:
+            cfunc('a', None)
+        msg = '"sep" must be {}, not none'.format(accepted_types)
+        self.assertIn(msg, str(raises.exception))
+
+    def test_partition(self):
+        pyfunc = partition_usecase
+        cfunc = njit(pyfunc)
+
+        CASES = [
+            ('', '⚡'),
+            ('abcabc', '⚡'),
+            ('🐍⚡', '⚡'),
+            ('🐍⚡🐍', '⚡'),
+            ('abababa', 'a'),
+            ('abababa', 'b'),
+            ('abababa', 'c'),
+            ('abababa', 'ab'),
+            ('abababa', 'aba'),
+        ]
+        msg = 'Results of "{}".partition("{}") must be equal'
+        for s, sep in CASES:
+            self.assertEqual(pyfunc(s, sep), cfunc(s, sep),
+                             msg=msg.format(s, sep))
+
     def test_find(self, flags=no_pyobj_flags):
         pyfunc = find_usecase
         cfunc = njit(pyfunc)
diff --git a/numba/unicode.py b/numba/unicode.py
index 673b437ec5d..5494e95c336 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -610,6 +610,42 @@ def _adjust_indices(length, start, end):
     return rfind_impl
 
 
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12922-L12976    # noqa: E501
+@overload_method(types.UnicodeType, 'partition')
+def unicode_partition(data, sep):
+    """Implements str.partition()"""
+    thety = sep
+    # if the type is omitted, the concrete type is the value
+    if isinstance(sep, types.Omitted):
+        thety = sep.value
+    # if the type is optional, the concrete type is the captured type
+    elif isinstance(sep, types.Optional):
+        thety = sep.type
+
+    accepted = (types.UnicodeType, types.UnicodeCharSeq)
+    if thety is not None and not isinstance(thety, accepted):
+        msg = '"{}" must be {}, not {}'.format('sep', accepted, sep)
+        raise TypingError(msg)
+
+    def impl(data, sep):
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/partition.h#L7-L60    # noqa: E501
+        empty_str = _empty_string(data._kind, 0, data._is_ascii)
+        sep_length = len(sep)
+        if data._kind < sep._kind or len(data) < sep_length:
+            return data, empty_str, empty_str
+
+        if sep_length == 0:
+            raise ValueError('empty separator')
+
+        pos = data.find(sep)
+        if pos < 0:
+            return data, empty_str, empty_str
+
+        return data[0:pos], sep, data[pos + sep_length:len(data)]
+
+    return impl
+
+
 @overload_method(types.UnicodeType, 'count')
 def unicode_count(src, sub, start=None, end=None):
 

From 8aae59e48fefffb5fd6a163abbcb7b4639443829 Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Fri, 15 Nov 2019 09:54:12 +0300
Subject: [PATCH 16/68] Implement str.splitlines() based on CPython

---
 docs/source/reference/pysupported.rst |  1 +
 numba/tests/test_unicode.py           | 53 +++++++++++++++++++++
 numba/unicode.py                      | 68 +++++++++++++++++++++++++++
 numba/unicode_support.py              | 59 +++++++++++++++++++++++
 4 files changed, 181 insertions(+)

diff --git a/docs/source/reference/pysupported.rst b/docs/source/reference/pysupported.rst
index f19d3da7a64..f74cb785839 100644
--- a/docs/source/reference/pysupported.rst
+++ b/docs/source/reference/pysupported.rst
@@ -176,6 +176,7 @@ The following functions, attributes and methods are currently supported:
 * ``.ljust()``
 * ``.rjust()``
 * ``.split()``
+* ``.splitlines()``
 * ``.join()``
 * ``.lstrip()``
 * ``.rstrip()``
diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index c9f49623dbd..dbafc46e2d8 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -130,6 +130,18 @@ def split_whitespace_usecase(x):
     return x.split()
 
 
+def splitlines_usecase(s):
+    return s.splitlines()
+
+
+def splitlines_with_keepends_usecase(s, keepends):
+    return s.splitlines(keepends)
+
+
+def splitlines_with_keepends_kwarg_usecase(s, keepends):
+    return s.splitlines(keepends=keepends)
+
+
 def lstrip_usecase(x):
     return x.lstrip()
 
@@ -864,6 +876,47 @@ def test_split_whitespace(self):
                              cfunc(test_str),
                              "'%s'.split()?" % (test_str,))
 
+    def test_split_exception_invalid_keepends(self):
+        pyfunc = splitlines_with_keepends_usecase
+        cfunc = njit(pyfunc)
+
+        accepted_types = (types.Integer, int, types.Boolean, bool)
+        for ty, keepends in (('none', None), ('unicode_type', 'None')):
+            with self.assertRaises(TypingError) as raises:
+                cfunc('\n', keepends)
+            msg = '"keepends" must be {}, not {}'.format(accepted_types, ty)
+            self.assertIn(msg, str(raises.exception))
+
+    def test_splitlines(self):
+        pyfunc = splitlines_usecase
+        cfunc = njit(pyfunc)
+
+        cases = ['', '\n', 'abc\r\rabc\r\n', '🐍⚡\v', '\f🐍⚡\f\v\v🐍\x85',
+                 '\u2028aba\u2029baba', '\n\r\na\v\fb\x0b\x0cc\x1c\x1d\x1e']
+
+        msg = 'Results of "{}".splitlines() must be equal'
+        for s in cases:
+            self.assertEqual(pyfunc(s), cfunc(s), msg=msg.format(s))
+
+    def test_splitlines_with_keepends(self):
+        pyfuncs = [
+            splitlines_with_keepends_usecase,
+            splitlines_with_keepends_kwarg_usecase
+        ]
+        messages = [
+            'Results of "{}".splitlines({}) must be equal',
+            'Results of "{}".splitlines(keepends={}) must be equal'
+        ]
+        cases = ['', '\n', 'abc\r\rabc\r\n', '🐍⚡\v', '\f🐍⚡\f\v\v🐍\x85',
+                 '\u2028aba\u2029baba', '\n\r\na\v\fb\x0b\x0cc\x1c\x1d\x1e']
+        all_keepends = [True, False, 0, 1, -1, 100]
+
+        for pyfunc, msg in zip(pyfuncs, messages):
+            cfunc = njit(pyfunc)
+            for s, keepends in product(cases, all_keepends):
+                self.assertEqual(pyfunc(s, keepends), cfunc(s, keepends),
+                                 msg=msg.format(s, keepends))
+
     def test_join_empty(self):
         # Can't pass empty list to nopython mode, so we have to make a
         # separate test case
diff --git a/numba/unicode.py b/numba/unicode.py
index 673b437ec5d..640b1222d73 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -36,6 +36,8 @@
                               _PyUnicode_ToTitleFull,
                               _PyUnicode_IsCased, _PyUnicode_IsCaseIgnorable,
                               _PyUnicode_IsUppercase, _PyUnicode_IsLowercase,
+                              _PyUnicode_IsLineBreak, _Py_ISLINEBREAK,
+                              _Py_ISLINEFEED, _Py_ISCARRIAGERETURN,
                               _PyUnicode_IsTitlecase, _Py_ISLOWER, _Py_ISUPPER)
 
 # DATA MODEL
@@ -858,6 +860,72 @@ def rjust_impl(string, width, fillchar=' '):
     return rjust_impl
 
 
+def generate_splitlines_func(is_line_break_func):
+    """Generate splitlines performer based on ascii or unicode line breaks."""
+    def impl(data, keepends):
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/split.h#L335-L389    # noqa: E501
+        length = len(data)
+        result = []
+        i = j = 0
+        while i < length:
+            # find a line and append it
+            while i < length:
+                if is_line_break_func(_get_code_point(data, i)):
+                    break
+                i += 1
+
+            # skip the line break reading CRLF as one line break
+            eol = i
+            if i < length:
+                if i + 1 < length:
+                    cur_cp = _get_code_point(data, i)
+                    next_cp = _get_code_point(data, i + 1)
+                    # the _Py_IS* helpers are only valid for code points < 256
+                    if (max(cur_cp, next_cp) < 256 and
+                            _Py_ISCARRIAGERETURN(cur_cp) and
+                            _Py_ISLINEFEED(next_cp)):
+                        i += 1
+                i += 1
+                if keepends:
+                    eol = i
+            result.append(data[j:eol])
+            j = i
+        return result
+
+    return impl
+
+
+_ascii_splitlines = register_jitable(generate_splitlines_func(_Py_ISLINEBREAK))
+_unicode_splitlines = register_jitable(generate_splitlines_func(
+    _PyUnicode_IsLineBreak))
+
+
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10196-L10229    # noqa: E501
+@overload_method(types.UnicodeType, 'splitlines')
+def unicode_splitlines(data, keepends=False):
+    """Implements str.splitlines()"""
+    thety = keepends
+    # if the type is omitted, the concrete type is the value
+    if isinstance(keepends, types.Omitted):
+        thety = keepends.value
+    # if the type is optional, the concrete type is the captured type
+    elif isinstance(keepends, types.Optional):
+        thety = keepends.type
+
+    accepted = (types.Integer, int, types.Boolean, bool)
+    if thety is not None and not isinstance(thety, accepted):
+        raise TypingError(
+            '"{}" must be {}, not {}'.format('keepends', accepted, keepends))
+
+    def splitlines_impl(data, keepends=False):
+        if data._is_ascii:
+            return _ascii_splitlines(data, keepends)
+
+        return _unicode_splitlines(data, keepends)
+
+    return splitlines_impl
+
+
 @register_jitable
 def join_list(sep, parts):
     parts_len = len(parts)
diff --git a/numba/unicode_support.py b/numba/unicode_support.py
index 021f36a8ec8..84461d919b9 100644
--- a/numba/unicode_support.py
+++ b/numba/unicode_support.py
@@ -239,6 +239,12 @@ def _PyUnicode_IsUppercase(ch):
     return ctype.flags & _PyUnicode_TyperecordMasks.UPPER_MASK != 0
 
 
+@register_jitable
+def _PyUnicode_IsLineBreak(ch):
+    ctype = _PyUnicode_gettyperecord(ch)
+    return ctype.flags & _PyUnicode_TyperecordMasks.LINEBREAK_MASK != 0
+
+
 @register_jitable
 def _PyUnicode_ToUppercase(ch):
     raise NotImplementedError
@@ -552,6 +558,40 @@ class _PY_CTF(IntEnum):
 ], dtype=np.uint8)
 
 
+class _PY_CTF_LB(IntEnum):
+    LINE_BREAK = 0x01
+    LINE_FEED = 0x02
+    CARRIAGE_RETURN = 0x04
+
+
+_Py_ctype_islinebreak = np.array([
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    _PY_CTF_LB.LINE_BREAK | _PY_CTF_LB.LINE_FEED,  # 0xa '\n'
+    _PY_CTF_LB.LINE_BREAK,  # 0xb '\v'
+    _PY_CTF_LB.LINE_BREAK,  # 0xc '\f'
+    _PY_CTF_LB.LINE_BREAK | _PY_CTF_LB.CARRIAGE_RETURN,  # 0xd '\r'
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    _PY_CTF_LB.LINE_BREAK,  # 0x1c '\x1c'
+    _PY_CTF_LB.LINE_BREAK,  # 0x1d '\x1d'
+    _PY_CTF_LB.LINE_BREAK,  # 0x1e '\x1e'
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    _PY_CTF_LB.LINE_BREAK,  # 0x85 '\x85'
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0,
+], dtype=np.intc)
+
+
 # Translation of:
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Include/pymacro.h#L25    # noqa: E501
 @register_jitable
@@ -654,5 +694,24 @@ def _Py_ISSPACE(ch):
     """
     return _Py_ctype_table[_Py_CHARMASK(ch)] & _PY_CTF.SPACE
 
+
+@register_jitable
+def _Py_ISLINEBREAK(ch):
+    """Check if character is ASCII line break"""
+    return _Py_ctype_islinebreak[_Py_CHARMASK(ch)] & _PY_CTF_LB.LINE_BREAK
+
+
+@register_jitable
+def _Py_ISLINEFEED(ch):
+    """Check if character is line feed `\n`"""
+    return _Py_ctype_islinebreak[_Py_CHARMASK(ch)] & _PY_CTF_LB.LINE_FEED
+
+
+@register_jitable
+def _Py_ISCARRIAGERETURN(ch):
+    """Check if character is carriage return `\r`"""
+    return _Py_ctype_islinebreak[_Py_CHARMASK(ch)] & _PY_CTF_LB.CARRIAGE_RETURN
+
+
 # End code related to/from CPython's pyctype
 # ------------------------------------------------------------------------------

From 308ea68b2f5e44f439a008f4e51a592ea8b3a39a Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Fri, 15 Nov 2019 13:30:46 +0300
Subject: [PATCH 17/68] Add SHA for str.casefold()

---
 numba/unicode.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index beed5cf455a..fd14ee747ea 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1437,13 +1437,12 @@ def generate_case_operation_func(ascii_func, unicode_nres_func):
 
 
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10782-L10791    # noqa: E501
+# mixed with
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9819-L9834    # noqa: E501
 @overload_method(types.UnicodeType, 'casefold')
 def unicode_casefold(data):
     """Implements str.casefold()"""
-    def impl(data):
-        return _do_casefold(data)
-
-    return impl
+    return _do_casefold
 
 
 @overload_method(types.UnicodeType, 'istitle')

From a2cd602dab18150f226f6c7ce9ad3bee5dbfdf03 Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Fri, 15 Nov 2019 15:01:44 +0300
Subject: [PATCH 18/68] Fix generating of operation methods

---
 numba/unicode.py | 68 ++++++++++++++++++------------------------------
 1 file changed, 25 insertions(+), 43 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index fd14ee747ea..734108eba7e 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1361,37 +1361,7 @@ def impl(a):
     return impl
 
 
-def generate_unicode_operation_doer(operation_func):
-    """Generate unicode case operation performer."""
-    def impl(data, length, res, maxchars):
-        k = 0
-        mapped = np.zeros(3, dtype=_Py_UCS4)
-        for idx in range(length):
-            mapped.fill(0)
-            code_point = _get_code_point(data, idx)
-            n_res = operation_func(code_point, mapped)
-            for m in mapped[:n_res]:
-                maxchar = maxchars[0]
-                maxchars[0] = max(maxchar, m)
-                _set_code_point(res, k, m)
-                k += 1
-
-        return k
-
-    return impl
-
-
-def generate_ascii_operation_doer(operation_func):
-    """Generate ascii case operation performer."""
-    def impl(data, res):
-        for idx in range(len(data)):
-            code_point = _get_code_point(data, idx)
-            _set_code_point(res, idx, operation_func(code_point))
-
-    return impl
-
-
-def generate_common_operation_doer(ascii_func, unicode_nres_func):
+def generate_operation_func(ascii_func, unicode_nres_func):
     """Generate common case operation performer."""
     def impl(data):
         length = len(data)
@@ -1420,20 +1390,32 @@ def impl(data):
     return impl
 
 
-def generate_case_operation_func(ascii_func, unicode_nres_func):
-    """Generate function to perform case operation
-    on a string either ascii or unicode.
-    """
-    ascii_operation_doer = register_jitable(generate_ascii_operation_doer(
-        ascii_func))
-    unicode_operation_doer = register_jitable(generate_unicode_operation_doer(
-        unicode_nres_func))
-    return generate_common_operation_doer(ascii_operation_doer,
-                                          unicode_operation_doer)
+@register_jitable
+def _unicode_casefold_doer(data, length, res, maxchars):
+    k = 0
+    mapped = np.zeros(3, dtype=_Py_UCS4)
+    for idx in range(length):
+        mapped.fill(0)
+        code_point = _get_code_point(data, idx)
+        n_res = _PyUnicode_ToFoldedFull(code_point, mapped)
+        for m in mapped[:n_res]:
+            maxchar = maxchars[0]
+            maxchars[0] = max(maxchar, m)
+            _set_code_point(res, k, m)
+            k += 1
+
+    return k
+
+
+@register_jitable
+def _ascii_casefold_doer(data, res):
+    for idx in range(len(data)):
+        code_point = _get_code_point(data, idx)
+        _set_code_point(res, idx, _Py_TOLOWER(code_point))
 
 
-_do_casefold = register_jitable(generate_case_operation_func(
-    _Py_TOLOWER, _PyUnicode_ToFoldedFull))
+_do_casefold = register_jitable(generate_operation_func(_ascii_casefold_doer,
+                                                        _unicode_casefold_doer))
 
 
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10782-L10791    # noqa: E501

From 72485b455086762dd5e8193e9112fb6505c38807 Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Fri, 15 Nov 2019 15:42:03 +0300
Subject: [PATCH 19/68] Add generator of rsplit whitespace implementation

---
 numba/unicode.py | 92 +++++++++++++++++++++++++++++-------------------
 1 file changed, 55 insertions(+), 37 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 59e379baddc..053ce25ecfe 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -34,7 +34,8 @@
 from numba.errors import TypingError
 from .unicode_support import (_Py_TOUPPER, _Py_TOLOWER, _Py_UCS4,
                               _PyUnicode_ToUpperFull, _PyUnicode_ToLowerFull,
-                              _PyUnicode_ToTitleFull, _PyUnicode_IsSpace,
+                              _PyUnicode_ToTitleFull,
+                              _PyUnicode_IsSpace, _Py_ISSPACE,
                               _PyUnicode_IsCased, _PyUnicode_IsCaseIgnorable,
                               _PyUnicode_IsUppercase, _PyUnicode_IsLowercase,
                               _PyUnicode_IsTitlecase, _Py_ISLOWER, _Py_ISUPPER)
@@ -758,6 +759,56 @@ def split_whitespace_impl(a, sep=None, maxsplit=-1):
             return parts
         return split_whitespace_impl
 
+
+def generate_rsplit_whitespace_impl(isspace_func):
+    """Generate whitespace rsplit func based on either ascii or unicode"""
+
+    def rsplit_whitespace_impl(data, sep=None, maxsplit=-1):
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/split.h#L192-L240    # noqa: E501
+        if maxsplit < 0:
+            maxsplit = sys.maxsize
+
+        result = []
+        i = len(data) - 1
+        while maxsplit > 0:
+            while i >= 0:
+                code_point = _get_code_point(data, i)
+                if not isspace_func(code_point):
+                    break
+                i -= 1
+            if i < 0:
+                break
+            j = i
+            i -= 1
+            while i >= 0:
+                code_point = _get_code_point(data, i)
+                if isspace_func(code_point):
+                    break
+                i -= 1
+            result.append(data[i + 1:j + 1])
+            maxsplit -= 1
+
+        if i >= 0:
+            # Only occurs when maxsplit was reached
+            # Skip any remaining whitespace and copy to beginning of string
+            while i >= 0:
+                code_point = _get_code_point(data, i)
+                if not isspace_func(code_point):
+                    break
+                i -= 1
+            if i >= 0:
+                result.append(data[0:i + 1])
+
+        return result[::-1]
+
+    return rsplit_whitespace_impl
+
+
+unicode_rsplit_whitespace_impl = register_jitable(
+    generate_rsplit_whitespace_impl(_PyUnicode_IsSpace))
+ascii_rsplit_whitespace_impl = register_jitable(
+    generate_rsplit_whitespace_impl(_Py_ISSPACE))
+
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L13095-L13108    # noqa: E501
 @overload_method(types.UnicodeType, 'rsplit')
 def unicode_rsplit(data, sep=None, maxsplit=-1):
@@ -785,42 +836,9 @@ def _unicode_rsplit_check_type(ty, name, accepted):
     if sep is None or isinstance(sep, (types.NoneType, types.Omitted)):
 
         def rsplit_whitespace_impl(data, sep=None, maxsplit=-1):
-            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/split.h#L192-L240    # noqa: E501
-            if maxsplit < 0:
-                maxsplit = sys.maxsize
-
-            result = []
-            i = len(data) - 1
-            while maxsplit > 0:
-                while i >= 0:
-                    code_point = _get_code_point(data, i)
-                    if not _PyUnicode_IsSpace(code_point):
-                        break
-                    i -= 1
-                if i < 0:
-                    break
-                j = i
-                i -= 1
-                while i >= 0:
-                    code_point = _get_code_point(data, i)
-                    if _PyUnicode_IsSpace(code_point):
-                        break
-                    i -= 1
-                result.append(data[i + 1:j + 1])
-                maxsplit -= 1
-
-            if i >= 0:
-                # Only occurs when maxsplit was reached
-                # Skip any remaining whitespace and copy to beginning of string
-                while i >= 0:
-                    code_point = _get_code_point(data, i)
-                    if not _PyUnicode_IsSpace(code_point):
-                        break
-                    i -= 1
-                if i >= 0:
-                    result.append(data[0:i + 1])
-
-            return result[::-1]
+            if data._is_ascii:
+                return ascii_rsplit_whitespace_impl(data, sep, maxsplit)
+            return unicode_rsplit_whitespace_impl(data, sep, maxsplit)
 
         return rsplit_whitespace_impl
 

From 761196f3b9ae05306f097b20d551e3873c314baa Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Fri, 15 Nov 2019 15:43:08 +0300
Subject: [PATCH 20/68] Minor fix for str.rsplit()

---
 numba/unicode.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/numba/unicode.py b/numba/unicode.py
index 053ce25ecfe..70e1080494c 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -809,6 +809,7 @@ def rsplit_whitespace_impl(data, sep=None, maxsplit=-1):
 ascii_rsplit_whitespace_impl = register_jitable(
     generate_rsplit_whitespace_impl(_Py_ISSPACE))
 
+
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L13095-L13108    # noqa: E501
 @overload_method(types.UnicodeType, 'rsplit')
 def unicode_rsplit(data, sep=None, maxsplit=-1):

From 1b5157d98b5d830130610e526694b72b8856672f Mon Sep 17 00:00:00 2001
From: mrubtsov <maria.rubtsova@intel.com>
Date: Mon, 18 Nov 2019 12:38:05 +0300
Subject: [PATCH 21/68] Implement str.replace

---
 docs/source/reference/pysupported.rst |  1 +
 numba/tests/test_unicode.py           | 81 +++++++++++++++++++++++++++
 numba/unicode.py                      | 52 +++++++++++++++++
 3 files changed, 134 insertions(+)

diff --git a/docs/source/reference/pysupported.rst b/docs/source/reference/pysupported.rst
index f19d3da7a64..266401d506d 100644
--- a/docs/source/reference/pysupported.rst
+++ b/docs/source/reference/pysupported.rst
@@ -187,6 +187,7 @@ The following functions, attributes and methods are currently supported:
 * ``.zfill()``
 * ``.count()``
 * ``.istitle()``
+* ``.replace()``
 * ``.rfind()``
 * ``.title()``
 
diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index c9f49623dbd..20ecbfdb052 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -106,6 +106,14 @@ def rfind_with_start_end_usecase(x, y, start, end):
     return x.rfind(y, start, end)
 
 
+def replace_usecase(s, x, y):
+    return s.replace(x, y)
+
+
+def replace_with_count_usecase(s, x, y, count):
+    return s.replace(x, y, count)
+
+
 def startswith_usecase(x, y):
     return x.startswith(y)
 
@@ -908,6 +916,9 @@ def test_join(self):
         ]
 
         for sep, parts in CASES:
+            print(sep)
+            print(parts)
+            print(cfunc(sep, parts))
             self.assertEqual(pyfunc(sep, parts),
                              cfunc(sep, parts),
                              "'%s'.join('%s')?" % (sep, parts))
@@ -1332,6 +1343,76 @@ def test_lower(self):
         for s in UNICODE_EXAMPLES + [''] + extras + cpython + sigma:
             self.assertEqual(pyfunc(s), cfunc(s), msg=msg.format(s))
 
+    def test_replace(self):
+        pyfunc = replace_usecase
+        cfunc = njit(pyfunc)
+
+        CASES = [
+            ('abc', '', 'A'),
+            ('', '⚡', 'A'),
+            ('abcabc', '⚡', 'A'),
+            ('🐍⚡', '⚡', 'A'),
+            ('🐍⚡🐍', '⚡', 'A'),
+            ('abababa', 'a', 'A'),
+            ('abababa', 'b', 'A'),
+            ('abababa', 'c', 'A'),
+            ('abababa', 'ab', 'A'),
+            ('abababa', 'aba', 'A'),
+        ]
+
+        for test_str, old_str, new_str in CASES:
+            self.assertEqual(pyfunc(test_str, old_str, new_str),
+                             cfunc(test_str, old_str, new_str),
+                             "'%s'.replace('%s', '%s')?" %
+                             (test_str, old_str, new_str))
+
+    def test_replace_with_count(self):
+        pyfunc = replace_with_count_usecase
+        cfunc = njit(pyfunc)
+
+        CASES = [
+            ('abc', '', 'A'),
+            ('', '⚡', 'A'),
+            ('abcabc', '⚡', 'A'),
+            ('🐍⚡', '⚡', 'A'),
+            ('🐍⚡🐍', '⚡', 'A'),
+            ('abababa', 'a', 'A'),
+            ('abababa', 'b', 'A'),
+            ('abababa', 'c', 'A'),
+            ('abababa', 'ab', 'A'),
+            ('abababa', 'aba', 'A'),
+        ]
+
+        count_test = [-1, 1, 0, 5]
+
+        for test_str, old_str, new_str in CASES:
+            for count in count_test:
+                self.assertEqual(pyfunc(test_str, old_str, new_str, count),
+                                 cfunc(test_str, old_str, new_str, count),
+                                 "'%s'.replace('%s', '%s', '%s')?" %
+                                 (test_str, old_str, new_str, count))
+
+    def test_replace_unsupported(self):
+        def pyfunc(s, x, y, count):
+            return s.replace(x, y, count)
+
+        cfunc = njit(pyfunc)
+
+        with self.assertRaises(TypingError) as raises:
+            cfunc('ababababab', 'ba', 'qqq', 3.5)
+        msg = 'Unsupported parameters. The parametrs must be Integer.'
+        self.assertIn(msg, str(raises.exception))
+
+        with self.assertRaises(TypingError) as raises:
+            cfunc('ababababab', 0, 'qqq', 3)
+        msg = 'The object must be a UnicodeType.'
+        self.assertIn(msg, str(raises.exception))
+
+        with self.assertRaises(TypingError) as raises:
+            cfunc('ababababab', 'ba', 0, 3)
+        msg = 'The object must be a UnicodeType.'
+        self.assertIn(msg, str(raises.exception))
+
 
 @unittest.skipUnless(_py34_or_later,
                      'unicode support requires Python 3.4 or later')
diff --git a/numba/unicode.py b/numba/unicode.py
index 673b437ec5d..34831f6363b 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1297,6 +1297,58 @@ def impl(a):
                                                _PyUnicode_IsTitlecase))
 
 
+@overload_method(types.UnicodeType, 'replace')
+def unicode_replace(s, old_str, new_str, count=-1):
+    """Implements str.replace()
+
+    `count` caps the number of replacements; any negative value means
+    "replace every occurrence", as in CPython.
+    """
+    thety = count
+    if isinstance(count, types.Omitted):
+        thety = count.value
+    elif isinstance(count, types.Optional):
+        thety = count.type
+
+    if not isinstance(thety, (int, types.Integer)):
+        raise TypingError('Unsupported parameters. The parametrs '
+                          'must be Integer. Given count: {}'.format(count))
+
+    if not isinstance(old_str, (types.UnicodeType, types.NoneType)):
+        raise TypingError('The object must be a UnicodeType.'
+                          ' Given: {}'.format(old_str))
+
+    if not isinstance(new_str, types.UnicodeType):
+        raise TypingError('The object must be a UnicodeType.'
+                          ' Given: {}'.format(new_str))
+
+    def impl(s, old_str, new_str, count=-1):
+        if count == 0:
+            return s
+        if old_str == '' or old_str is None:
+            # an empty pattern matches before every character and once at
+            # the end of the string: len(s) + 1 slots, capped by `count`
+            slots = len(s) + 1
+            limit = count
+            if count < 0 or count > slots:
+                limit = slots
+            str_result = ''
+            for i in range(len(s)):
+                if i < limit:
+                    str_result += new_str
+                str_result += s[i]
+            if limit == slots:
+                str_result += new_str
+            return str_result
+        # split() is handed -1 for any negative count so that "no limit"
+        # does not depend on how it treats other negative values
+        maxsplit = -1 if count < 0 else count
+        q = s.split(old_str, maxsplit)
+        return new_str.join(q)
+
+    return impl
+
+
 @overload_method(types.UnicodeType, 'isupper')
 def unicode_isupper(a):
     """

From fbbf808db6af94ac18ac8c44c3499e33f63ead01 Mon Sep 17 00:00:00 2001
From: "elena.totmenina" <etotmeni@nntsatel03.inn.intel.com>
Date: Mon, 18 Nov 2019 18:50:48 +0300
Subject: [PATCH 22/68] Add functionality for str.endswith()

---
 docs/source/reference/pysupported.rst |   1 +
 numba/tests/test_unicode.py           | 117 ++++++++++++++++++++++++++
 numba/unicode.py                      |  69 +++++++++++++--
 3 files changed, 178 insertions(+), 9 deletions(-)

diff --git a/docs/source/reference/pysupported.rst b/docs/source/reference/pysupported.rst
index b947e8d32ed..e096cdb006a 100644
--- a/docs/source/reference/pysupported.rst
+++ b/docs/source/reference/pysupported.rst
@@ -192,6 +192,7 @@ The following functions, attributes and methods are currently supported:
 * ``.istitle()``
 * ``.rfind()``
 * ``.title()``
+* ``.endswith()``
 
 Additional operations as well as support for Python 2 strings / Python 3 bytes
 will be added in a future version of Numba.  Python 2 Unicode objects will
diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index c97bc1b6f50..ee64167641b 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -118,6 +118,14 @@ def endswith_usecase(x, y):
     return x.endswith(y)
 
 
+def endswith_with_start_only_usecase(x, y, start):
+    return x.endswith(y, start)
+
+
+def endswith_with_start_end_usecase(x, y, start, end):
+    return x.endswith(y, start, end)
+
+
 def split_usecase(x, y):
     return x.split(y)
 
@@ -398,6 +406,115 @@ def test_endswith(self, flags=no_pyobj_flags):
                                  cfunc(a, b),
                                  '%s, %s' % (a, b))
 
+    def test_endswith_default(self):
+        pyfunc = endswith_usecase
+        cfunc = njit(pyfunc)
+
+        # Samples taken from CPython testing:
+        # https://github.com/python/cpython/blob/865c3b257fe38154a4320c7ee6afb416f665b9c2/Lib/test/string_tests.py#L1049-L1099    # noqa: E501
+        cpython_str = ['hello', 'helloworld', '']
+        cpython_subs = [
+            'he', 'hello', 'helloworld', 'ello',
+            '', 'lowo', 'lo', 'he', 'lo', 'o',
+        ]
+        extra_subs = ['hellohellohello', ' ']
+        for s in cpython_str + UNICODE_EXAMPLES:
+            default_subs = ['', 'x', s[:-2], s[3:], s, s + s]
+            for sub_str in cpython_subs + default_subs + extra_subs:
+                msg = 'Results "{}".endswith("{}") must be equal'
+                self.assertEqual(pyfunc(s, sub_str), cfunc(s, sub_str),
+                                 msg=msg.format(s, sub_str))
+
+    def test_endswith_with_start(self):
+        pyfunc = endswith_with_start_only_usecase
+        cfunc = njit(pyfunc)
+
+        # Samples taken from CPython testing:
+        # https://github.com/python/cpython/blob/865c3b257fe38154a4320c7ee6afb416f665b9c2/Lib/test/string_tests.py#L1049-L1099    # noqa: E501
+        cpython_str = ['hello', 'helloworld', '']
+        cpython_subs = [
+            'he', 'hello', 'helloworld', 'ello',
+            '', 'lowo', 'lo', 'he', 'lo', 'o',
+        ]
+        extra_subs = ['hellohellohello', ' ']
+        for s in cpython_str + UNICODE_EXAMPLES:
+            default_subs = ['', 'x', s[:-2], s[3:], s, s + s]
+            for sub_str in cpython_subs + default_subs + extra_subs:
+                for start in list(range(-20, 20)) + [None]:
+                    msg = 'Results "{}".endswith("{}", {}) must be equal'
+                    self.assertEqual(pyfunc(s, sub_str, start),
+                                     cfunc(s, sub_str, start),
+                                     msg=msg.format(s, sub_str, start))
+
+    def test_endswith_with_start_end(self):
+        """Compare jitted str.endswith(sub, start, end) with pure Python."""
+        pyfunc = endswith_with_start_end_usecase
+        cfunc = njit(pyfunc)
+
+        # Samples taken from CPython testing:
+        # https://github.com/python/cpython/blob/865c3b257fe38154a4320c7ee6afb416f665b9c2/Lib/test/string_tests.py#L1049-L1099    # noqa: E501
+        cpython_str = ['hello', 'helloworld', '']
+        cpython_subs = [
+            'he', 'hello', 'helloworld', 'ello',
+            '', 'lowo', 'lo', 'he', 'lo', 'o',
+        ]
+        extra_subs = ['hellohellohello', ' ']
+        msg = 'Results "{}".endswith("{}", {}, {}) must be equal'
+        for s in cpython_str + UNICODE_EXAMPLES:
+            default_subs = ['', 'x', s[:-2], s[3:], s, s + s]
+            for sub_str in cpython_subs + default_subs + extra_subs:
+                for start in list(range(-20, 20)) + [None]:
+                    for end in list(range(-20, 20)) + [None]:
+                        self.assertEqual(pyfunc(s, sub_str, start, end),
+                                         cfunc(s, sub_str, start, end),
+                                         msg=msg.format(s, sub_str, start, end))
+
+    def test_endswith_tuple(self):
+        pyfunc = endswith_usecase
+        cfunc = njit(pyfunc)
+
+        # Samples taken from CPython testing:
+        # https://github.com/python/cpython/blob/865c3b257fe38154a4320c7ee6afb416f665b9c2/Lib/test/string_tests.py#L1049-L1099    # noqa: E501
+        cpython_str = ['hello', 'helloworld', '']
+        cpython_subs = [
+            'he', 'hello', 'helloworld', 'ello',
+            '', 'lowo', 'lo', 'he', 'lo', 'o',
+        ]
+        extra_subs = ['hellohellohello', ' ']
+        for s in cpython_str + UNICODE_EXAMPLES:
+            default_subs = ['', 'x', s[:-2], s[3:], s, s + s]
+            for sub_str in cpython_subs + default_subs + extra_subs:
+                msg = 'Results "{}".endswith({}) must be equal'
+                tuple_subs = (sub_str, 'lo')
+                self.assertEqual(pyfunc(s, tuple_subs),
+                                 cfunc(s, tuple_subs),
+                                 msg=msg.format(s, tuple_subs))
+
+    def test_endswith_tuple_args(self):
+        """Compare jitted str.endswith(tuple, start, end) with pure Python."""
+        pyfunc = endswith_with_start_end_usecase
+        cfunc = njit(pyfunc)
+
+        # Samples taken from CPython testing:
+        # https://github.com/python/cpython/blob/865c3b257fe38154a4320c7ee6afb416f665b9c2/Lib/test/string_tests.py#L1049-L1099    # noqa: E501
+        cpython_str = ['hello', 'helloworld', '']
+        cpython_subs = [
+            'he', 'hello', 'helloworld', 'ello',
+            '', 'lowo', 'lo', 'he', 'lo', 'o',
+        ]
+        extra_subs = ['hellohellohello', ' ']
+        msg = 'Results "{}".endswith({}, {}, {}) must be equal'
+        for s in cpython_str + UNICODE_EXAMPLES:
+            default_subs = ['', 'x', s[:-2], s[3:], s, s + s]
+            for sub_str in cpython_subs + default_subs + extra_subs:
+                for start in list(range(-20, 20)) + [None]:
+                    for end in list(range(-20, 20)) + [None]:
+                        tuple_subs = (sub_str, 'lo')
+                        self.assertEqual(pyfunc(s, tuple_subs, start, end),
+                                         cfunc(s, tuple_subs, start, end),
+                                         msg=msg.format(s, tuple_subs,
+                                                        start, end))
+
     def test_in(self, flags=no_pyobj_flags):
         pyfunc = in_usecase
         cfunc = njit(pyfunc)
diff --git a/numba/unicode.py b/numba/unicode.py
index 88d27610fe4..0eccfd8abfc 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -660,18 +660,69 @@ def startswith_impl(a, b):
         return startswith_impl
 
 
+# https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9342-L9354    # noqa: E501
+@register_jitable
+def _adjust_indices(length, start, end):
+    if end > length:
+        end = length
+    if end < 0:
+        end += length
+        if end < 0:
+            end = 0
+    if start < 0:
+        start += length
+        if start < 0:
+            start = 0
+
+    return start, end
+
+
 @overload_method(types.UnicodeType, 'endswith')
-def unicode_endswith(a, b):
-    if isinstance(b, types.UnicodeType):
-        def endswith_impl(a, b):
-            a_offset = len(a) - len(b)
-            if a_offset < 0:
+def unicode_endswith(s, substr, start=None, end=None):
+    if not (start is None or isinstance(start, (types.Omitted,
+                                                types.Integer,
+                                                types.NoneType))):
+        raise TypingError('The arg must be a Integer or None')
+
+    if not (end is None or isinstance(end, (types.Omitted,
+                                            types.Integer,
+                                            types.NoneType))):
+        raise TypingError('The arg must be a Integer or None')
+
+    if isinstance(substr, (types.Tuple, types.UniTuple)):
+        def endswith_impl(s, substr, start=None, end=None):
+            for item in substr:
+                if s.endswith(item, start, end) is True:
+                    return True
+
+            return False
+        return endswith_impl
+
+    if isinstance(substr, types.UnicodeType):
+        def endswith_impl(s, substr, start=None, end=None):
+            length = len(s)
+            sub_length = len(substr)
+            if start is None:
+                start = 0
+            if end is None:
+                end = length
+
+            start, end = _adjust_indices(length, start, end)
+            if end - start < sub_length:
                 return False
-            return _cmp_region(a, a_offset, b, 0, len(b)) == 0
+
+            if sub_length == 0:
+                return True
+
+            s = s[start:end]
+            offset = len(s) - sub_length
+
+            return _cmp_region(s, offset, substr, 0, sub_length) == 0
         return endswith_impl
-    if isinstance(b, types.UnicodeCharSeq):
-        def endswith_impl(a, b):
-            return a.endswith(str(b))
+
+    if isinstance(substr, types.UnicodeCharSeq):
+        def endswith_impl(s, substr, start=None, end=None):
+            return s.endswith(str(substr), start, end)
         return endswith_impl
 
 

From 0f3e6953bc657e2c199ac8736bc72f38c2e3f4c7 Mon Sep 17 00:00:00 2001
From: mrubtsov <maria.rubtsova@intel.com>
Date: Wed, 13 Nov 2019 16:37:21 +0300
Subject: [PATCH 23/68] Implement str.isdecimal

---
 docs/source/reference/pysupported.rst |  1 +
 numba/tests/test_unicode.py           | 17 +++++++++++++++++
 numba/unicode.py                      | 26 +++++++++++++++++++++++++-
 numba/unicode_support.py              | 18 ++++++++++++------
 4 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/docs/source/reference/pysupported.rst b/docs/source/reference/pysupported.rst
index b947e8d32ed..fa04fcb58ab 100644
--- a/docs/source/reference/pysupported.rst
+++ b/docs/source/reference/pysupported.rst
@@ -184,6 +184,7 @@ The following functions, attributes and methods are currently supported:
 * ``.strip()``
 * ``.isupper()``
 * ``.upper()``
+* ``.isdecimal()``
 * ``.islower()``
 * ``.lower()``
 * ``.zfill()``
diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index c97bc1b6f50..a77949dc615 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -1408,6 +1408,23 @@ def test_lower(self):
         for s in UNICODE_EXAMPLES + [''] + extras + cpython + sigma:
             self.assertEqual(pyfunc(s), cfunc(s), msg=msg.format(s))
 
+    def test_isdecimal(self):
+        def pyfunc(x):
+            return x.isdecimal()
+
+        cfunc = njit(pyfunc)
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L646-L662    # noqa: E501
+        cpython = ['', 'a', '0', '\u2460', '\xbc', '\u0660', '0123456789', '0123456789a', '\U00010401', '\U00010427',
+                   '\U00010429', '\U0001044E', '\U0001F40D', '\U0001F46F', '\U00011065', '\U0001F107', '\U0001D7F6',
+                   '\U00011066', '\U000104A0']
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L742-L749    # noqa: E501
+        cpython_extras = ['\uD800', '\uDFFF', '\uD800\uD800', '\uDFFF\uDFFF', 'a\uD800b\uDFFF', 'a\uDFFFb\uD800',
+                          'a\uD800b\uDFFFa', 'a\uDFFFb\uD800a']
+
+        msg = 'Results of "{}".isspace() must be equal'
+        for s in UNICODE_EXAMPLES + [''] + cpython + cpython_extras:
+            self.assertEqual(pyfunc(s), cfunc(s), msg=msg.format(s))
+
 
 @unittest.skipUnless(_py34_or_later,
                      'unicode support requires Python 3.4 or later')
diff --git a/numba/unicode.py b/numba/unicode.py
index 88d27610fe4..5e6a5be7e74 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -37,7 +37,8 @@
                               _PyUnicode_IsXidStart, _PyUnicode_IsXidContinue,
                               _PyUnicode_IsCased, _PyUnicode_IsCaseIgnorable,
                               _PyUnicode_IsUppercase, _PyUnicode_IsLowercase,
-                              _PyUnicode_IsTitlecase, _Py_ISLOWER, _Py_ISUPPER)
+                              _PyUnicode_IsTitlecase, _Py_ISLOWER, _Py_ISUPPER,
+                              _PyUnicode_IsDecimalDigit)
 
 # DATA MODEL
 
@@ -1410,6 +1411,29 @@ def impl(data):
     return impl
 
 
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12017-L12045    # noqa: E501
+@overload_method(types.UnicodeType, 'isdecimal')
+def unicode_isdecimal(data):
+    """Implements UnicodeType.isdecimal()"""
+
+    def impl(data):
+        length = len(data)
+
+        if length == 1:
+            return _PyUnicode_IsDecimalDigit(_get_code_point(data, 0))
+
+        if length == 0:
+            return False
+
+        for i in range(length):
+            if not _PyUnicode_IsDecimalDigit(_get_code_point(data, i)):
+                return False
+
+        return True
+
+    return impl
+
+
 @overload_method(types.UnicodeType, 'istitle')
 def unicode_istitle(s):
     """
diff --git a/numba/unicode_support.py b/numba/unicode_support.py
index f8b7b7a6c9c..44c63f68d1e 100644
--- a/numba/unicode_support.py
+++ b/numba/unicode_support.py
@@ -201,12 +201,10 @@ def _PyUnicode_IsXidContinue(ch):
 
 @register_jitable
 def _PyUnicode_ToDecimalDigit(ch):
-    raise NotImplementedError
-
-
-@register_jitable
-def _PyUnicode_IsDecimalDigit(ch):
-    raise NotImplementedError
+    ctype = _PyUnicode_gettyperecord(ch)
+    if ctype.flags & _PyUnicode_TyperecordMasks.DECIMAL_MASK:
+        return ctype.decimal
+    return -1
 
 
 @register_jitable
@@ -315,6 +313,14 @@ def _PyUnicode_IsCaseIgnorable(ch):
     return ctype.flags & _PyUnicode_TyperecordMasks.CASE_IGNORABLE_MASK != 0
 
 
+# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L106-L118    # noqa: E501
+@register_jitable
+def _PyUnicode_IsDecimalDigit(ch):
+    if _PyUnicode_ToDecimalDigit(ch) < 0:
+        return 0
+    return 1
+
+
 @register_jitable
 def _PyUnicode_IsSpace(ch):
     ctype = _PyUnicode_gettyperecord(ch)

From 4ae54efd657a0ffd699af1102f69856289845cf4 Mon Sep 17 00:00:00 2001
From: mrubtsov <maria.rubtsova@intel.com>
Date: Thu, 14 Nov 2019 09:28:59 +0300
Subject: [PATCH 24/68] Implement str.isdecimal

---
 numba/tests/test_unicode.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index a77949dc615..09f531ae27b 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -1414,12 +1414,14 @@ def pyfunc(x):
 
         cfunc = njit(pyfunc)
         # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L646-L662    # noqa: E501
-        cpython = ['', 'a', '0', '\u2460', '\xbc', '\u0660', '0123456789', '0123456789a', '\U00010401', '\U00010427',
-                   '\U00010429', '\U0001044E', '\U0001F40D', '\U0001F46F', '\U00011065', '\U0001F107', '\U0001D7F6',
-                   '\U00011066', '\U000104A0']
+        cpython = ['', 'a', '0', '\u2460', '\xbc', '\u0660', '0123456789',
+                   '0123456789a', '\U00010401', '\U00010427', '\U00010429',
+                   '\U0001044E', '\U0001F40D', '\U0001F46F', '\U00011065',
+                   '\U0001F107', '\U0001D7F6', '\U00011066', '\U000104A0']
         # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L742-L749    # noqa: E501
-        cpython_extras = ['\uD800', '\uDFFF', '\uD800\uD800', '\uDFFF\uDFFF', 'a\uD800b\uDFFF', 'a\uDFFFb\uD800',
-                          'a\uD800b\uDFFFa', 'a\uDFFFb\uD800a']
+        cpython_extras = ['\uD800', '\uDFFF', '\uD800\uD800', '\uDFFF\uDFFF',
+                          'a\uD800b\uDFFF', 'a\uDFFFb\uD800', 'a\uD800b\uDFFFa',
+                          'a\uDFFFb\uD800a']
 
         msg = 'Results of "{}".isspace() must be equal'
         for s in UNICODE_EXAMPLES + [''] + cpython + cpython_extras:

From 57ad3d8f3c9b486891d1f59acd2a242224f43021 Mon Sep 17 00:00:00 2001
From: mrubtsov <maria.rubtsova@intel.com>
Date: Mon, 18 Nov 2019 13:02:10 +0300
Subject: [PATCH 25/68] Implement str.isdecimal

---
 numba/tests/test_unicode.py | 2 +-
 numba/unicode.py            | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index 09f531ae27b..07712b72c4b 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -1423,7 +1423,7 @@ def pyfunc(x):
                           'a\uD800b\uDFFF', 'a\uDFFFb\uD800', 'a\uD800b\uDFFFa',
                           'a\uDFFFb\uD800a']
 
-        msg = 'Results of "{}".isspace() must be equal'
+        msg = 'Results of "{}".isdecimal() must be equal'
         for s in UNICODE_EXAMPLES + [''] + cpython + cpython_extras:
             self.assertEqual(pyfunc(s), cfunc(s), msg=msg.format(s))
 
diff --git a/numba/unicode.py b/numba/unicode.py
index 5e6a5be7e74..5390a020aea 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -953,6 +953,10 @@ def unicode_isidentifier(data):
 
     def impl(data):
         length = len(data)
+
+        if length == 1:
+            return _PyUnicode_IsDecimalDigit(_get_code_point(data, 0))
+
         if length == 0:
             return False
 

From ddd87c26cc28ed09ef8139b7b980d601d3aa39f3 Mon Sep 17 00:00:00 2001
From: mrubtsov <maria.rubtsova@intel.com>
Date: Wed, 13 Nov 2019 17:17:06 +0300
Subject: [PATCH 26/68] Implement str.isdigit

---
 docs/source/reference/pysupported.rst |  1 +
 numba/tests/test_unicode.py           | 16 ++++++++++++++++
 numba/unicode.py                      | 27 ++++++++++++++++++++++++++-
 numba/unicode_support.py              | 18 ++++++++++++------
 4 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/docs/source/reference/pysupported.rst b/docs/source/reference/pysupported.rst
index b947e8d32ed..5fba440083c 100644
--- a/docs/source/reference/pysupported.rst
+++ b/docs/source/reference/pysupported.rst
@@ -184,6 +184,7 @@ The following functions, attributes and methods are currently supported:
 * ``.strip()``
 * ``.isupper()``
 * ``.upper()``
+* ``.isdigit()``
 * ``.islower()``
 * ``.lower()``
 * ``.zfill()``
diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index c97bc1b6f50..5ce22149f8c 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -1408,6 +1408,22 @@ def test_lower(self):
         for s in UNICODE_EXAMPLES + [''] + extras + cpython + sigma:
             self.assertEqual(pyfunc(s), cfunc(s), msg=msg.format(s))
 
+    def test_isdigit(self):
+        def pyfunc(x):
+            return x.isdigit()
+
+        cfunc = njit(pyfunc)
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L664-L674    # noqa: E501
+        cpython = ['\u2460', '\xbc', '\u0660', '\U00010401', '\U00010427', '\U00010429', '\U0001044E', '\U0001F40D',
+                   '\U0001F46F', '\U00011065', '\U0001D7F6', '\U00011066', '\U000104A0', '\U0001F107']
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L742-L749    # noqa: E501
+        cpython_extras = ['\uD800', '\uDFFF', '\uD800\uD800', '\uDFFF\uDFFF', 'a\uD800b\uDFFF', 'a\uDFFFb\uD800',
+                          'a\uD800b\uDFFFa', 'a\uDFFFb\uD800a']
+
+        msg = 'Results of "{}".isspace() must be equal'
+        for s in UNICODE_EXAMPLES + [''] + cpython + cpython_extras:
+            self.assertEqual(pyfunc(s), cfunc(s), msg=msg.format(s))
+
 
 @unittest.skipUnless(_py34_or_later,
                      'unicode support requires Python 3.4 or later')
diff --git a/numba/unicode.py b/numba/unicode.py
index 88d27610fe4..e094db70fbf 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -37,7 +37,8 @@
                               _PyUnicode_IsXidStart, _PyUnicode_IsXidContinue,
                               _PyUnicode_IsCased, _PyUnicode_IsCaseIgnorable,
                               _PyUnicode_IsUppercase, _PyUnicode_IsLowercase,
-                              _PyUnicode_IsTitlecase, _Py_ISLOWER, _Py_ISUPPER)
+                              _PyUnicode_IsTitlecase, _Py_ISLOWER, _Py_ISUPPER,
+                              _PyUnicode_IsDigit)
 
 # DATA MODEL
 
@@ -1410,6 +1411,30 @@ def impl(data):
     return impl
 
 
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12056-L12085    # noqa: E501
+@overload_method(types.UnicodeType, 'isdigit')
+def unicode_isdigit(data):
+    """Implements UnicodeType.isdigit()"""
+
+    def impl(data):
+        length = len(data)
+
+        if length == 1:
+            ch = _get_code_point(data, 0)
+            return _PyUnicode_IsDigit(ch)
+
+        if length == 0:
+            return False
+
+        for i in range(length):
+            if not _PyUnicode_IsDigit(_get_code_point(data, i)):
+                return False
+
+        return True
+
+    return impl
+
+
 @overload_method(types.UnicodeType, 'istitle')
 def unicode_istitle(s):
     """
diff --git a/numba/unicode_support.py b/numba/unicode_support.py
index f8b7b7a6c9c..824b73eef59 100644
--- a/numba/unicode_support.py
+++ b/numba/unicode_support.py
@@ -211,12 +211,10 @@ def _PyUnicode_IsDecimalDigit(ch):
 
 @register_jitable
 def _PyUnicode_ToDigit(ch):
-    raise NotImplementedError
-
-
-@register_jitable
-def _PyUnicode_IsDigit(ch):
-    raise NotImplementedError
+    ctype = _PyUnicode_gettyperecord(ch)
+    if ctype.flags & _PyUnicode_TyperecordMasks.DIGIT_MASK:
+        return ctype.digit
+    return -1
 
 
 @register_jitable
@@ -315,6 +313,14 @@ def _PyUnicode_IsCaseIgnorable(ch):
     return ctype.flags & _PyUnicode_TyperecordMasks.CASE_IGNORABLE_MASK != 0
 
 
+# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L123-L135    # noqa: E501
+@register_jitable
+def _PyUnicode_IsDigit(ch):
+    if _PyUnicode_ToDigit(ch) < 0:
+        return 0
+    return 1
+
+
 @register_jitable
 def _PyUnicode_IsSpace(ch):
     ctype = _PyUnicode_gettyperecord(ch)

From 22acdb89707967ed8bad5e15bdcc6f49f8834630 Mon Sep 17 00:00:00 2001
From: mrubtsov <maria.rubtsova@intel.com>
Date: Thu, 14 Nov 2019 10:10:05 +0300
Subject: [PATCH 27/68] Implement str.isdigit

---
 numba/tests/test_unicode.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index 5ce22149f8c..e4c17a42e7a 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -1414,10 +1414,13 @@ def pyfunc(x):
 
         cfunc = njit(pyfunc)
         # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L664-L674    # noqa: E501
-        cpython = ['\u2460', '\xbc', '\u0660', '\U00010401', '\U00010427', '\U00010429', '\U0001044E', '\U0001F40D',
-                   '\U0001F46F', '\U00011065', '\U0001D7F6', '\U00011066', '\U000104A0', '\U0001F107']
+        cpython = ['\u2460', '\xbc', '\u0660', '\U00010401', '\U00010427',
+                   '\U00010429', '\U0001044E', '\U0001F40D', '\U0001F46F',
+                   '\U00011065', '\U0001D7F6', '\U00011066', '\U000104A0',
+                   '\U0001F107']
         # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L742-L749    # noqa: E501
-        cpython_extras = ['\uD800', '\uDFFF', '\uD800\uD800', '\uDFFF\uDFFF', 'a\uD800b\uDFFF', 'a\uDFFFb\uD800',
+        cpython_extras = ['\uD800', '\uDFFF', '\uD800\uD800', '\uDFFF\uDFFF',
+                          'a\uD800b\uDFFF', 'a\uDFFFb\uD800',
                           'a\uD800b\uDFFFa', 'a\uDFFFb\uD800a']
 
         msg = 'Results of "{}".isspace() must be equal'

From 8bfb184d0db12e607bedf7b232be531aa49e9ace Mon Sep 17 00:00:00 2001
From: mrubtsov <maria.rubtsova@intel.com>
Date: Mon, 18 Nov 2019 13:16:43 +0300
Subject: [PATCH 28/68] Implement str.isdigit

---
 numba/tests/test_unicode.py | 2 +-
 numba/unicode.py            | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index e4c17a42e7a..0b424f1e67f 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -1423,7 +1423,7 @@ def pyfunc(x):
                           'a\uD800b\uDFFF', 'a\uDFFFb\uD800',
                           'a\uD800b\uDFFFa', 'a\uDFFFb\uD800a']
 
-        msg = 'Results of "{}".isspace() must be equal'
+        msg = 'Results of "{}".isdigit() must be equal'
         for s in UNICODE_EXAMPLES + [''] + cpython + cpython_extras:
             self.assertEqual(pyfunc(s), cfunc(s), msg=msg.format(s))
 
diff --git a/numba/unicode.py b/numba/unicode.py
index e094db70fbf..49fa172c748 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -953,6 +953,11 @@ def unicode_isidentifier(data):
 
     def impl(data):
         length = len(data)
+
+        if length == 1:
+            ch = _get_code_point(data, 0)
+            return _PyUnicode_IsDigit(ch)
+
         if length == 0:
             return False
 

From 1bb174672cbbf77f4286c4dd1872bab11f96abb7 Mon Sep 17 00:00:00 2001
From: mrubtsov <maria.rubtsova@intel.com>
Date: Mon, 18 Nov 2019 13:31:01 +0300
Subject: [PATCH 29/68] Implement str.isdigit

---
 numba/unicode_support.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/numba/unicode_support.py b/numba/unicode_support.py
index 824b73eef59..7e11461530a 100644
--- a/numba/unicode_support.py
+++ b/numba/unicode_support.py
@@ -209,6 +209,7 @@ def _PyUnicode_IsDecimalDigit(ch):
     raise NotImplementedError
 
 
+# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L123-L128  # noqa: E501
 @register_jitable
 def _PyUnicode_ToDigit(ch):
     ctype = _PyUnicode_gettyperecord(ch)

From 57007309348813761271eb9ad16529d952364cc0 Mon Sep 17 00:00:00 2001
From: mrubtsov <maria.rubtsova@intel.com>
Date: Tue, 19 Nov 2019 09:27:14 +0300
Subject: [PATCH 30/68] Implement str.isdecimal

---
 numba/unicode.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 5390a020aea..5e6a5be7e74 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -953,10 +953,6 @@ def unicode_isidentifier(data):
 
     def impl(data):
         length = len(data)
-
-        if length == 1:
-            return _PyUnicode_IsDecimalDigit(_get_code_point(data, 0))
-
         if length == 0:
             return False
 

From 53171aa6b9c4173fc590e76230b0036aa75b9aa6 Mon Sep 17 00:00:00 2001
From: mrubtsov <maria.rubtsova@intel.com>
Date: Tue, 19 Nov 2019 09:30:01 +0300
Subject: [PATCH 31/68] Implement str.isdigit

---
 numba/unicode.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 49fa172c748..e094db70fbf 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -953,11 +953,6 @@ def unicode_isidentifier(data):
 
     def impl(data):
         length = len(data)
-
-        if length == 1:
-            ch = _get_code_point(data, 0)
-            return _PyUnicode_IsDigit(ch)
-
         if length == 0:
             return False
 

From 77f8f0fa07b917d695a2a3c745d6bee8d4669c7f Mon Sep 17 00:00:00 2001
From: mrubtsov <maria.rubtsova@intel.com>
Date: Wed, 13 Nov 2019 17:49:34 +0300
Subject: [PATCH 32/68] Implement str.isnumeric

---
 docs/source/reference/pysupported.rst |  1 +
 numba/tests/test_unicode.py           | 18 ++++++++++++++++++
 numba/unicode.py                      | 26 +++++++++++++++++++++++++-
 numba/unicode_support.py              |  3 ++-
 4 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/docs/source/reference/pysupported.rst b/docs/source/reference/pysupported.rst
index b947e8d32ed..783bcf178d4 100644
--- a/docs/source/reference/pysupported.rst
+++ b/docs/source/reference/pysupported.rst
@@ -184,6 +184,7 @@ The following functions, attributes and methods are currently supported:
 * ``.strip()``
 * ``.isupper()``
 * ``.upper()``
+* ``.isnumeric()``
 * ``.islower()``
 * ``.lower()``
 * ``.zfill()``
diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index c97bc1b6f50..1f52230b872 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -1409,6 +1409,24 @@ def test_lower(self):
             self.assertEqual(pyfunc(s), cfunc(s), msg=msg.format(s))
 
 
+    def test_isnumeric(self):
+        def pyfunc(x):
+            return x.isnumeric()
+
+        cfunc = njit(pyfunc)
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L676-L693    # noqa: E501
+        cpython = ['', 'a', '0', '\u2460', '\xbc', '\u0660', '0123456789', '0123456789a', '\U00010401', '\U00010427',
+                   '\U00010429', '\U0001044E', '\U0001F40D', '\U0001F46F', '\U00011065', '\U0001D7F6', '\U00011066',
+                   '\U000104A0', '\U0001F107']
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L742-L749    # noqa: E501
+        cpython_extras = ['\uD800', '\uDFFF', '\uD800\uD800', '\uDFFF\uDFFF', 'a\uD800b\uDFFF', 'a\uDFFFb\uD800',
+                          'a\uD800b\uDFFFa', 'a\uDFFFb\uD800a']
+
+        msg = 'Results of "{}".isspace() must be equal'
+        for s in UNICODE_EXAMPLES + [''] + cpython + cpython_extras:
+            self.assertEqual(pyfunc(s), cfunc(s), msg=msg.format(s))
+
+
 @unittest.skipUnless(_py34_or_later,
                      'unicode support requires Python 3.4 or later')
 class TestUnicodeInTuple(BaseTest):
diff --git a/numba/unicode.py b/numba/unicode.py
index 88d27610fe4..d6ed9a53004 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -37,7 +37,8 @@
                               _PyUnicode_IsXidStart, _PyUnicode_IsXidContinue,
                               _PyUnicode_IsCased, _PyUnicode_IsCaseIgnorable,
                               _PyUnicode_IsUppercase, _PyUnicode_IsLowercase,
-                              _PyUnicode_IsTitlecase, _Py_ISLOWER, _Py_ISUPPER)
+                              _PyUnicode_IsTitlecase, _Py_ISLOWER, _Py_ISUPPER,
+                              _PyUnicode_IsNumeric,)
 
 # DATA MODEL
 
@@ -1410,6 +1411,29 @@ def impl(data):
     return impl
 
 
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12096-L12124    # noqa: E501
+@overload_method(types.UnicodeType, 'isnumeric')
+def unicode_isnumeric(data):
+    """Implements UnicodeType.isnumeric()"""
+
+    def impl(data):
+        length = len(data)
+
+        if length == 1:
+            return _PyUnicode_IsNumeric(_get_code_point(data, 0))
+
+        if length == 0:
+            return False
+
+        for i in range(length):
+            if not _PyUnicode_IsNumeric(_get_code_point(data, i)):
+                return False
+
+        return True
+
+    return impl
+
+
 @overload_method(types.UnicodeType, 'istitle')
 def unicode_istitle(s):
     """
diff --git a/numba/unicode_support.py b/numba/unicode_support.py
index f8b7b7a6c9c..c00da77c248 100644
--- a/numba/unicode_support.py
+++ b/numba/unicode_support.py
@@ -221,7 +221,8 @@ def _PyUnicode_IsDigit(ch):
 
 @register_jitable
 def _PyUnicode_IsNumeric(ch):
-    raise NotImplementedError
+    ctype = _PyUnicode_gettyperecord(ch)
+    return ctype.flags & _PyUnicode_TyperecordMasks.NUMERIC_MASK != 0
 
 
 @register_jitable

From ee4d8bac48fa54475c4313717da139447e1e87e4 Mon Sep 17 00:00:00 2001
From: mrubtsov <maria.rubtsova@intel.com>
Date: Wed, 13 Nov 2019 18:07:04 +0300
Subject: [PATCH 33/68] change

---
 numba/tests/test_unicode.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index 1f52230b872..284e4374131 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -1415,12 +1415,12 @@ def pyfunc(x):
 
         cfunc = njit(pyfunc)
         # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L676-L693    # noqa: E501
-        cpython = ['', 'a', '0', '\u2460', '\xbc', '\u0660', '0123456789', '0123456789a', '\U00010401', '\U00010427',
-                   '\U00010429', '\U0001044E', '\U0001F40D', '\U0001F46F', '\U00011065', '\U0001D7F6', '\U00011066',
-                   '\U000104A0', '\U0001F107']
+        cpython = ['', 'a', '0', '\u2460', '\xbc', '\u0660', '0123456789', '0123456789a', '\U00010401',
+                   '\U00010427', '\U00010429', '\U0001044E', '\U0001F40D', '\U0001F46F', '\U00011065',
+                   '\U0001D7F6', '\U00011066', '\U000104A0', '\U0001F107']
         # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L742-L749    # noqa: E501
-        cpython_extras = ['\uD800', '\uDFFF', '\uD800\uD800', '\uDFFF\uDFFF', 'a\uD800b\uDFFF', 'a\uDFFFb\uD800',
-                          'a\uD800b\uDFFFa', 'a\uDFFFb\uD800a']
+        cpython_extras = ['\uD800', '\uDFFF', '\uD800\uD800', '\uDFFF\uDFFF', 'a\uD800b\uDFFF',
+                          'a\uDFFFb\uD800', 'a\uD800b\uDFFFa', 'a\uDFFFb\uD800a']
 
         msg = 'Results of "{}".isspace() must be equal'
         for s in UNICODE_EXAMPLES + [''] + cpython + cpython_extras:

From 7c7025124bf3395160f76947c15b90f2452d51b2 Mon Sep 17 00:00:00 2001
From: Rubtsowa <36762665+Rubtsowa@users.noreply.github.com>
Date: Wed, 13 Nov 2019 18:28:18 +0300
Subject: [PATCH 34/68] Update test_unicode.py

---
 numba/tests/test_unicode.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index 284e4374131..daa5de4507a 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -1415,12 +1415,14 @@ def pyfunc(x):
 
         cfunc = njit(pyfunc)
         # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L676-L693    # noqa: E501
-        cpython = ['', 'a', '0', '\u2460', '\xbc', '\u0660', '0123456789', '0123456789a', '\U00010401',
-                   '\U00010427', '\U00010429', '\U0001044E', '\U0001F40D', '\U0001F46F', '\U00011065',
-                   '\U0001D7F6', '\U00011066', '\U000104A0', '\U0001F107']
+        cpython = ['', 'a', '0', '\u2460', '\xbc', '\u0660', '0123456789', '0123456789a', 
+                   '\U00010401', '\U00010427', '\U00010429', '\U0001044E', '\U0001F40D', 
+                   '\U0001F46F', '\U00011065', '\U0001D7F6', '\U00011066', '\U000104A0',
+                   '\U0001F107']
         # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L742-L749    # noqa: E501
-        cpython_extras = ['\uD800', '\uDFFF', '\uD800\uD800', '\uDFFF\uDFFF', 'a\uD800b\uDFFF',
-                          'a\uDFFFb\uD800', 'a\uD800b\uDFFFa', 'a\uDFFFb\uD800a']
+        cpython_extras = ['\uD800', '\uDFFF', '\uD800\uD800', '\uDFFF\uDFFF', 
+                          'a\uD800b\uDFFF', 'a\uDFFFb\uD800', 'a\uD800b\uDFFFa',
+                          'a\uDFFFb\uD800a']
 
         msg = 'Results of "{}".isspace() must be equal'
         for s in UNICODE_EXAMPLES + [''] + cpython + cpython_extras:

From cd2ae60a846916ae72c2b9280c97701340cb9ec6 Mon Sep 17 00:00:00 2001
From: Rubtsowa <36762665+Rubtsowa@users.noreply.github.com>
Date: Wed, 13 Nov 2019 19:22:21 +0300
Subject: [PATCH 35/68] Update test_unicode.py

---
 numba/tests/test_unicode.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index daa5de4507a..9746b379c72 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -1415,10 +1415,10 @@ def pyfunc(x):
 
         cfunc = njit(pyfunc)
         # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L676-L693    # noqa: E501
-        cpython = ['', 'a', '0', '\u2460', '\xbc', '\u0660', '0123456789', '0123456789a', 
-                   '\U00010401', '\U00010427', '\U00010429', '\U0001044E', '\U0001F40D', 
-                   '\U0001F46F', '\U00011065', '\U0001D7F6', '\U00011066', '\U000104A0',
-                   '\U0001F107']
+        cpython = ['', 'a', '0', '\u2460', '\xbc', '\u0660', '0123456789', 
+                   '0123456789a', '\U00010401', '\U00010427', '\U00010429',
+                   '\U0001044E', '\U0001F40D', '\U0001F46F', '\U00011065',
+                   '\U0001D7F6', '\U00011066', '\U000104A0', '\U0001F107']
         # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L742-L749    # noqa: E501
         cpython_extras = ['\uD800', '\uDFFF', '\uD800\uD800', '\uDFFF\uDFFF', 
                           'a\uD800b\uDFFF', 'a\uDFFFb\uD800', 'a\uD800b\uDFFFa',

From 1680025a4ded4ba9ec8d86689ad20bacad4e7714 Mon Sep 17 00:00:00 2001
From: mrubtsov <maria.rubtsova@intel.com>
Date: Thu, 14 Nov 2019 10:04:28 +0300
Subject: [PATCH 36/68] Implement str.isnumeric

---
 numba/tests/test_unicode.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index 9746b379c72..c1360209ad4 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -1408,19 +1408,18 @@ def test_lower(self):
         for s in UNICODE_EXAMPLES + [''] + extras + cpython + sigma:
             self.assertEqual(pyfunc(s), cfunc(s), msg=msg.format(s))
 
-
     def test_isnumeric(self):
         def pyfunc(x):
             return x.isnumeric()
 
         cfunc = njit(pyfunc)
         # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L676-L693    # noqa: E501
-        cpython = ['', 'a', '0', '\u2460', '\xbc', '\u0660', '0123456789', 
+        cpython = ['', 'a', '0', '\u2460', '\xbc', '\u0660', '0123456789',
                    '0123456789a', '\U00010401', '\U00010427', '\U00010429',
                    '\U0001044E', '\U0001F40D', '\U0001F46F', '\U00011065',
                    '\U0001D7F6', '\U00011066', '\U000104A0', '\U0001F107']
         # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Lib/test/test_unicode.py#L742-L749    # noqa: E501
-        cpython_extras = ['\uD800', '\uDFFF', '\uD800\uD800', '\uDFFF\uDFFF', 
+        cpython_extras = ['\uD800', '\uDFFF', '\uD800\uD800', '\uDFFF\uDFFF',
                           'a\uD800b\uDFFF', 'a\uDFFFb\uD800', 'a\uD800b\uDFFFa',
                           'a\uDFFFb\uD800a']
 

From 502b7558bfd32673151f32ae6abc165412d9a5a6 Mon Sep 17 00:00:00 2001
From: mrubtsov <maria.rubtsova@intel.com>
Date: Mon, 18 Nov 2019 13:26:48 +0300
Subject: [PATCH 37/68] Implement str.isnumeric

---
 numba/tests/test_unicode.py | 2 +-
 numba/unicode.py            | 4 ++++
 numba/unicode_support.py    | 1 +
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index c1360209ad4..1873b3fd13f 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -1423,7 +1423,7 @@ def pyfunc(x):
                           'a\uD800b\uDFFF', 'a\uDFFFb\uD800', 'a\uD800b\uDFFFa',
                           'a\uDFFFb\uD800a']
 
-        msg = 'Results of "{}".isspace() must be equal'
+        msg = 'Results of "{}".isnumeric() must be equal'
         for s in UNICODE_EXAMPLES + [''] + cpython + cpython_extras:
             self.assertEqual(pyfunc(s), cfunc(s), msg=msg.format(s))
 
diff --git a/numba/unicode.py b/numba/unicode.py
index d6ed9a53004..a4273e1ca7f 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -953,6 +953,10 @@ def unicode_isidentifier(data):
 
     def impl(data):
         length = len(data)
+
+        if length == 1:
+            return _PyUnicode_IsNumeric(_get_code_point(data, 0))
+
         if length == 0:
             return False
 
diff --git a/numba/unicode_support.py b/numba/unicode_support.py
index c00da77c248..5e532b3c6ec 100644
--- a/numba/unicode_support.py
+++ b/numba/unicode_support.py
@@ -219,6 +219,7 @@ def _PyUnicode_IsDigit(ch):
     raise NotImplementedError
 
 
+# From: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L140-L145    # noqa: E501
 @register_jitable
 def _PyUnicode_IsNumeric(ch):
     ctype = _PyUnicode_gettyperecord(ch)

From 3bce8373fd9897bccd28960e00c1d1c2699ac9f7 Mon Sep 17 00:00:00 2001
From: mrubtsov <maria.rubtsova@intel.com>
Date: Tue, 19 Nov 2019 09:56:33 +0300
Subject: [PATCH 38/68] Implement str.isnumeric

---
 numba/unicode.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index a4273e1ca7f..d6ed9a53004 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -953,10 +953,6 @@ def unicode_isidentifier(data):
 
     def impl(data):
         length = len(data)
-
-        if length == 1:
-            return _PyUnicode_IsNumeric(_get_code_point(data, 0))
-
         if length == 0:
             return False
 

From 7bbc28b66add01a80ae74c857ecdcfa7d8879792 Mon Sep 17 00:00:00 2001
From: mrubtsov <maria.rubtsova@intel.com>
Date: Wed, 20 Nov 2019 10:22:39 +0300
Subject: [PATCH 39/68] change

---
 numba/unicode.py | 41 +++++++++++++++++------------------------
 1 file changed, 17 insertions(+), 24 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 34831f6363b..b4c64b0e82d 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1317,34 +1317,27 @@ def unicode_replace(s, old_str, new_str, count=-1):
         raise TypingError('The object must be a UnicodeType.'
                           ' Given: {}'.format(new_str))
 
-    def impl(s, old_str, new_str, count=-1):
+    def impl(s, old, new, count=-1):
         if count == 0:
             return s
-        if old_str == '' or old_str is None:
-            q = list(s)
+        if old == '':
+            schars = list(s)
             if count == -1:
-                str_res = new_str.join(q)
-                str_result = new_str + str_res + new_str
-                return str_result
-            i = 0
-            str_result = new_str
-            if count > len(q):
-                counter = len(q)
-            else:
-                counter = count
-            while i < counter:
-                str_result += q[i]
-                if i + 1 != counter:
-                    str_result += new_str
+                return new + new.join(schars) + new
+            split_result = [new]
+            min_count = min(len(schars), count)
+            for i in range(min_count):
+                split_result.append(schars[i])
+                if i + 1 != min_count:
+                    split_result.append(new)
                 else:
-                    str_result += ''.join(q[(i + 1):])
-                i += 1
-            if count > len(q):
-                str_result += new_str
-            return str_result
-        q = s.split(old_str, count)
-        str_result = new_str.join(q)
-        return str_result
+                    split_result.append(''.join(schars[(i + 1):]))
+            if count > len(schars):
+                split_result.append(new)
+            return ''.join(split_result)
+        schars = s.split(old, count)
+        result = new.join(schars)
+        return result
 
     return impl
 

From ace863124e2edc61f18b2a53b443fccc1becbcf9 Mon Sep 17 00:00:00 2001
From: mrubtsov <maria.rubtsova@intel.com>
Date: Mon, 25 Nov 2019 09:05:03 +0300
Subject: [PATCH 40/68] change

---
 numba/tests/test_unicode.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index 4191fcfa08b..9f357621346 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -958,9 +958,6 @@ def test_join(self):
         ]
 
         for sep, parts in CASES:
-            print(sep)
-            print(parts)
-            print(cfunc(sep, parts))
             self.assertEqual(pyfunc(sep, parts),
                              cfunc(sep, parts),
                              "'%s'.join('%s')?" % (sep, parts))

From 771782aad5be300ac9d710de69a2d3e9a4bfb04a Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Wed, 27 Nov 2019 10:41:45 +0300
Subject: [PATCH 41/68] Extend str.capitalize() for ascii

---
 numba/unicode.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 88a966e975d..e50c2730a4c 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1277,12 +1277,20 @@ def impl(data):
             return _empty_string(data._kind, length, data._is_ascii)
 
         if data._is_ascii:
+            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/bytes_methods.c#L361-L382    # noqa: E501
             res = _empty_string(data._kind, length, 1)
             code_point = _get_code_point(data, 0)
-            _set_code_point(res, 0, _Py_TOUPPER(code_point))
+            if _Py_ISLOWER(code_point):
+                _set_code_point(res, 0, _Py_TOUPPER(code_point))
+            else:
+                _set_code_point(res, 0, code_point)
+
             for idx in range(1, length):
                 code_point = _get_code_point(data, idx)
-                _set_code_point(res, idx, _Py_TOLOWER(code_point))
+                if _Py_ISUPPER(code_point):
+                    _set_code_point(res, idx, _Py_TOLOWER(code_point))
+                else:
+                    _set_code_point(res, idx, code_point)
 
             return res
 

From 2900c16f42c7e8a86d0eec0e48a956cb2c9b310f Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Wed, 27 Nov 2019 11:14:51 +0300
Subject: [PATCH 42/68] Support newer version of ascii capitalization

---
 numba/unicode.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index e50c2730a4c..bc89691fa24 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1277,20 +1277,15 @@ def impl(data):
             return _empty_string(data._kind, length, data._is_ascii)
 
         if data._is_ascii:
-            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/bytes_methods.c#L361-L382    # noqa: E501
+            # https://github.com/python/cpython/blob/593bb30e82eded7f2ec02f7d1aa49742e6962113/Objects/bytes_methods.c#L361-L368    # noqa: E501
+            # mixed with:
+            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/bytes_methods.c#L299-L307    # noqa: E501
             res = _empty_string(data._kind, length, 1)
             code_point = _get_code_point(data, 0)
-            if _Py_ISLOWER(code_point):
-                _set_code_point(res, 0, _Py_TOUPPER(code_point))
-            else:
-                _set_code_point(res, 0, code_point)
-
+            _set_code_point(res, 0, _Py_TOUPPER(code_point))
             for idx in range(1, length):
                 code_point = _get_code_point(data, idx)
-                if _Py_ISUPPER(code_point):
-                    _set_code_point(res, idx, _Py_TOLOWER(code_point))
-                else:
-                    _set_code_point(res, idx, code_point)
+                _set_code_point(res, idx, _Py_TOLOWER(code_point))
 
             return res
 

From f2ac10c2213288267270d7530a04de1a6d37003e Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Wed, 27 Nov 2019 11:17:17 +0300
Subject: [PATCH 43/68] Change link to CPython for str.capitalize()

---
 numba/unicode.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index bc89691fa24..c3a24158bae 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1279,7 +1279,7 @@ def impl(data):
         if data._is_ascii:
             # https://github.com/python/cpython/blob/593bb30e82eded7f2ec02f7d1aa49742e6962113/Objects/bytes_methods.c#L361-L368    # noqa: E501
             # mixed with:
-            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/bytes_methods.c#L299-L307    # noqa: E501
+            # https://github.com/python/cpython/blob/593bb30e82eded7f2ec02f7d1aa49742e6962113/Objects/bytes_methods.c#L299-L307    # noqa: E501
             res = _empty_string(data._kind, length, 1)
             code_point = _get_code_point(data, 0)
             _set_code_point(res, 0, _Py_TOUPPER(code_point))

From 302e0a50b49d7e8443fb6c33b6cdf0a68a23d19d Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Wed, 27 Nov 2019 13:42:27 +0300
Subject: [PATCH 44/68] Add support of python 3.8 for str.capitalize()

---
 numba/unicode.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index c3a24158bae..42fad816dba 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1,3 +1,4 @@
+import sys
 import operator
 
 import numpy as np
@@ -38,6 +39,8 @@
                               _PyUnicode_IsUppercase, _PyUnicode_IsLowercase,
                               _PyUnicode_IsTitlecase, _Py_ISLOWER, _Py_ISUPPER)
 
+_py38_or_later = sys.version_info[:2] >= (3, 8)
+
 # DATA MODEL
 
 
@@ -1297,7 +1300,13 @@ def impl(data):
         mapped = np.zeros(3, dtype=_Py_UCS4)
         tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length)
         code_point = _get_code_point(data, 0)
-        n_res = _PyUnicode_ToUpperFull(code_point, mapped)
+
+        # https://github.com/python/cpython/commit/b015fc86f7b1f35283804bfee788cce0a5495df7/Objects/unicodeobject.c#diff-220e5da0d1c8abf508b25c02da6ca16c    # noqa: E501
+        if _py38_or_later:
+            n_res = _PyUnicode_ToTitleFull(code_point, mapped)
+        else:
+            n_res = _PyUnicode_ToUpperFull(code_point, mapped)
+
         for m in mapped[:n_res]:
             maxchar = max(maxchar, m)
             _set_code_point(tmp, k, m)

From d80571c5c375fcdfa287af9647368ebe3f33e8a0 Mon Sep 17 00:00:00 2001
From: Rubtsowa <36762665+Rubtsowa@users.noreply.github.com>
Date: Wed, 4 Dec 2019 17:01:48 +0300
Subject: [PATCH 45/68] Update unicode.py

---
 numba/unicode.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 275622b5495..efcd9386f5e 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -39,7 +39,7 @@
                               _PyUnicode_IsCased, _PyUnicode_IsCaseIgnorable,
                               _PyUnicode_IsUppercase, _PyUnicode_IsLowercase,
                               _PyUnicode_IsTitlecase, _Py_ISLOWER, _Py_ISUPPER,
-                              _PyUnicode_IsAlpha, _PyUnicode_IsNumeric, 
+                              _PyUnicode_IsAlpha, _PyUnicode_IsNumeric,
                               _PyUnicode_IsDecimalDigit)
 
 # DATA MODEL

From 3c708879f1c2d1e5dda678600137f6241959ac92 Mon Sep 17 00:00:00 2001
From: Rubtsowa <36762665+Rubtsowa@users.noreply.github.com>
Date: Wed, 4 Dec 2019 18:02:14 +0300
Subject: [PATCH 46/68] Update test_unicode.py

---
 numba/tests/test_unicode.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index 438fa0918e5..645e8b14a58 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -126,7 +126,7 @@ def replace_usecase(s, x, y):
 def replace_with_count_usecase(s, x, y, count):
     return s.replace(x, y, count)
 
-  
+
 def index_usecase(x, y):
     return x.index(y)
 

From b292caf70c85df364926283479cad62bed3eaf05 Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Tue, 10 Dec 2019 09:22:01 +0300
Subject: [PATCH 47/68] Rename unit test for str.splitlines()

---
 numba/tests/test_unicode.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index ece09af668a..247b3c704a7 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -1150,7 +1150,7 @@ def test_split_whitespace(self):
                              cfunc(test_str),
                              "'%s'.split()?" % (test_str,))
 
-    def test_split_exception_invalid_keepends(self):
+    def test_splitlines_exception_invalid_keepends(self):
         pyfunc = splitlines_with_keepends_usecase
         cfunc = njit(pyfunc)
 

From 5b9969b15ae002199e4500055d8f2367fdcad25c Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Tue, 10 Dec 2019 09:26:39 +0300
Subject: [PATCH 48/68] Remove str.splitlines() from this branch

---
 docs/source/reference/pysupported.rst |  1 -
 numba/tests/test_unicode.py           | 53 ---------------------
 numba/unicode.py                      | 68 ---------------------------
 numba/unicode_support.py              | 58 -----------------------
 4 files changed, 180 deletions(-)

diff --git a/docs/source/reference/pysupported.rst b/docs/source/reference/pysupported.rst
index a7c6ad8b34b..382214294c3 100644
--- a/docs/source/reference/pysupported.rst
+++ b/docs/source/reference/pysupported.rst
@@ -179,7 +179,6 @@ The following functions, attributes and methods are currently supported:
 * ``.ljust()``
 * ``.rjust()``
 * ``.split()``
-* ``.splitlines()``
 * ``.join()``
 * ``.lstrip()``
 * ``.rstrip()``
diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index 247b3c704a7..75304fdfcd5 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -167,18 +167,6 @@ def split_whitespace_usecase(x):
     return x.split()
 
 
-def splitlines_usecase(s):
-    return s.splitlines()
-
-
-def splitlines_with_keepends_usecase(s, keepends):
-    return s.splitlines(keepends)
-
-
-def splitlines_with_keepends_kwarg_usecase(s, keepends):
-    return s.splitlines(keepends=keepends)
-
-
 def lstrip_usecase(x):
     return x.lstrip()
 
@@ -1150,47 +1138,6 @@ def test_split_whitespace(self):
                              cfunc(test_str),
                              "'%s'.split()?" % (test_str,))
 
-    def test_splitlines_exception_invalid_keepends(self):
-        pyfunc = splitlines_with_keepends_usecase
-        cfunc = njit(pyfunc)
-
-        accepted_types = (types.Integer, int, types.Boolean, bool)
-        for ty, keepends in (('none', None), ('unicode_type', 'None')):
-            with self.assertRaises(TypingError) as raises:
-                cfunc('\n', keepends)
-            msg = '"keepends" must be {}, not {}'.format(accepted_types, ty)
-            self.assertIn(msg, str(raises.exception))
-
-    def test_splitlines(self):
-        pyfunc = splitlines_usecase
-        cfunc = njit(pyfunc)
-
-        cases = ['', '\n', 'abc\r\rabc\r\n', '🐍⚡\v', '\f🐍⚡\f\v\v🐍\x85',
-                 '\u2028aba\u2029baba', '\n\r\na\v\fb\x0b\x0cc\x1c\x1d\x1e']
-
-        msg = 'Results of "{}".splitlines() must be equal'
-        for s in cases:
-            self.assertEqual(pyfunc(s), cfunc(s), msg=msg.format(s))
-
-    def test_splitlines_with_keepends(self):
-        pyfuncs = [
-            splitlines_with_keepends_usecase,
-            splitlines_with_keepends_kwarg_usecase
-        ]
-        messages = [
-            'Results of "{}".splitlines({}) must be equal',
-            'Results of "{}".splitlines(keepends={}) must be equal'
-        ]
-        cases = ['', '\n', 'abc\r\rabc\r\n', '🐍⚡\v', '\f🐍⚡\f\v\v🐍\x85',
-                 '\u2028aba\u2029baba', '\n\r\na\v\fb\x0b\x0cc\x1c\x1d\x1e']
-        all_keepends = [True, False, 0, 1, -1, 100]
-
-        for pyfunc, msg in zip(pyfuncs, messages):
-            cfunc = njit(pyfunc)
-            for s, keepends in product(cases, all_keepends):
-                self.assertEqual(pyfunc(s, keepends), cfunc(s, keepends),
-                                 msg=msg.format(s, keepends))
-
     def test_join_empty(self):
         # Can't pass empty list to nopython mode, so we have to make a
         # separate test case
diff --git a/numba/unicode.py b/numba/unicode.py
index 4fd375c71cc..6829b3bb67d 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -40,8 +40,6 @@
                               _PyUnicode_IsXidStart, _PyUnicode_IsXidContinue,
                               _PyUnicode_IsCased, _PyUnicode_IsCaseIgnorable,
                               _PyUnicode_IsUppercase, _PyUnicode_IsLowercase,
-                              _PyUnicode_IsLineBreak, _Py_ISLINEBREAK,
-                              _Py_ISLINEFEED, _Py_ISCARRIAGERETURN,
                               _PyUnicode_IsTitlecase, _Py_ISLOWER, _Py_ISUPPER,
                               _PyUnicode_IsAlpha, _PyUnicode_IsNumeric,
                               _Py_ISALPHA,)
@@ -933,72 +931,6 @@ def rjust_impl(string, width, fillchar=' '):
     return rjust_impl
 
 
-def generate_splitlines_func(is_line_break_func):
-    """Generate splitlines performer based on ascii or unicode line breaks."""
-    def impl(data, keepends):
-        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/split.h#L335-L389    # noqa: E501
-        length = len(data)
-        result = []
-        i = j = 0
-        while i < length:
-            # find a line and append it
-            while i < length:
-                code_point = _get_code_point(data, i)
-                if is_line_break_func(code_point):
-                    break
-                i += 1
-
-            # skip the line break reading CRLF as one line break
-            eol = i
-            if i < length:
-                if i + 1 < length:
-                    cur_cp = _get_code_point(data, i)
-                    next_cp = _get_code_point(data, i + 1)
-                    if _Py_ISCARRIAGERETURN(cur_cp) and _Py_ISLINEFEED(next_cp):
-                        i += 1
-                i += 1
-                if keepends:
-                    eol = i
-
-            result.append(data[j:eol])
-            j = i
-
-        return result
-
-    return impl
-
-
-_ascii_splitlines = register_jitable(generate_splitlines_func(_Py_ISLINEBREAK))
-_unicode_splitlines = register_jitable(generate_splitlines_func(
-    _PyUnicode_IsLineBreak))
-
-
-# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10196-L10229    # noqa: E501
-@overload_method(types.UnicodeType, 'splitlines')
-def unicode_splitlines(data, keepends=False):
-    """Implements str.splitlines()"""
-    thety = keepends
-    # if the type is omitted, the concrete type is the value
-    if isinstance(keepends, types.Omitted):
-        thety = keepends.value
-    # if the type is optional, the concrete type is the captured type
-    elif isinstance(keepends, types.Optional):
-        thety = keepends.type
-
-    accepted = (types.Integer, int, types.Boolean, bool)
-    if thety is not None and not isinstance(thety, accepted):
-        raise TypingError(
-            '"{}" must be {}, not {}'.format('keepends', accepted, keepends))
-
-    def splitlines_impl(data, keepends=False):
-        if data._is_ascii:
-            return _ascii_splitlines(data, keepends)
-
-        return _unicode_splitlines(data, keepends)
-
-    return splitlines_impl
-
-
 @register_jitable
 def join_list(sep, parts):
     parts_len = len(parts)
diff --git a/numba/unicode_support.py b/numba/unicode_support.py
index 917b0f99f43..480ab3be9b4 100644
--- a/numba/unicode_support.py
+++ b/numba/unicode_support.py
@@ -247,12 +247,6 @@ def _PyUnicode_IsUppercase(ch):
     return ctype.flags & _PyUnicode_TyperecordMasks.UPPER_MASK != 0
 
 
-@register_jitable
-def _PyUnicode_IsLineBreak(ch):
-    ctype = _PyUnicode_gettyperecord(ch)
-    return ctype.flags & _PyUnicode_TyperecordMasks.LINEBREAK_MASK != 0
-
-
 @register_jitable
 def _PyUnicode_ToUppercase(ch):
     raise NotImplementedError
@@ -583,40 +577,6 @@ class _PY_CTF(IntEnum):
 ], dtype=np.uint8)
 
 
-class _PY_CTF_LB(IntEnum):
-    LINE_BREAK = 0x01
-    LINE_FEED = 0x02
-    CARRIAGE_RETURN = 0x04
-
-
-_Py_ctype_islinebreak = np.array([
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    _PY_CTF_LB.LINE_BREAK | _PY_CTF_LB.LINE_FEED,  # 0xa '\n'
-    _PY_CTF_LB.LINE_BREAK,  # 0xb '\v'
-    _PY_CTF_LB.LINE_BREAK,  # 0xc '\f'
-    _PY_CTF_LB.LINE_BREAK | _PY_CTF_LB.CARRIAGE_RETURN,  # 0xd '\r'
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    _PY_CTF_LB.LINE_BREAK,  # 0x1c '\x1c'
-    _PY_CTF_LB.LINE_BREAK,  # 0x1d '\x1d'
-    _PY_CTF_LB.LINE_BREAK,  # 0x1e '\x1e'
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    _PY_CTF_LB.LINE_BREAK,  # 0x85 '\x85'
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0,
-], dtype=np.intc)
-
-
 # Translation of:
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Include/pymacro.h#L25    # noqa: E501
 @register_jitable
@@ -720,23 +680,5 @@ def _Py_ISSPACE(ch):
     return _Py_ctype_table[_Py_CHARMASK(ch)] & _PY_CTF.SPACE
 
 
-@register_jitable
-def _Py_ISLINEBREAK(ch):
-    """Check if character is ASCII line break"""
-    return _Py_ctype_islinebreak[_Py_CHARMASK(ch)] & _PY_CTF_LB.LINE_BREAK
-
-
-@register_jitable
-def _Py_ISLINEFEED(ch):
-    """Check if character is line feed `\n`"""
-    return _Py_ctype_islinebreak[_Py_CHARMASK(ch)] & _PY_CTF_LB.LINE_FEED
-
-
-@register_jitable
-def _Py_ISCARRIAGERETURN(ch):
-    """Check if character is carriage return `\r`"""
-    return _Py_ctype_islinebreak[_Py_CHARMASK(ch)] & _PY_CTF_LB.CARRIAGE_RETURN
-
-
 # End code related to/from CPython's pyctype
 # ------------------------------------------------------------------------------

From 51ba21c0937ac31920c205a678003a7a74846f95 Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Tue, 10 Dec 2019 09:28:40 +0300
Subject: [PATCH 49/68] Remove excess line

---
 numba/unicode_support.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/numba/unicode_support.py b/numba/unicode_support.py
index 480ab3be9b4..1d1e8c4cd57 100644
--- a/numba/unicode_support.py
+++ b/numba/unicode_support.py
@@ -679,6 +679,5 @@ def _Py_ISSPACE(ch):
     """
     return _Py_ctype_table[_Py_CHARMASK(ch)] & _PY_CTF.SPACE
 
-
 # End code related to/from CPython's pyctype
 # ------------------------------------------------------------------------------

From aeff032a4e8c72090d1dbfc5260b7bb16c1a9a52 Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Tue, 10 Dec 2019 11:09:46 +0300
Subject: [PATCH 50/68] Remove excess import from unicode.py

---
 numba/unicode.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 8b91a7db8b5..b48974465c8 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1,4 +1,3 @@
-import sys
 import operator
 import sys
 

From 0f7bd42e70087a0af4cdf7e3fb7f1962d630a207 Mon Sep 17 00:00:00 2001
From: mrubtsov <maria.rubtsova@intel.com>
Date: Mon, 30 Dec 2019 19:46:28 +0300
Subject: [PATCH 51/68] correction names in method

---
 numba/unicode.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index c7dc5cd07cb..fdda0201146 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1539,26 +1539,26 @@ def unicode_replace(s, old_str, new_str, count=-1):
         raise TypingError('The object must be a UnicodeType.'
                           ' Given: {}'.format(new_str))
 
-    def impl(s, old, new, count=-1):
+    def impl(s, old_str, new_str, count=-1):
         if count == 0:
             return s
-        if old == '':
+        if old_str == '':
             schars = list(s)
             if count == -1:
-                return new + new.join(schars) + new
-            split_result = [new]
+                return new_str + new_str.join(schars) + new_str
+            split_result = [new_str]
             min_count = min(len(schars), count)
             for i in range(min_count):
                 split_result.append(schars[i])
                 if i + 1 != min_count:
-                    split_result.append(new)
+                    split_result.append(new_str)
                 else:
                     split_result.append(''.join(schars[(i + 1):]))
             if count > len(schars):
-                split_result.append(new)
+                split_result.append(new_str)
             return ''.join(split_result)
-        schars = s.split(old, count)
-        result = new.join(schars)
+        schars = s.split(old_str, count)
+        result = new_str.join(schars)
         return result
 
     return impl

From aa63a0c1efdee2b1cea6c5b045b88e37d364fa39 Mon Sep 17 00:00:00 2001
From: Denis <denis.smirnov@intel.com>
Date: Fri, 15 Nov 2019 09:54:12 +0300
Subject: [PATCH 52/68] Implement str.splitlines() based on CPython

Conflicts:
	docs/source/reference/pysupported.rst
	numba/tests/test_unicode.py
	numba/unicode.py
---
 numba/unicode.py         | 68 ++++++++++++++++++++++++++++++++++++++++
 numba/unicode_support.py | 59 ++++++++++++++++++++++++++++++++++
 2 files changed, 127 insertions(+)

diff --git a/numba/unicode.py b/numba/unicode.py
index f3baeab3b62..e91676e2cee 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -48,6 +48,8 @@
                               _PyUnicode_IsAlpha, _PyUnicode_IsNumeric,
                               _Py_ISALPHA, _PyUnicode_IsDigit,
                               _PyUnicode_IsDecimalDigit)
+                              _PyUnicode_IsTitlecase, _Py_ISLOWER, _Py_ISUPPER)
+
 
 _py38_or_later = sys.version_info[:2] >= (3, 8)
 
@@ -1250,6 +1252,72 @@ def rjust_impl(string, width, fillchar=' '):
     return rjust_impl
 
 
+def generate_splitlines_func(is_line_break_func):
+    """Generate splitlines performer based on ascii or unicode line breaks."""
+    def impl(data, keepends):
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/split.h#L335-L389    # noqa: E501
+        length = len(data)
+        result = []
+        i = j = 0
+        while i < length:
+            # find a line and append it
+            while i < length:
+                code_point = _get_code_point(data, i)
+                if is_line_break_func(code_point):
+                    break
+                i += 1
+
+            # skip the line break reading CRLF as one line break
+            eol = i
+            if i < length:
+                if i + 1 < length:
+                    cur_cp = _get_code_point(data, i)
+                    next_cp = _get_code_point(data, i + 1)
+                    if _Py_ISCARRIAGERETURN(cur_cp) and _Py_ISLINEFEED(next_cp):
+                        i += 1
+                i += 1
+                if keepends:
+                    eol = i
+
+            result.append(data[j:eol])
+            j = i
+
+        return result
+
+    return impl
+
+
+_ascii_splitlines = register_jitable(generate_splitlines_func(_Py_ISLINEBREAK))
+_unicode_splitlines = register_jitable(generate_splitlines_func(
+    _PyUnicode_IsLineBreak))
+
+
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10196-L10229    # noqa: E501
+@overload_method(types.UnicodeType, 'splitlines')
+def unicode_splitlines(data, keepends=False):
+    """Implements str.splitlines()"""
+    thety = keepends
+    # if the type is omitted, the concrete type is the value
+    if isinstance(keepends, types.Omitted):
+        thety = keepends.value
+    # if the type is optional, the concrete type is the captured type
+    elif isinstance(keepends, types.Optional):
+        thety = keepends.type
+
+    accepted = (types.Integer, int, types.Boolean, bool)
+    if thety is not None and not isinstance(thety, accepted):
+        raise TypingError(
+            '"{}" must be {}, not {}'.format('keepends', accepted, keepends))
+
+    def splitlines_impl(data, keepends=False):
+        if data._is_ascii:
+            return _ascii_splitlines(data, keepends)
+
+        return _unicode_splitlines(data, keepends)
+
+    return splitlines_impl
+
+
 @register_jitable
 def join_list(sep, parts):
     parts_len = len(parts)
diff --git a/numba/unicode_support.py b/numba/unicode_support.py
index 8d9a61d26bd..d170102e0cf 100644
--- a/numba/unicode_support.py
+++ b/numba/unicode_support.py
@@ -250,6 +250,12 @@ def _PyUnicode_IsUppercase(ch):
     return ctype.flags & _PyUnicode_TyperecordMasks.UPPER_MASK != 0
 
 
+@register_jitable
+def _PyUnicode_IsLineBreak(ch):
+    ctype = _PyUnicode_gettyperecord(ch)
+    return ctype.flags & _PyUnicode_TyperecordMasks.LINEBREAK_MASK != 0
+
+
 @register_jitable
 def _PyUnicode_ToUppercase(ch):
     raise NotImplementedError
@@ -596,6 +602,40 @@ class _PY_CTF(IntEnum):
 ], dtype=np.uint8)
 
 
+class _PY_CTF_LB(IntEnum):
+    LINE_BREAK = 0x01
+    LINE_FEED = 0x02
+    CARRIAGE_RETURN = 0x04
+
+
+_Py_ctype_islinebreak = np.array([
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    _PY_CTF_LB.LINE_BREAK | _PY_CTF_LB.LINE_FEED,  # 0xa '\n'
+    _PY_CTF_LB.LINE_BREAK,  # 0xb '\v'
+    _PY_CTF_LB.LINE_BREAK,  # 0xc '\f'
+    _PY_CTF_LB.LINE_BREAK | _PY_CTF_LB.CARRIAGE_RETURN,  # 0xd '\r'
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    _PY_CTF_LB.LINE_BREAK,  # 0x1c '\x1c'
+    _PY_CTF_LB.LINE_BREAK,  # 0x1d '\x1d'
+    _PY_CTF_LB.LINE_BREAK,  # 0x1e '\x1e'
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    _PY_CTF_LB.LINE_BREAK,  # 0x85 '\x85'
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0,
+], dtype=np.intc)
+
+
 # Translation of:
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Include/pymacro.h#L25    # noqa: E501
 @register_jitable
@@ -698,5 +738,24 @@ def _Py_ISSPACE(ch):
     """
     return _Py_ctype_table[_Py_CHARMASK(ch)] & _PY_CTF.SPACE
 
+
+@register_jitable
+def _Py_ISLINEBREAK(ch):
+    """Check if character is ASCII line break"""
+    return _Py_ctype_islinebreak[_Py_CHARMASK(ch)] & _PY_CTF_LB.LINE_BREAK
+
+
+@register_jitable
+def _Py_ISLINEFEED(ch):
+    """Check if character is line feed `\n`"""
+    return _Py_ctype_islinebreak[_Py_CHARMASK(ch)] & _PY_CTF_LB.LINE_FEED
+
+
+@register_jitable
+def _Py_ISCARRIAGERETURN(ch):
+    """Check if character is carriage return `\r`"""
+    return _Py_ctype_islinebreak[_Py_CHARMASK(ch)] & _PY_CTF_LB.CARRIAGE_RETURN
+
+
 # End code related to/from CPython's pyctype
 # ------------------------------------------------------------------------------

From a9745952dbff01082f445c79de58106b1bb597f8 Mon Sep 17 00:00:00 2001
From: Stuart Archibald <stuart.archibald@googlemail.com>
Date: Tue, 31 Dec 2019 11:07:31 +0000
Subject: [PATCH 53/68] Fix up

---
 numba/tests/test_unicode.py |  3 ++-
 numba/unicode.py            | 20 ++------------------
 2 files changed, 4 insertions(+), 19 deletions(-)

diff --git a/numba/tests/test_unicode.py b/numba/tests/test_unicode.py
index 81392a12427..3df0cb8a6f0 100644
--- a/numba/tests/test_unicode.py
+++ b/numba/tests/test_unicode.py
@@ -186,6 +186,7 @@ def endswith_with_start_only_usecase(x, y, start):
 def endswith_with_start_end_usecase(x, y, start, end):
     return x.endswith(y, start, end)
 
+
 def split_usecase(x, y):
     return x.split(y)
 
@@ -840,7 +841,7 @@ def test_count_with_start_end(self):
                                 "'{0}'.c_count('{1}', {2}, {3}) = {5}")
 
         for s, sub in UNICODE_COUNT_EXAMPLES:
-            for i , j in product(range(-18, 18), (-18, 18)):
+            for i, j in product(range(-18, 18), (-18, 18)):
                 py_result = pyfunc(s, sub, i, j)
                 c_result = cfunc(s, sub, i, j)
                 self.assertEqual(py_result, c_result,
diff --git a/numba/unicode.py b/numba/unicode.py
index e91676e2cee..b947f122bdf 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -48,7 +48,6 @@
                               _PyUnicode_IsAlpha, _PyUnicode_IsNumeric,
                               _Py_ISALPHA, _PyUnicode_IsDigit,
                               _PyUnicode_IsDecimalDigit)
-                              _PyUnicode_IsTitlecase, _Py_ISLOWER, _Py_ISUPPER)
 
 
 _py38_or_later = sys.version_info[:2] >= (3, 8)
@@ -530,23 +529,6 @@ def contains_impl(a, b):
         return contains_impl
 
 
-# https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9342-L9354    # noqa: E501
-@register_jitable
-def _adjust_indices(length, start, end):
-    if end > length:
-        end = length
-    if end < 0:
-        end += length
-        if end < 0:
-            end = 0
-    if start < 0:
-        start += length
-        if start < 0:
-            start = 0
-
-    return start, end
-
-
 def unicode_idx_check_type(ty, name):
     """Check object belongs to one of specific types
     ty: type
@@ -2072,6 +2054,8 @@ def impl(data):
 
         return res
 
+    return impl
+
 
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12017-L12045    # noqa: E501
 @overload_method(types.UnicodeType, 'isdecimal')

From d3188b50a6a05628c8f704a75e9dbf44f3d817e2 Mon Sep 17 00:00:00 2001
From: Stuart Archibald <stuart.archibald@googlemail.com>
Date: Tue, 31 Dec 2019 11:21:18 +0000
Subject: [PATCH 54/68] Refactor ljust/rjust

---
 numba/unicode.py | 90 ++++++++++++++++++------------------------------
 1 file changed, 34 insertions(+), 56 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index b947f122bdf..855fbaee279 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1172,66 +1172,44 @@ def center_impl(string, width, fillchar=' '):
     return center_impl
 
 
-@overload_method(types.UnicodeType, 'ljust')
-def unicode_ljust(string, width, fillchar=' '):
-    if not isinstance(width, types.Integer):
-        raise TypingError('The width must be an Integer')
-
-    if isinstance(fillchar, types.UnicodeCharSeq):
-        def ljust_impl(string, width, fillchar=' '):
-            return string.ljust(width, str(fillchar))
-        return ljust_impl
-
-    if not (fillchar == ' ' or isinstance(
-            fillchar, (types.Omitted, types.UnicodeType))):
-        raise TypingError('The fillchar must be a UnicodeType')
-
-    def ljust_impl(string, width, fillchar=' '):
-        str_len = len(string)
-        fillchar_len = len(fillchar)
-
-        if fillchar_len != 1:
-            raise ValueError('The fill character must be exactly one '
-                             'character long')
-
-        if width <= str_len:
-            return string
-
-        newstr = string + (fillchar * (width - str_len))
-
-        return newstr
-    return ljust_impl
-
-
-@overload_method(types.UnicodeType, 'rjust')
-def unicode_rjust(string, width, fillchar=' '):
-    if not isinstance(width, types.Integer):
-        raise TypingError('The width must be an Integer')
-
-    if isinstance(fillchar, types.UnicodeCharSeq):
-        def rjust_impl(string, width, fillchar=' '):
-            return string.rjust(width, str(fillchar))
-        return rjust_impl
-
-    if not (fillchar == ' ' or
-            isinstance(fillchar, (types.Omitted, types.UnicodeType))):
-        raise TypingError('The fillchar must be a UnicodeType')
-
-    def rjust_impl(string, width, fillchar=' '):
-        str_len = len(string)
-        fillchar_len = len(fillchar)
+def gen_unicode_Xjust(STRING_FIRST):
+    def unicode_Xjust(string, width, fillchar=' '):
+        if not isinstance(width, types.Integer):
+            raise TypingError('The width must be an Integer')
+
+        if isinstance(fillchar, types.UnicodeCharSeq):
+            def rjust_impl(string, width, fillchar=' '):
+                return string.rjust(width, str(fillchar))
+            return rjust_impl
+
+        if not (fillchar == ' ' or
+                isinstance(fillchar, (types.Omitted, types.UnicodeType))):
+            raise TypingError('The fillchar must be a UnicodeType')
+
+        def impl(string, width, fillchar=' '):
+            str_len = len(string)
+            fillchar_len = len(fillchar)
+
+            if fillchar_len != 1:
+                raise ValueError('The fill character must be exactly one '
+                                 'character long')
+
+            if width <= str_len:
+                return string
+
+            newstr = (fillchar * (width - str_len))
+            if STRING_FIRST:
+                return string + newstr
+            else:
+                return newstr + string
 
-        if fillchar_len != 1:
-            raise ValueError('The fill character must be exactly one '
-                             'character long')
+        return impl
 
-        if width <= str_len:
-            return string
+    return unicode_Xjust
 
-        newstr = (fillchar * (width - str_len)) + string
 
-        return newstr
-    return rjust_impl
+overload_method(types.UnicodeType, 'rjust')(gen_unicode_Xjust(False))
+overload_method(types.UnicodeType, 'ljust')(gen_unicode_Xjust(True))
 
 
 def generate_splitlines_func(is_line_break_func):

From e32b88f06a0032164e3333e22cf11ea94bee88c0 Mon Sep 17 00:00:00 2001
From: Stuart Archibald <stuart.archibald@googlemail.com>
Date: Tue, 31 Dec 2019 11:43:28 +0000
Subject: [PATCH 55/68] Refactor some isX methods

---
 docs/source/reference/pysupported.rst |  54 +++++------
 numba/unicode.py                      | 127 +++++++-------------------
 2 files changed, 59 insertions(+), 122 deletions(-)

diff --git a/docs/source/reference/pysupported.rst b/docs/source/reference/pysupported.rst
index e5bda3b997f..af0bcdc9e25 100644
--- a/docs/source/reference/pysupported.rst
+++ b/docs/source/reference/pysupported.rst
@@ -248,46 +248,46 @@ The following functions, attributes and methods are currently supported:
 * ``*`` (repetition of strings)
 * ``in``, ``.contains()``
 * ``==``, ``<``, ``<=``, ``>``, ``>=`` (comparison)
+* ``.capitalize()``
 * ``.casefold()``
-* ``.startswith()``
+* ``.center()``
+* ``.count()``
+* ``.endswith()``
 * ``.endswith()``
 * ``.expandtabs()``
-* ``.isspace()``
-* ``.isidentifier()``
 * ``.find()``
-* ``.center()``
-* ``.ljust()``
-* ``.rjust()``
-* ``.split()``
-* ``.splitlines()``
-* ``.rsplit()``
-* ``.join()``
-* ``.lstrip()``
-* ``.rstrip()``
-* ``.strip()``
-* ``.capitalize()``
-* ``.isupper()``
-* ``.upper()``
-* ``.isnumeric()``
-* ``.isdigit()``
+* ``.index()``
+* ``.isalnum()``
+* ``.isalpha()``
 * ``.isdecimal()``
+* ``.isdigit()``
+* ``.isidentifier()``
 * ``.islower()``
-* ``.lower()``
-* ``.partition()``
+* ``.isnumeric()``
 * ``.isprintable()``
-* ``.zfill()``
-* ``.rpartition()``
-* ``.count()``
+* ``.isspace()``
 * ``.istitle()``
+* ``.isupper()``
+* ``.join()``
+* ``.ljust()``
+* ``.lower()``
+* ``.lstrip()``
+* ``.partition()``
 * ``.replace()``
 * ``.rfind()``
 * ``.rindex()``
-* ``.index()``
+* ``.rjust()``
+* ``.rpartition()``
+* ``.rsplit()``
+* ``.rstrip()``
+* ``.split()``
+* ``.splitlines()``
+* ``.startswith()``
+* ``.strip()``
 * ``.swapcase()``
 * ``.title()``
-* ``.isalpha()``
-* ``.isalnum()``
-* ``.endswith()``
+* ``.upper()``
+* ``.zfill()``
 
 Additional operations as well as support for Python 2 strings / Python 3 bytes
 will be added in a future version of Numba.  Python 2 Unicode objects will
diff --git a/numba/unicode.py b/numba/unicode.py
index 855fbaee279..919a6572523 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1938,72 +1938,52 @@ def impl(a):
             return ret
     return impl
 
+# Factory for str.isX() overloads that apply one predicate per code point
 
-# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11896-L11925    # noqa: E501
-@overload_method(types.UnicodeType, 'isspace')
-def unicode_isspace(data):
-    """Implements UnicodeType.isspace()"""
-
-    def impl(data):
-        length = len(data)
-        if length == 1:
-            return _PyUnicode_IsSpace(_get_code_point(data, 0))
 
-        if length == 0:
-            return False
+def gen_isX(_PyUnicode_IS_func, empty_is_false=True):
+    def unicode_isX(data):
+        def impl(data):
+            length = len(data)
+            if length == 1:
+                return _PyUnicode_IS_func(_get_code_point(data, 0))
 
-        for i in range(length):
-            code_point = _get_code_point(data, i)
-            if not _PyUnicode_IsSpace(code_point):
+            if empty_is_false and length == 0:
                 return False
 
-        return True
-
-    return impl
-
-
-# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12096-L12124    # noqa: E501
-@overload_method(types.UnicodeType, 'isnumeric')
-def unicode_isnumeric(data):
-    """Implements UnicodeType.isnumeric()"""
-    def impl(data):
-        length = len(data)
-        if length == 1:
-            return _PyUnicode_IsNumeric(_get_code_point(data, 0))
+            for i in range(length):
+                code_point = _get_code_point(data, i)
+                if not _PyUnicode_IS_func(code_point):
+                    return False
 
-        if length == 0:
-            return False
+            return True
 
-        for i in range(length):
-            if not _PyUnicode_IsNumeric(_get_code_point(data, i)):
-                return False
+        return impl
+    return unicode_isX
 
-        return True
 
-    return impl
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11896-L11925    # noqa: E501
+overload_method(types.UnicodeType, 'isspace')(gen_isX(_PyUnicode_IsSpace))
 
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12096-L12124    # noqa: E501
+overload_method(types.UnicodeType, 'isnumeric')(gen_isX(_PyUnicode_IsNumeric))
 
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12056-L12085    # noqa: E501
-@overload_method(types.UnicodeType, 'isdigit')
-def unicode_isdigit(data):
-    """Implements UnicodeType.isdigit()"""
-
-    def impl(data):
-        length = len(data)
+overload_method(types.UnicodeType, 'isdigit')(gen_isX(_PyUnicode_IsDigit))
 
-        if length == 1:
-            ch = _get_code_point(data, 0)
-            return _PyUnicode_IsDigit(ch)
-        if length == 0:
-            return False
-
-        for i in range(length):
-            if not _PyUnicode_IsDigit(_get_code_point(data, i)):
-                return False
-
-        return True
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12017-L12045    # noqa: E501
+overload_method(
+    types.UnicodeType,
+    'isdecimal')(
+        gen_isX(_PyUnicode_IsDecimalDigit))
 
-    return impl
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12188-L12213    # noqa: E501
+overload_method(
+    types.UnicodeType,
+    'isprintable')(
+        gen_isX(
+            _PyUnicode_IsPrintable,
+            False))
 
 
 def generate_operation_func(ascii_func, unicode_nres_func):
@@ -2035,29 +2015,6 @@ def impl(data):
     return impl
 
 
-# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12017-L12045    # noqa: E501
-@overload_method(types.UnicodeType, 'isdecimal')
-def unicode_isdecimal(data):
-    """Implements UnicodeType.isdecimal()"""
-
-    def impl(data):
-        length = len(data)
-
-        if length == 1:
-            return _PyUnicode_IsDecimalDigit(_get_code_point(data, 0))
-
-        if length == 0:
-            return False
-
-        for i in range(length):
-            if not _PyUnicode_IsDecimalDigit(_get_code_point(data, i)):
-                return False
-
-        return True
-
-    return impl
-
-
 @register_jitable
 def _unicode_casefold_doer(data, length, res, maxchars):
     k = 0
@@ -2122,26 +2079,6 @@ def impl(s):
     return impl
 
 
-# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12188-L12213    # noqa: E501
-@overload_method(types.UnicodeType, 'isprintable')
-def unicode_isprintable(data):
-    """Implements UnicodeType.isprintable()"""
-
-    def impl(data):
-        length = len(data)
-        if length == 1:
-            return _PyUnicode_IsPrintable(_get_code_point(data, 0))
-
-        for i in range(length):
-            code_point = _get_code_point(data, i)
-            if not _PyUnicode_IsPrintable(code_point):
-                return False
-
-        return True
-
-    return impl
-
-
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11975-L12006    # noqa: E501
 @overload_method(types.UnicodeType, 'isalnum')
 def unicode_isalnum(data):

From 1561eb02eb7926e87fbab978846f7de1db21128e Mon Sep 17 00:00:00 2001
From: Stuart Archibald <stuart.archibald@googlemail.com>
Date: Tue, 31 Dec 2019 12:01:13 +0000
Subject: [PATCH 56/68] Factor out isalX methods

As title.
---
 numba/unicode.py | 89 +++++++++++++++++++-----------------------------
 1 file changed, 35 insertions(+), 54 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 919a6572523..d64f9663456 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1711,34 +1711,50 @@ def impl(a):
         return impl
 
 
-# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11928-L11964    # noqa: E501
-@overload_method(types.UnicodeType, 'isalpha')
-def unicode_isalpha(data):
-    """Implements UnicodeType.isalpha()"""
+# Builds str.isalpha()/str.isalnum() from an ASCII and a unicode predicate.
+def gen_isAlX(ascii_func, unicode_func):
+    def unicode_isAlX(data):
 
-    def impl(data):
-        length = len(data)
-        if length == 0:
-            return False
+        def impl(data):
+            length = len(data)
+            if length == 0:
+                return False
 
-        if length == 1:
-            code_point = _get_code_point(data, 0)
-            return _PyUnicode_IsAlpha(code_point)
+            if length == 1:
+                code_point = _get_code_point(data, 0)
+                if data._is_ascii:
+                    return ascii_func(code_point)
+                else:
+                    return unicode_func(code_point)
+
+            if data._is_ascii:
+                for i in range(length):
+                    code_point = _get_code_point(data, i)
+                    if not ascii_func(code_point):
+                        return False
 
-        if data._is_ascii:
             for i in range(length):
                 code_point = _get_code_point(data, i)
-                if not _Py_ISALPHA(code_point):
+                if not unicode_func(code_point):
                     return False
 
-        for i in range(length):
-            code_point = _get_code_point(data, i)
-            if not _PyUnicode_IsAlpha(code_point):
-                return False
+            return True
 
-        return True
+        return impl
+    return unicode_isAlX
 
-    return impl
+
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11928-L11964    # noqa: E501
+overload_method(types.UnicodeType, 'isalpha')(gen_isAlX(_Py_ISALPHA,
+                                                        _PyUnicode_IsAlpha))
+
+_unicode_is_alnum = register_jitable(lambda x:
+                                     (_PyUnicode_IsNumeric(x) or
+                                      _PyUnicode_IsAlpha(x)))
+
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11975-L12006    # noqa: E501
+overload_method(types.UnicodeType, 'isalnum')(gen_isAlX(_Py_ISALNUM,
+                                                        _unicode_is_alnum))
 
 
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10765-L10774    # noqa: E501
@@ -2079,41 +2095,6 @@ def impl(s):
     return impl
 
 
-# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11975-L12006    # noqa: E501
-@overload_method(types.UnicodeType, 'isalnum')
-def unicode_isalnum(data):
-    """Implements UnicodeType.isalnum()"""
-
-    def impl(data):
-        length = len(data)
-
-        if length == 1:
-            code_point = _get_code_point(data, 0)
-            if data._is_ascii:
-                return _Py_ISALNUM(code_point)
-            return (_PyUnicode_IsNumeric(code_point) or
-                    _PyUnicode_IsAlpha(code_point))
-
-        if length == 0:
-            return False
-
-        if data._is_ascii:
-            for i in range(length):
-                code_point = _get_code_point(data, i)
-                if not _Py_ISALNUM(code_point):
-                    return False
-
-        for i in range(length):
-            code_point = _get_code_point(data, i)
-            if (not _PyUnicode_IsNumeric(code_point) and
-                    not _PyUnicode_IsAlpha(code_point)):
-                return False
-
-        return True
-
-    return impl
-
-
 if sys.version_info[:2] >= (3, 7):
     @overload_method(types.UnicodeType, 'isascii')
     def unicode_isascii(data):

From c98755840fb227351c9906db5412df97d95ae1fc Mon Sep 17 00:00:00 2001
From: Stuart Archibald <stuart.archibald@googlemail.com>
Date: Tue, 31 Dec 2019 12:13:41 +0000
Subject: [PATCH 57/68] loop hoist invariant

---
 numba/unicode.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index d64f9663456..246bfad8295 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1094,22 +1094,23 @@ def rsplit_impl(data, sep=None, maxsplit=-1):
         def _rsplit_char(data, ch, maxsplit):
             # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/split.h#L242-L284    # noqa: E501
             result = []
+            ch_code_point = _get_code_point(ch, 0)
             i = j = len(data) - 1
             while i >= 0 and maxsplit > 0:
                 data_code_point = _get_code_point(data, i)
-                ch_code_point = _get_code_point(ch, 0)
                 if data_code_point == ch_code_point:
-                    result.append(data[i + 1:j + 1])
+                    result.append(data[i + 1 : j + 1])
                     j = i = i - 1
                     maxsplit -= 1
                 i -= 1
             if j >= -1:
-                result.append(data[0:j + 1])
+                result.append(data[0 : j + 1])
 
             return result[::-1]
 
         if maxsplit < 0:
             maxsplit = sys.maxsize
+
         sep_length = len(sep)
 
         if sep_length == 0:

From 9c80831d2f739ec20fe8ef40251d785870cb19b6 Mon Sep 17 00:00:00 2001
From: Stuart Archibald <stuart.archibald@googlemail.com>
Date: Tue, 31 Dec 2019 12:48:42 +0000
Subject: [PATCH 58/68] Fix bug

---
 numba/unicode.py | 38 +++++++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 246bfad8295..8847a374c51 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -2068,44 +2068,52 @@ def unicode_casefold(data):
     """Implements str.casefold()"""
     return _do_casefold
 
+if sys.version_info[:2] >= (3, 7):
+    @overload_method(types.UnicodeType, 'isascii')
+    def unicode_isascii(data):
+        """Implements UnicodeType.isascii()"""
+
+        def impl(data):
+            return data._is_ascii
+        return impl
 
 @overload_method(types.UnicodeType, 'istitle')
-def unicode_istitle(s):
+def unicode_istitle(data):
     """
     Implements UnicodeType.istitle()
     The algorithm is an approximate translation from CPython:
     https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11829-L11885 # noqa: E501
     """
 
-    def impl(s):
+    def impl(data):
+        length = len(data)
+        if length == 1:
+            char = _get_code_point(data, 0)
+            return _PyUnicode_IsUppercase(char) or _PyUnicode_IsTitlecase(char)
+
+        if length == 0:
+            return False
+
         cased = False
         previous_is_cased = False
-        for char in s:
+        for idx in range(length):
+            char = _get_code_point(data, idx)
             if _PyUnicode_IsUppercase(char) or _PyUnicode_IsTitlecase(char):
                 if previous_is_cased:
                     return False
-                cased = True
                 previous_is_cased = True
+                cased = True
             elif _PyUnicode_IsLowercase(char):
                 if not previous_is_cased:
                     return False
+                previous_is_cased = True
+                cased = True
             else:
                 previous_is_cased = False
 
         return cased
     return impl
 
-
-if sys.version_info[:2] >= (3, 7):
-    @overload_method(types.UnicodeType, 'isascii')
-    def unicode_isascii(data):
-        """Implements UnicodeType.isascii()"""
-
-        def impl(data):
-            return data._is_ascii
-        return impl
-
-
 @overload_method(types.UnicodeType, 'islower')
 def unicode_islower(data):
     """

From 32e8372418e9e1aac1931c29cf2fb1c81cf837af Mon Sep 17 00:00:00 2001
From: Stuart Archibald <stuart.archibald@googlemail.com>
Date: Tue, 31 Dec 2019 13:04:07 +0000
Subject: [PATCH 59/68] Fix flake8

---
 numba/unicode.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/numba/unicode.py b/numba/unicode.py
index 8847a374c51..726def15159 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -2068,6 +2068,7 @@ def unicode_casefold(data):
     """Implements str.casefold()"""
     return _do_casefold
 
+
 if sys.version_info[:2] >= (3, 7):
     @overload_method(types.UnicodeType, 'isascii')
     def unicode_isascii(data):
@@ -2077,6 +2078,7 @@ def impl(data):
             return data._is_ascii
         return impl
 
+
 @overload_method(types.UnicodeType, 'istitle')
 def unicode_istitle(data):
     """
@@ -2114,6 +2116,7 @@ def impl(data):
         return cased
     return impl
 
+
 @overload_method(types.UnicodeType, 'islower')
 def unicode_islower(data):
     """

From a37c1a089368551230976a88b28d81490aff087b Mon Sep 17 00:00:00 2001
From: Siu Kwan Lam <michael.lam.sk@gmail.com>
Date: Tue, 31 Dec 2019 10:40:10 -0600
Subject: [PATCH 60/68] Fix ljust

---
 numba/unicode.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 726def15159..3eb89de4463 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1179,9 +1179,14 @@ def unicode_Xjust(string, width, fillchar=' '):
             raise TypingError('The width must be an Integer')
 
         if isinstance(fillchar, types.UnicodeCharSeq):
-            def rjust_impl(string, width, fillchar=' '):
-                return string.rjust(width, str(fillchar))
-            return rjust_impl
+            if STRING_FIRST:
+                def ljust_impl(string, width, fillchar=' '):
+                    return string.ljust(width, str(fillchar))
+                return ljust_impl
+            else:
+                def rjust_impl(string, width, fillchar=' '):
+                    return string.rjust(width, str(fillchar))
+                return rjust_impl
 
         if not (fillchar == ' ' or
                 isinstance(fillchar, (types.Omitted, types.UnicodeType))):

From d2584f6eab4aa43430e9a95a033d33f3a0fe853f Mon Sep 17 00:00:00 2001
From: Stuart Archibald <stuart.archibald@googlemail.com>
Date: Thu, 2 Jan 2020 10:20:51 +0000
Subject: [PATCH 61/68] Fix pep8

---
 numba/unicode.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 3eb89de4463..d0cb17181f1 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1994,18 +1994,12 @@ def impl(data):
 overload_method(types.UnicodeType, 'isdigit')(gen_isX(_PyUnicode_IsDigit))
 
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12017-L12045    # noqa: E501
-overload_method(
-    types.UnicodeType,
-    'isdecimal')(
-        gen_isX(_PyUnicode_IsDecimalDigit))
+overload_method(types.UnicodeType, 'isdecimal')(
+    gen_isX(_PyUnicode_IsDecimalDigit))
 
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12188-L12213    # noqa: E501
-overload_method(
-    types.UnicodeType,
-    'isprintable')(
-        gen_isX(
-            _PyUnicode_IsPrintable,
-            False))
+overload_method(types.UnicodeType, 'isprintable')(
+    gen_isX(_PyUnicode_IsPrintable, False))
 
 
 def generate_operation_func(ascii_func, unicode_nres_func):

From 5ab03f32a53dd525882216b706e5e469f0582c86 Mon Sep 17 00:00:00 2001
From: Stuart Archibald <stuart.archibald@googlemail.com>
Date: Thu, 2 Jan 2020 10:44:12 +0000
Subject: [PATCH 62/68] refactor

---
 numba/unicode.py | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index d0cb17181f1..e210c69fa51 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -2001,8 +2001,8 @@ def impl(data):
 overload_method(types.UnicodeType, 'isprintable')(
     gen_isX(_PyUnicode_IsPrintable, False))
 
-
-def generate_operation_func(ascii_func, unicode_nres_func):
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9863-L9908    # noqa: E501
+def case_operation(ascii_func, unicode_func):
     """Generate common case operation performer."""
     def impl(data):
         length = len(data)
@@ -2012,14 +2012,13 @@ def impl(data):
         if data._is_ascii:
             res = _empty_string(data._kind, length, 1)
             ascii_func(data, res)
-
             return res
 
         # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9863-L9908    # noqa: E501
         tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length, data._is_ascii)
         # maxchar should be inside of a list to be pass as argument by reference
         maxchars = [0]
-        newlength = unicode_nres_func(data, length, tmp, maxchars)
+        newlength = unicode_func(data, length, tmp, maxchars)
         maxchar = maxchars[0]
         newkind = _codepoint_to_kind(maxchar)
         res = _empty_string(newkind, newlength, _codepoint_is_ascii(maxchar))
@@ -2030,9 +2029,9 @@ def impl(data):
 
     return impl
 
-
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9819-L9834    # noqa: E501
 @register_jitable
-def _unicode_casefold_doer(data, length, res, maxchars):
+def _unicode_casefold(data, length, res, maxchars):
     k = 0
     mapped = np.zeros(3, dtype=_Py_UCS4)
     for idx in range(length):
@@ -2049,23 +2048,19 @@ def _unicode_casefold_doer(data, length, res, maxchars):
 
 
 @register_jitable
-def _ascii_casefold_doer(data, res):
+def _ascii_casefold(data, res):
     for idx in range(len(data)):
         code_point = _get_code_point(data, idx)
         _set_code_point(res, idx, _Py_TOLOWER(code_point))
 
 
-_do_casefold = register_jitable(generate_operation_func(_ascii_casefold_doer,
-                                                        _unicode_casefold_doer))
-
-
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10782-L10791    # noqa: E501
 # mixed with
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9819-L9834    # noqa: E501
 @overload_method(types.UnicodeType, 'casefold')
 def unicode_casefold(data):
     """Implements str.casefold()"""
-    return _do_casefold
+    return case_operation(_ascii_casefold, _unicode_casefold)
 
 
 if sys.version_info[:2] >= (3, 7):

From 21a3f70c68e712b90753d7b136cd263a2259494b Mon Sep 17 00:00:00 2001
From: Stuart Archibald <stuart.archibald@googlemail.com>
Date: Thu, 2 Jan 2020 11:04:54 +0000
Subject: [PATCH 63/68] Fix title() to use ascii shortcut

---
 numba/unicode.py | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index e210c69fa51..34948d211d9 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -2001,6 +2001,7 @@ def impl(data):
 overload_method(types.UnicodeType, 'isprintable')(
     gen_isX(_PyUnicode_IsPrintable, False))
 
+
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9863-L9908    # noqa: E501
 def case_operation(ascii_func, unicode_func):
     """Generate common case operation performer."""
@@ -2174,11 +2175,11 @@ def _lower_ucs4(code_point, data, length, idx, mapped):
 
 # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9996-L10021    # noqa: E501
 @register_jitable
-def _do_title(data, length, res, maxchars):
+def _unicode_title(data, length, res, maxchars):
     """This is a translation of the function that titles a unicode string."""
     k = 0
     previous_cased = False
-    mapped = np.zeros(3, dtype=_Py_UCS4)
+    mapped = np.empty(3, dtype=_Py_UCS4)
     for idx in range(length):
         mapped.fill(0)
         code_point = _get_code_point(data, idx)
@@ -2195,25 +2196,32 @@ def _do_title(data, length, res, maxchars):
     return k
 
 
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/bytes_methods.c#L332-L352    # noqa: E501
+@register_jitable
+def _ascii_title(data, res):
+    """ Does .title() on an ASCII string """
+    previous_is_cased = False
+    for idx in range(len(data)):
+        code_point = _get_code_point(data, idx)
+        if _Py_ISLOWER(code_point):
+            if not previous_is_cased:
+                code_point = _Py_TOUPPER(code_point)
+            previous_is_cased = True
+        elif _Py_ISUPPER(code_point):
+            if previous_is_cased:
+                code_point = _Py_TOLOWER(code_point)
+            previous_is_cased = True
+        else:
+            previous_is_cased = False
+        _set_code_point(res, idx, code_point)
+
+
 # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L10023-L10069    # noqa: E501
 @overload_method(types.UnicodeType, 'title')
 def unicode_title(data):
     """Implements str.title()"""
     # https://docs.python.org/3/library/stdtypes.html#str.title
-    def impl(data):
-        length = len(data)
-        tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length, data._is_ascii)
-        # maxchar should be inside of a list to be pass as argument by reference
-        maxchar = 0
-        maxchars = [maxchar]
-        newlength = _do_title(data, length, tmp, maxchars)
-        maxchar, = maxchars
-        newkind = _codepoint_to_kind(maxchar)
-        res = _empty_string(newkind, newlength, _codepoint_is_ascii(maxchar))
-        for i in range(newlength):
-            _set_code_point(res, i, _get_code_point(tmp, i))
-        return res
-    return impl
+    return case_operation(_ascii_title, _unicode_title)
 
 
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L13140-L13147    # noqa: E501

From a848a1e40c405370f4b758af85ae33c0529aab14 Mon Sep 17 00:00:00 2001
From: Stuart Archibald <stuart.archibald@googlemail.com>
Date: Thu, 2 Jan 2020 13:51:18 +0000
Subject: [PATCH 64/68] Refactor unicode.capitalize

---
 numba/unicode.py | 87 ++++++++++++++++++++----------------------------
 1 file changed, 37 insertions(+), 50 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 34948d211d9..5fc67c55275 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1763,63 +1763,50 @@ def impl(data):
                                                         _unicode_is_alnum))
 
 
-# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10765-L10774    # noqa: E501
-@overload_method(types.UnicodeType, 'capitalize')
-def unicode_capitalize(data):
-    """Implements str.capitalize()"""
-    def impl(data):
-        length = len(data)
-        if length == 0:
-            return _empty_string(data._kind, length, data._is_ascii)
-
-        if data._is_ascii:
-            # https://github.com/python/cpython/blob/593bb30e82eded7f2ec02f7d1aa49742e6962113/Objects/bytes_methods.c#L361-L368    # noqa: E501
-            # mixed with:
-            # https://github.com/python/cpython/blob/593bb30e82eded7f2ec02f7d1aa49742e6962113/Objects/bytes_methods.c#L299-L307    # noqa: E501
-            res = _empty_string(data._kind, length, 1)
-            code_point = _get_code_point(data, 0)
-            _set_code_point(res, 0, _Py_TOUPPER(code_point))
-            for idx in range(1, length):
-                code_point = _get_code_point(data, idx)
-                _set_code_point(res, idx, _Py_TOLOWER(code_point))
-
-            return res
-
-        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9863-L9908    # noqa: E501
-        # mixed with:
-        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9737-L9759    # noqa: E501
-        k = 0
-        maxchar = 0
-        mapped = np.zeros(3, dtype=_Py_UCS4)
-        tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length)
-        code_point = _get_code_point(data, 0)
+@register_jitable
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9737-L9759    # noqa: E501
+def _unicode_capitalize(data, length, res, maxchars):
+    k = 0
+    maxchar = 0
+    mapped = np.zeros(3, dtype=_Py_UCS4)
+    code_point = _get_code_point(data, 0)
 
-        # https://github.com/python/cpython/commit/b015fc86f7b1f35283804bfee788cce0a5495df7/Objects/unicodeobject.c#diff-220e5da0d1c8abf508b25c02da6ca16c    # noqa: E501
-        if _py38_or_later:
-            n_res = _PyUnicode_ToTitleFull(code_point, mapped)
-        else:
-            n_res = _PyUnicode_ToUpperFull(code_point, mapped)
+    # https://github.com/python/cpython/commit/b015fc86f7b1f35283804bfee788cce0a5495df7/Objects/unicodeobject.c#diff-220e5da0d1c8abf508b25c02da6ca16c    # noqa: E501
+    if _py38_or_later:
+        n_res = _PyUnicode_ToTitleFull(code_point, mapped)
+    else:
+        n_res = _PyUnicode_ToUpperFull(code_point, mapped)
 
+    for m in mapped[:n_res]:
+        maxchar = max(maxchar, m)
+        _set_code_point(res, k, m)
+        k += 1
+    for idx in range(1, length):
+        mapped.fill(0)
+        code_point = _get_code_point(data, idx)
+        n_res = _lower_ucs4(code_point, data, length, idx, mapped)
         for m in mapped[:n_res]:
             maxchar = max(maxchar, m)
-            _set_code_point(tmp, k, m)
+            _set_code_point(res, k, m)
             k += 1
-        for idx in range(1, length):
-            mapped.fill(0)
-            code_point = _get_code_point(data, idx)
-            n_res = _lower_ucs4(code_point, data, length, idx, mapped)
-            for m in mapped[:n_res]:
-                maxchar = max(maxchar, m)
-                _set_code_point(tmp, k, m)
-                k += 1
-        newkind = _codepoint_to_kind(maxchar)
-        res = _empty_string(newkind, k)
-        for i in range(k):
-            _set_code_point(res, i, _get_code_point(tmp, i))
+    maxchars[0] = maxchar
+    return k
 
-        return res
 
-    return impl
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/bytes_methods.c#L361-L382    # noqa: E501
+@register_jitable
+def _ascii_capitalize(data, res):
+    code_point = _get_code_point(data, 0)
+    _set_code_point(res, 0, _Py_TOUPPER(code_point))
+    for idx in range(1, len(data)):
+        code_point = _get_code_point(data, idx)
+        _set_code_point(res, idx, _Py_TOLOWER(code_point))
+
+
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10765-L10774    # noqa: E501
+@overload_method(types.UnicodeType, 'capitalize')
+def unicode_capitalize(data):
+    return case_operation(_ascii_capitalize, _unicode_capitalize)
 
 
 def _is_upper(is_lower, is_upper, is_title):

From c785737b1cb9996f5877306c7280e619cd397336 Mon Sep 17 00:00:00 2001
From: Stuart Archibald <stuart.archibald@googlemail.com>
Date: Thu, 2 Jan 2020 14:08:19 +0000
Subject: [PATCH 65/68] Fix ordering of functions

---
 numba/unicode.py | 242 +++++++++++++++++++++++------------------------
 1 file changed, 121 insertions(+), 121 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 5fc67c55275..408e69865f8 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1763,52 +1763,6 @@ def impl(data):
                                                         _unicode_is_alnum))
 
 
-@register_jitable
-# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9737-L9759    # noqa: E501
-def _unicode_capitalize(data, length, res, maxchars):
-    k = 0
-    maxchar = 0
-    mapped = np.zeros(3, dtype=_Py_UCS4)
-    code_point = _get_code_point(data, 0)
-
-    # https://github.com/python/cpython/commit/b015fc86f7b1f35283804bfee788cce0a5495df7/Objects/unicodeobject.c#diff-220e5da0d1c8abf508b25c02da6ca16c    # noqa: E501
-    if _py38_or_later:
-        n_res = _PyUnicode_ToTitleFull(code_point, mapped)
-    else:
-        n_res = _PyUnicode_ToUpperFull(code_point, mapped)
-
-    for m in mapped[:n_res]:
-        maxchar = max(maxchar, m)
-        _set_code_point(res, k, m)
-        k += 1
-    for idx in range(1, length):
-        mapped.fill(0)
-        code_point = _get_code_point(data, idx)
-        n_res = _lower_ucs4(code_point, data, length, idx, mapped)
-        for m in mapped[:n_res]:
-            maxchar = max(maxchar, m)
-            _set_code_point(res, k, m)
-            k += 1
-    maxchars[0] = maxchar
-    return k
-
-
-# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/bytes_methods.c#L361-L382    # noqa: E501
-@register_jitable
-def _ascii_capitalize(data, res):
-    code_point = _get_code_point(data, 0)
-    _set_code_point(res, 0, _Py_TOUPPER(code_point))
-    for idx in range(1, len(data)):
-        code_point = _get_code_point(data, idx)
-        _set_code_point(res, idx, _Py_TOLOWER(code_point))
-
-
-# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10765-L10774    # noqa: E501
-@overload_method(types.UnicodeType, 'capitalize')
-def unicode_capitalize(data):
-    return case_operation(_ascii_capitalize, _unicode_capitalize)
-
-
 def _is_upper(is_lower, is_upper, is_title):
     # impl is an approximate translation of:
     # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11794-L11827    # noqa: E501
@@ -2051,81 +2005,6 @@ def unicode_casefold(data):
     return case_operation(_ascii_casefold, _unicode_casefold)
 
 
-if sys.version_info[:2] >= (3, 7):
-    @overload_method(types.UnicodeType, 'isascii')
-    def unicode_isascii(data):
-        """Implements UnicodeType.isascii()"""
-
-        def impl(data):
-            return data._is_ascii
-        return impl
-
-
-@overload_method(types.UnicodeType, 'istitle')
-def unicode_istitle(data):
-    """
-    Implements UnicodeType.istitle()
-    The algorithm is an approximate translation from CPython:
-    https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11829-L11885 # noqa: E501
-    """
-
-    def impl(data):
-        length = len(data)
-        if length == 1:
-            char = _get_code_point(data, 0)
-            return _PyUnicode_IsUppercase(char) or _PyUnicode_IsTitlecase(char)
-
-        if length == 0:
-            return False
-
-        cased = False
-        previous_is_cased = False
-        for idx in range(length):
-            char = _get_code_point(data, idx)
-            if _PyUnicode_IsUppercase(char) or _PyUnicode_IsTitlecase(char):
-                if previous_is_cased:
-                    return False
-                previous_is_cased = True
-                cased = True
-            elif _PyUnicode_IsLowercase(char):
-                if not previous_is_cased:
-                    return False
-                previous_is_cased = True
-                cased = True
-            else:
-                previous_is_cased = False
-
-        return cased
-    return impl
-
-
-@overload_method(types.UnicodeType, 'islower')
-def unicode_islower(data):
-    """
-    impl is an approximate translation of:
-    https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L11900-L11933    # noqa: E501
-    mixed with:
-    https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/bytes_methods.c#L131-L156    # noqa: E501
-    """
-
-    def impl(data):
-        length = len(data)
-        if length == 1:
-            return _PyUnicode_IsLowercase(_get_code_point(data, 0))
-        if length == 0:
-            return False
-
-        cased = False
-        for idx in range(length):
-            cp = _get_code_point(data, idx)
-            if _PyUnicode_IsUppercase(cp) or _PyUnicode_IsTitlecase(cp):
-                return False
-            elif not cased and _PyUnicode_IsLowercase(cp):
-                cased = True
-        return cased
-    return impl
-
-
 # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9856-L9883    # noqa: E501
 @register_jitable
 def _handle_capital_sigma(data, length, idx):
@@ -2160,6 +2039,52 @@ def _lower_ucs4(code_point, data, length, idx, mapped):
     return _PyUnicode_ToLowerFull(code_point, mapped)
 
 
+@register_jitable
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9737-L9759    # noqa: E501
+def _unicode_capitalize(data, length, res, maxchars):
+    k = 0
+    maxchar = 0
+    mapped = np.zeros(3, dtype=_Py_UCS4)
+    code_point = _get_code_point(data, 0)
+
+    # https://github.com/python/cpython/commit/b015fc86f7b1f35283804bfee788cce0a5495df7/Objects/unicodeobject.c#diff-220e5da0d1c8abf508b25c02da6ca16c    # noqa: E501
+    if _py38_or_later:
+        n_res = _PyUnicode_ToTitleFull(code_point, mapped)
+    else:
+        n_res = _PyUnicode_ToUpperFull(code_point, mapped)
+
+    for m in mapped[:n_res]:
+        maxchar = max(maxchar, m)
+        _set_code_point(res, k, m)
+        k += 1
+    for idx in range(1, length):
+        mapped.fill(0)
+        code_point = _get_code_point(data, idx)
+        n_res = _lower_ucs4(code_point, data, length, idx, mapped)
+        for m in mapped[:n_res]:
+            maxchar = max(maxchar, m)
+            _set_code_point(res, k, m)
+            k += 1
+    maxchars[0] = maxchar
+    return k
+
+
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/bytes_methods.c#L361-L382    # noqa: E501
+@register_jitable
+def _ascii_capitalize(data, res):
+    code_point = _get_code_point(data, 0)
+    _set_code_point(res, 0, _Py_TOUPPER(code_point))
+    for idx in range(1, len(data)):
+        code_point = _get_code_point(data, idx)
+        _set_code_point(res, idx, _Py_TOLOWER(code_point))
+
+
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10765-L10774    # noqa: E501
+@overload_method(types.UnicodeType, 'capitalize')
+def unicode_capitalize(data):
+    return case_operation(_ascii_capitalize, _unicode_capitalize)
+
+
 # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9996-L10021    # noqa: E501
 @register_jitable
 def _unicode_title(data, length, res, maxchars):
@@ -2211,6 +2136,81 @@ def unicode_title(data):
     return case_operation(_ascii_title, _unicode_title)
 
 
+if sys.version_info[:2] >= (3, 7):
+    @overload_method(types.UnicodeType, 'isascii')
+    def unicode_isascii(data):
+        """Implements UnicodeType.isascii()"""
+
+        def impl(data):
+            return data._is_ascii
+        return impl
+
+
+@overload_method(types.UnicodeType, 'istitle')
+def unicode_istitle(data):
+    """
+    Implements UnicodeType.istitle()
+    The algorithm is an approximate translation from CPython:
+    https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11829-L11885 # noqa: E501
+    """
+
+    def impl(data):
+        length = len(data)
+        if length == 1:
+            char = _get_code_point(data, 0)
+            return _PyUnicode_IsUppercase(char) or _PyUnicode_IsTitlecase(char)
+
+        if length == 0:
+            return False
+
+        cased = False
+        previous_is_cased = False
+        for idx in range(length):
+            char = _get_code_point(data, idx)
+            if _PyUnicode_IsUppercase(char) or _PyUnicode_IsTitlecase(char):
+                if previous_is_cased:
+                    return False
+                previous_is_cased = True
+                cased = True
+            elif _PyUnicode_IsLowercase(char):
+                if not previous_is_cased:
+                    return False
+                previous_is_cased = True
+                cased = True
+            else:
+                previous_is_cased = False
+
+        return cased
+    return impl
+
+
+@overload_method(types.UnicodeType, 'islower')
+def unicode_islower(data):
+    """
+    impl is an approximate translation of:
+    https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L11900-L11933    # noqa: E501
+    mixed with:
+    https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/bytes_methods.c#L131-L156    # noqa: E501
+    """
+
+    def impl(data):
+        length = len(data)
+        if length == 1:
+            return _PyUnicode_IsLowercase(_get_code_point(data, 0))
+        if length == 0:
+            return False
+
+        cased = False
+        for idx in range(length):
+            cp = _get_code_point(data, idx)
+            if _PyUnicode_IsUppercase(cp) or _PyUnicode_IsTitlecase(cp):
+                return False
+            elif not cased and _PyUnicode_IsLowercase(cp):
+                cased = True
+        return cased
+    return impl
+
+
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L13140-L13147    # noqa: E501
 @overload_method(types.UnicodeType, 'swapcase')
 def unicode_swapcase(data):

From 51f03cf722b02db1bd42174720cde2e0c99f52ae Mon Sep 17 00:00:00 2001
From: Stuart Archibald <stuart.archibald@googlemail.com>
Date: Thu, 2 Jan 2020 16:07:38 +0000
Subject: [PATCH 66/68] refactor swapcase

---
 numba/unicode.py | 93 +++++++++++++++++++++---------------------------
 1 file changed, 41 insertions(+), 52 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 408e69865f8..348d14e5124 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -2136,6 +2136,47 @@ def unicode_title(data):
     return case_operation(_ascii_title, _unicode_title)
 
 
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/bytes_methods.c#L391-L408    # noqa: E501
+@register_jitable
+def _ascii_swapcase(data, res):
+    for idx in range(len(data)):
+        code_point = _get_code_point(data, idx)
+        if _Py_ISUPPER(code_point):
+            code_point = _Py_TOLOWER(code_point)
+        elif _Py_ISLOWER(code_point):
+            code_point = _Py_TOUPPER(code_point)
+        _set_code_point(res, idx, code_point)
+
+
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9761-L9784    # noqa: E501
+@register_jitable
+def _unicode_swapcase(data, length, res, maxchars):
+    k = 0
+    maxchar = 0
+    mapped = np.empty(3, dtype=_Py_UCS4)
+    for idx in range(length):
+        mapped.fill(0)
+        code_point = _get_code_point(data, idx)
+        if _PyUnicode_IsUppercase(code_point):
+            n_res = _lower_ucs4(code_point, data, length, idx, mapped)
+        elif _PyUnicode_IsLowercase(code_point):
+            n_res = _PyUnicode_ToUpperFull(code_point, mapped)
+        else:
+            n_res = 1
+            mapped[0] = code_point
+        for m in mapped[:n_res]:
+            maxchar = max(maxchar, m)
+            _set_code_point(res, k, m)
+            k += 1
+    maxchars[0] = maxchar
+    return k
+
+
+@overload_method(types.UnicodeType, 'swapcase')
+def unicode_swapcase(data):
+    return case_operation(_ascii_swapcase, _unicode_swapcase)
+
+
 if sys.version_info[:2] >= (3, 7):
     @overload_method(types.UnicodeType, 'isascii')
     def unicode_isascii(data):
@@ -2211,58 +2252,6 @@ def impl(data):
     return impl
 
 
-# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L13140-L13147    # noqa: E501
-@overload_method(types.UnicodeType, 'swapcase')
-def unicode_swapcase(data):
-    """Implements str.swapcase()"""
-    def impl(data):
-        length = len(data)
-        if length == 0:
-            return _empty_string(data._kind, length, data._is_ascii)
-
-        if data._is_ascii:
-            res = _empty_string(data._kind, length, 1)
-            for idx in range(length):
-                code_point = _get_code_point(data, idx)
-                if _Py_ISUPPER(code_point):
-                    code_point = _Py_TOLOWER(code_point)
-                elif _Py_ISLOWER(code_point):
-                    code_point = _Py_TOUPPER(code_point)
-                _set_code_point(res, idx, code_point)
-
-            return res
-
-        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9863-L9908    # noqa: E501
-        # mixed with:
-        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9761-L9784    # noqa: E501
-        k = 0
-        maxchar = 0
-        mapped = np.zeros(3, dtype=_Py_UCS4)
-        tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length)
-        for idx in range(length):
-            mapped.fill(0)
-            code_point = _get_code_point(data, idx)
-            if _PyUnicode_IsUppercase(code_point):
-                n_res = _lower_ucs4(code_point, data, length, idx, mapped)
-            elif _PyUnicode_IsLowercase(code_point):
-                n_res = _PyUnicode_ToUpperFull(code_point, mapped)
-            else:
-                n_res = 1
-                mapped[0] = code_point
-            for m in mapped[:n_res]:
-                maxchar = max(maxchar, m)
-                _set_code_point(tmp, k, m)
-                k += 1
-        newkind = _codepoint_to_kind(maxchar)
-        res = _empty_string(newkind, k)
-        for i in range(k):
-            _set_code_point(res, i, _get_code_point(tmp, i))
-
-        return res
-
-    return impl
-
-
 # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9946-L9965    # noqa: E501
 @register_jitable
 def _do_upper_or_lower(data, length, res, maxchars, lower):

From 378d87e4be91140b60a28e06332ae0c31d2b9874 Mon Sep 17 00:00:00 2001
From: Stuart Archibald <stuart.archibald@googlemail.com>
Date: Thu, 2 Jan 2020 16:46:29 +0000
Subject: [PATCH 67/68] Move functions around

---
 numba/unicode.py | 478 ++++++++++++++++++++++++-----------------------
 1 file changed, 246 insertions(+), 232 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index 348d14e5124..f60cd7a0fa9 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -1369,30 +1369,9 @@ def zfill_impl(string, width):
     return zfill_impl
 
 
-# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12126-L12161    # noqa: E501
-@overload_method(types.UnicodeType, 'isidentifier')
-def unicode_isidentifier(data):
-    """Implements UnicodeType.isidentifier()"""
-
-    def impl(data):
-        length = len(data)
-        if length == 0:
-            return False
-
-        first_cp = _get_code_point(data, 0)
-        if not _PyUnicode_IsXidStart(first_cp) and first_cp != 0x5F:
-            return False
-
-        for i in range(1, length):
-            code_point = _get_code_point(data, i)
-            if not _PyUnicode_IsXidContinue(code_point):
-                return False
-
-        return True
-
-    return impl
-
-
+# ------------------------------------------------------------------------------
+# Strip functions
+# ------------------------------------------------------------------------------
 @register_jitable
 def unicode_strip_left_bound(string, chars):
     chars = ' ' if chars is None else chars
@@ -1481,7 +1460,9 @@ def strip_impl(string, chars=None):
     return strip_impl
 
 
-# String creation
+# ------------------------------------------------------------------------------
+# Slice functions
+# ------------------------------------------------------------------------------
 
 @register_jitable
 def normalize_str_idx(idx, length, is_start=True):
@@ -1648,6 +1629,11 @@ def getitem_slice(s, idx):
             return getitem_slice
 
 
+# ------------------------------------------------------------------------------
+# String operations
+# ------------------------------------------------------------------------------
+
+
 @overload(operator.add)
 @overload(operator.iadd)
 def unicode_concat(a, b):
@@ -1717,6 +1703,55 @@ def impl(a):
         return impl
 
 
+@overload_method(types.UnicodeType, 'replace')
+def unicode_replace(s, old_str, new_str, count=-1):
+    thety = count
+    if isinstance(count, types.Omitted):
+        thety = count.value
+    elif isinstance(count, types.Optional):
+        thety = count.type
+
+    if not isinstance(thety, (int, types.Integer)):
+        raise TypingError('Unsupported parameters. The parametrs '
+                          'must be Integer. Given count: {}'.format(count))
+
+    if not isinstance(old_str, (types.UnicodeType, types.NoneType)):
+        raise TypingError('The object must be a UnicodeType.'
+                          ' Given: {}'.format(old_str))
+
+    if not isinstance(new_str, types.UnicodeType):
+        raise TypingError('The object must be a UnicodeType.'
+                          ' Given: {}'.format(new_str))
+
+    def impl(s, old_str, new_str, count=-1):
+        if count == 0:
+            return s
+        if old_str == '':
+            schars = list(s)
+            if count == -1:
+                return new_str + new_str.join(schars) + new_str
+            split_result = [new_str]
+            min_count = min(len(schars), count)
+            for i in range(min_count):
+                split_result.append(schars[i])
+                if i + 1 != min_count:
+                    split_result.append(new_str)
+                else:
+                    split_result.append(''.join(schars[(i + 1):]))
+            if count > len(schars):
+                split_result.append(new_str)
+            return ''.join(split_result)
+        schars = s.split(old_str, count)
+        result = new_str.join(schars)
+        return result
+
+    return impl
+
+# ------------------------------------------------------------------------------
+# String `is*()` methods
+# ------------------------------------------------------------------------------
+
+
 # generates isalpha/isalnum
 def gen_isAlX(ascii_func, unicode_func):
     def unicode_isAlX(data):
@@ -1793,51 +1828,6 @@ def impl(a):
                                                _PyUnicode_IsTitlecase))
 
 
-@overload_method(types.UnicodeType, 'replace')
-def unicode_replace(s, old_str, new_str, count=-1):
-    thety = count
-    if isinstance(count, types.Omitted):
-        thety = count.value
-    elif isinstance(count, types.Optional):
-        thety = count.type
-
-    if not isinstance(thety, (int, types.Integer)):
-        raise TypingError('Unsupported parameters. The parametrs '
-                          'must be Integer. Given count: {}'.format(count))
-
-    if not isinstance(old_str, (types.UnicodeType, types.NoneType)):
-        raise TypingError('The object must be a UnicodeType.'
-                          ' Given: {}'.format(old_str))
-
-    if not isinstance(new_str, types.UnicodeType):
-        raise TypingError('The object must be a UnicodeType.'
-                          ' Given: {}'.format(new_str))
-
-    def impl(s, old_str, new_str, count=-1):
-        if count == 0:
-            return s
-        if old_str == '':
-            schars = list(s)
-            if count == -1:
-                return new_str + new_str.join(schars) + new_str
-            split_result = [new_str]
-            min_count = min(len(schars), count)
-            for i in range(min_count):
-                split_result.append(schars[i])
-                if i + 1 != min_count:
-                    split_result.append(new_str)
-                else:
-                    split_result.append(''.join(schars[(i + 1):]))
-            if count > len(schars):
-                split_result.append(new_str)
-            return ''.join(split_result)
-        schars = s.split(old_str, count)
-        result = new_str.join(schars)
-        return result
-
-    return impl
-
-
 @overload_method(types.UnicodeType, 'isupper')
 def unicode_isupper(a):
     """
@@ -1851,59 +1841,106 @@ def impl(a):
     return impl
 
 
-@overload_method(types.UnicodeType, 'upper')
-def unicode_upper(a):
+if sys.version_info[:2] >= (3, 7):
+    @overload_method(types.UnicodeType, 'isascii')
+    def unicode_isascii(data):
+        """Implements UnicodeType.isascii()"""
+
+        def impl(data):
+            return data._is_ascii
+        return impl
+
+
+@overload_method(types.UnicodeType, 'istitle')
+def unicode_istitle(data):
     """
-    Implements .upper()
+    Implements UnicodeType.istitle()
+    The algorithm is an approximate translation from CPython:
+    https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11829-L11885 # noqa: E501
     """
-    def impl(a):
-        # main structure is a translation of:
-        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L13308-L13316    # noqa: E501
 
-        # ASCII fast path
-        l = len(a)
-        if a._is_ascii:
-            # This is an approximate translation of:
-            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/bytes_methods.c#L300    # noqa: E501
-            ret = _empty_string(a._kind, l, a._is_ascii)
-            for idx in range(l):
-                code_point = _get_code_point(a, idx)
-                _set_code_point(ret, idx, _Py_TOUPPER(code_point))
-            return ret
-        else:
-            # This part in an amalgamation of two algorithms:
-            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9864-L9908    # noqa: E501
-            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9787-L9805    # noqa: E501
-            #
-            # The alg walks the string and writes the upper version of the code
-            # point into a 4byte kind unicode string and at the same time
-            # tracks the maximum width "upper" character encountered, following
-            # this the 4byte kind string is reinterpreted as needed into the
-            # maximum width kind string
-            tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * l, a._is_ascii)
-            mapped = np.array((3,), dtype=_Py_UCS4)
-            maxchar = 0
-            k = 0
-            for idx in range(l):
-                mapped[:] = 0
-                code_point = _get_code_point(a, idx)
-                n_res = _PyUnicode_ToUpperFull(_Py_UCS4(code_point), mapped)
-                for j in range(n_res):
-                    maxchar = max(maxchar, mapped[j])
-                    _set_code_point(tmp, k, mapped[j])
-                    k += 1
-            newlength = k
-            newkind = _codepoint_to_kind(maxchar)
-            ret = _empty_string(newkind, newlength,
-                                _codepoint_is_ascii(maxchar))
-            for i in range(newlength):
-                _set_code_point(ret, i, _get_code_point(tmp, i))
-            return ret
+    def impl(data):
+        length = len(data)
+        if length == 1:
+            char = _get_code_point(data, 0)
+            return _PyUnicode_IsUppercase(char) or _PyUnicode_IsTitlecase(char)
+
+        if length == 0:
+            return False
+
+        cased = False
+        previous_is_cased = False
+        for idx in range(length):
+            char = _get_code_point(data, idx)
+            if _PyUnicode_IsUppercase(char) or _PyUnicode_IsTitlecase(char):
+                if previous_is_cased:
+                    return False
+                previous_is_cased = True
+                cased = True
+            elif _PyUnicode_IsLowercase(char):
+                if not previous_is_cased:
+                    return False
+                previous_is_cased = True
+                cased = True
+            else:
+                previous_is_cased = False
+
+        return cased
     return impl
 
-# generator for simple unicode "isX" methods
 
+@overload_method(types.UnicodeType, 'islower')
+def unicode_islower(data):
+    """
+    impl is an approximate translation of:
+    https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L11900-L11933    # noqa: E501
+    mixed with:
+    https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/bytes_methods.c#L131-L156    # noqa: E501
+    """
+
+    def impl(data):
+        length = len(data)
+        if length == 1:
+            return _PyUnicode_IsLowercase(_get_code_point(data, 0))
+        if length == 0:
+            return False
+
+        cased = False
+        for idx in range(length):
+            cp = _get_code_point(data, idx)
+            if _PyUnicode_IsUppercase(cp) or _PyUnicode_IsTitlecase(cp):
+                return False
+            elif not cased and _PyUnicode_IsLowercase(cp):
+                cased = True
+        return cased
+    return impl
+
+
+# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12126-L12161    # noqa: E501
+@overload_method(types.UnicodeType, 'isidentifier')
+def unicode_isidentifier(data):
+    """Implements UnicodeType.isidentifier()"""
+
+    def impl(data):
+        length = len(data)
+        if length == 0:
+            return False
+
+        first_cp = _get_code_point(data, 0)
+        if not _PyUnicode_IsXidStart(first_cp) and first_cp != 0x5F:
+            return False
 
+        for i in range(1, length):
+            code_point = _get_code_point(data, i)
+            if not _PyUnicode_IsXidContinue(code_point):
+                return False
+
+        return True
+
+    return impl
+
+
+# generator for simple unicode "isX" methods
 def gen_isX(_PyUnicode_IS_func, empty_is_false=True):
     def unicode_isX(data):
         def impl(data):
@@ -1942,6 +1979,10 @@ def impl(data):
 overload_method(types.UnicodeType, 'isprintable')(
     gen_isX(_PyUnicode_IsPrintable, False))
 
+# ------------------------------------------------------------------------------
+# String methods that apply a transformation to the characters themselves
+# ------------------------------------------------------------------------------
+
 
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9863-L9908    # noqa: E501
 def case_operation(ascii_func, unicode_func):
@@ -1971,6 +2012,94 @@ def impl(data):
 
     return impl
 
+
+@overload_method(types.UnicodeType, 'upper')
+def unicode_upper(a):
+    """
+    Implements .upper()
+    """
+    def impl(a):
+        # main structure is a translation of:
+        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L13308-L13316    # noqa: E501
+
+        # ASCII fast path
+        l = len(a)
+        if a._is_ascii:
+            # This is an approximate translation of:
+            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/bytes_methods.c#L300    # noqa: E501
+            ret = _empty_string(a._kind, l, a._is_ascii)
+            for idx in range(l):
+                code_point = _get_code_point(a, idx)
+                _set_code_point(ret, idx, _Py_TOUPPER(code_point))
+            return ret
+        else:
+            # This part in an amalgamation of two algorithms:
+            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9864-L9908    # noqa: E501
+            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9787-L9805    # noqa: E501
+            #
+            # The alg walks the string and writes the upper version of the code
+            # point into a 4byte kind unicode string and at the same time
+            # tracks the maximum width "upper" character encountered, following
+            # this the 4byte kind string is reinterpreted as needed into the
+            # maximum width kind string
+            tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * l, a._is_ascii)
+            mapped = np.array((3,), dtype=_Py_UCS4)
+            maxchar = 0
+            k = 0
+            for idx in range(l):
+                mapped[:] = 0
+                code_point = _get_code_point(a, idx)
+                n_res = _PyUnicode_ToUpperFull(_Py_UCS4(code_point), mapped)
+                for j in range(n_res):
+                    maxchar = max(maxchar, mapped[j])
+                    _set_code_point(tmp, k, mapped[j])
+                    k += 1
+            newlength = k
+            newkind = _codepoint_to_kind(maxchar)
+            ret = _empty_string(newkind, newlength,
+                                _codepoint_is_ascii(maxchar))
+            for i in range(newlength):
+                _set_code_point(ret, i, _get_code_point(tmp, i))
+            return ret
+    return impl
+
+
+@overload_method(types.UnicodeType, 'lower')
+def unicode_lower(data):
+    """Implements .lower()"""
+    def impl(data):
+        # main structure is a translation of:
+        # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L12380-L12388    # noqa: E501
+
+        # ASCII fast path
+        length = len(data)
+        if data._is_ascii:
+            # This is an approximate translation of:
+            # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/bytes_methods.c#L247-L255    # noqa: E501
+            res = _empty_string(data._kind, length, data._is_ascii)
+            for idx in range(length):
+                code_point = _get_code_point(data, idx)
+                _set_code_point(res, idx, _Py_TOLOWER(code_point))
+            return res
+        else:
+            # This is an approximate translation of:
+            # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L10023-L10069    # noqa: E501
+            tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length,
+                                data._is_ascii)
+            # maxchar is inside of a list to be pass as argument by reference
+            maxchars = [0]
+            newlength = _do_upper_or_lower(data, length, tmp, maxchars,
+                                           lower=True)
+            maxchar = maxchars[0]
+            newkind = _codepoint_to_kind(maxchar)
+            res = _empty_string(newkind, newlength,
+                                _codepoint_is_ascii(maxchar))
+            for i in range(newlength):
+                _set_code_point(res, i, _get_code_point(tmp, i))
+            return res
+
+    return impl
+
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9819-L9834    # noqa: E501
 @register_jitable
 def _unicode_casefold(data, length, res, maxchars):
@@ -1996,9 +2125,6 @@ def _ascii_casefold(data, res):
         _set_code_point(res, idx, _Py_TOLOWER(code_point))
 
 
-# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10782-L10791    # noqa: E501
-# mixed with
-# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9819-L9834    # noqa: E501
 @overload_method(types.UnicodeType, 'casefold')
 def unicode_casefold(data):
     """Implements str.casefold()"""
@@ -2039,8 +2165,8 @@ def _lower_ucs4(code_point, data, length, idx, mapped):
     return _PyUnicode_ToLowerFull(code_point, mapped)
 
 
-@register_jitable
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9737-L9759    # noqa: E501
+@register_jitable
 def _unicode_capitalize(data, length, res, maxchars):
     k = 0
     maxchar = 0
@@ -2177,81 +2303,6 @@ def unicode_swapcase(data):
     return case_operation(_ascii_swapcase, _unicode_swapcase)
 
 
-if sys.version_info[:2] >= (3, 7):
-    @overload_method(types.UnicodeType, 'isascii')
-    def unicode_isascii(data):
-        """Implements UnicodeType.isascii()"""
-
-        def impl(data):
-            return data._is_ascii
-        return impl
-
-
-@overload_method(types.UnicodeType, 'istitle')
-def unicode_istitle(data):
-    """
-    Implements UnicodeType.istitle()
-    The algorithm is an approximate translation from CPython:
-    https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11829-L11885 # noqa: E501
-    """
-
-    def impl(data):
-        length = len(data)
-        if length == 1:
-            char = _get_code_point(data, 0)
-            return _PyUnicode_IsUppercase(char) or _PyUnicode_IsTitlecase(char)
-
-        if length == 0:
-            return False
-
-        cased = False
-        previous_is_cased = False
-        for idx in range(length):
-            char = _get_code_point(data, idx)
-            if _PyUnicode_IsUppercase(char) or _PyUnicode_IsTitlecase(char):
-                if previous_is_cased:
-                    return False
-                previous_is_cased = True
-                cased = True
-            elif _PyUnicode_IsLowercase(char):
-                if not previous_is_cased:
-                    return False
-                previous_is_cased = True
-                cased = True
-            else:
-                previous_is_cased = False
-
-        return cased
-    return impl
-
-
-@overload_method(types.UnicodeType, 'islower')
-def unicode_islower(data):
-    """
-    impl is an approximate translation of:
-    https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L11900-L11933    # noqa: E501
-    mixed with:
-    https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/bytes_methods.c#L131-L156    # noqa: E501
-    """
-
-    def impl(data):
-        length = len(data)
-        if length == 1:
-            return _PyUnicode_IsLowercase(_get_code_point(data, 0))
-        if length == 0:
-            return False
-
-        cased = False
-        for idx in range(length):
-            cp = _get_code_point(data, idx)
-            if _PyUnicode_IsUppercase(cp) or _PyUnicode_IsTitlecase(cp):
-                return False
-            elif not cased and _PyUnicode_IsLowercase(cp):
-                cased = True
-        return cased
-    return impl
-
-
 # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9946-L9965    # noqa: E501
 @register_jitable
 def _do_upper_or_lower(data, length, res, maxchars, lower):
@@ -2271,43 +2322,6 @@ def _do_upper_or_lower(data, length, res, maxchars, lower):
     return k
 
 
-@overload_method(types.UnicodeType, 'lower')
-def unicode_lower(data):
-    """Implements .lower()"""
-    def impl(data):
-        # main structure is a translation of:
-        # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L12380-L12388    # noqa: E501
-
-        # ASCII fast path
-        length = len(data)
-        if data._is_ascii:
-            # This is an approximate translation of:
-            # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/bytes_methods.c#L247-L255    # noqa: E501
-            res = _empty_string(data._kind, length, data._is_ascii)
-            for idx in range(length):
-                code_point = _get_code_point(data, idx)
-                _set_code_point(res, idx, _Py_TOLOWER(code_point))
-            return res
-        else:
-            # This is an approximate translation of:
-            # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L10023-L10069    # noqa: E501
-            tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length,
-                                data._is_ascii)
-            # maxchar is inside of a list to be pass as argument by reference
-            maxchars = [0]
-            newlength = _do_upper_or_lower(data, length, tmp, maxchars,
-                                           lower=True)
-            maxchar = maxchars[0]
-            newkind = _codepoint_to_kind(maxchar)
-            res = _empty_string(newkind, newlength,
-                                _codepoint_is_ascii(maxchar))
-            for i in range(newlength):
-                _set_code_point(res, i, _get_code_point(tmp, i))
-            return res
-
-    return impl
-
-
 @lower_builtin('getiter', types.UnicodeType)
 def getiter_unicode(context, builder, sig, args):
     [ty] = sig.args

From a3970cea4b23bbf3d4a0d84c13ff7515929db8f7 Mon Sep 17 00:00:00 2001
From: Stuart Archibald <stuart.archibald@googlemail.com>
Date: Thu, 2 Jan 2020 17:08:14 +0000
Subject: [PATCH 68/68] Sort out lower/upper

---
 numba/unicode.py | 206 +++++++++++++++++------------------------------
 1 file changed, 76 insertions(+), 130 deletions(-)

diff --git a/numba/unicode.py b/numba/unicode.py
index f60cd7a0fa9..f5b113d795e 100644
--- a/numba/unicode.py
+++ b/numba/unicode.py
@@ -2013,92 +2013,87 @@ def impl(data):
     return impl
 
 
-@overload_method(types.UnicodeType, 'upper')
-def unicode_upper(a):
-    """
-    Implements .upper()
-    """
-    def impl(a):
-        # main structure is a translation of:
-        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L13308-L13316    # noqa: E501
+# https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9856-L9883    # noqa: E501
+@register_jitable
+def _handle_capital_sigma(data, length, idx):
+    """This is a translation of the function that handles the capital sigma."""
+    c = 0
+    j = idx - 1
+    while j >= 0:
+        c = _get_code_point(data, j)
+        if not _PyUnicode_IsCaseIgnorable(c):
+            break
+        j -= 1
+    final_sigma = (j >= 0 and _PyUnicode_IsCased(c))
+    if final_sigma:
+        j = idx + 1
+        while j < length:
+            c = _get_code_point(data, j)
+            if not _PyUnicode_IsCaseIgnorable(c):
+                break
+            j += 1
+        final_sigma = (j == length or (not _PyUnicode_IsCased(c)))
+
+    return 0x3c2 if final_sigma else 0x3c3
+
+
+# https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9885-L9895    # noqa: E501
+@register_jitable
+def _lower_ucs4(code_point, data, length, idx, mapped):
+    """This is a translation of the function that lowers a character."""
+    if code_point == 0x3A3:
+        mapped[0] = _handle_capital_sigma(data, length, idx)
+        return 1
+    return _PyUnicode_ToLowerFull(code_point, mapped)
 
-        # ASCII fast path
-        l = len(a)
-        if a._is_ascii:
-            # This is an approximate translation of:
-            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/bytes_methods.c#L300    # noqa: E501
-            ret = _empty_string(a._kind, l, a._is_ascii)
-            for idx in range(l):
-                code_point = _get_code_point(a, idx)
-                _set_code_point(ret, idx, _Py_TOUPPER(code_point))
-            return ret
-        else:
-            # This part in an amalgamation of two algorithms:
-            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9864-L9908    # noqa: E501
-            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9787-L9805    # noqa: E501
-            #
-            # The alg walks the string and writes the upper version of the code
-            # point into a 4byte kind unicode string and at the same time
-            # tracks the maximum width "upper" character encountered, following
-            # this the 4byte kind string is reinterpreted as needed into the
-            # maximum width kind string
-            tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * l, a._is_ascii)
-            mapped = np.array((3,), dtype=_Py_UCS4)
-            maxchar = 0
-            k = 0
-            for idx in range(l):
-                mapped[:] = 0
-                code_point = _get_code_point(a, idx)
-                n_res = _PyUnicode_ToUpperFull(_Py_UCS4(code_point), mapped)
-                for j in range(n_res):
-                    maxchar = max(maxchar, mapped[j])
-                    _set_code_point(tmp, k, mapped[j])
-                    k += 1
-            newlength = k
-            newkind = _codepoint_to_kind(maxchar)
-            ret = _empty_string(newkind, newlength,
-                                _codepoint_is_ascii(maxchar))
-            for i in range(newlength):
-                _set_code_point(ret, i, _get_code_point(tmp, i))
-            return ret
-    return impl
+
+# https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9946-L9965    # noqa: E501
+def _gen_unicode_upper_or_lower(lower):
+    def _do_upper_or_lower(data, length, res, maxchars):
+        k = 0
+        for idx in range(length):
+            mapped = np.zeros(3, dtype=_Py_UCS4)
+            code_point = _get_code_point(data, idx)
+            if lower:
+                n_res = _lower_ucs4(code_point, data, length, idx, mapped)
+            else:
+                # upper-casing path, used by the _unicode_upper variant
+                n_res = _PyUnicode_ToUpperFull(code_point, mapped)
+            for m in mapped[:n_res]:
+                maxchars[0] = max(maxchars[0], m)
+                _set_code_point(res, k, m)
+                k += 1
+        return k
+    return _do_upper_or_lower
+
+
+_unicode_upper = register_jitable(_gen_unicode_upper_or_lower(False))
+_unicode_lower = register_jitable(_gen_unicode_upper_or_lower(True))
+
+
+def _gen_ascii_upper_or_lower(func):
+    def _ascii_upper_or_lower(data, res):
+        for idx in range(len(data)):
+            code_point = _get_code_point(data, idx)
+            _set_code_point(res, idx, func(code_point))
+    return _ascii_upper_or_lower
+
+
+_ascii_upper = register_jitable(_gen_ascii_upper_or_lower(_Py_TOUPPER))
+_ascii_lower = register_jitable(_gen_ascii_upper_or_lower(_Py_TOLOWER))
 
 
 @overload_method(types.UnicodeType, 'lower')
 def unicode_lower(data):
     """Implements .lower()"""
-    def impl(data):
-        # main structure is a translation of:
-        # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L12380-L12388    # noqa: E501
+    return case_operation(_ascii_lower, _unicode_lower)
 
-        # ASCII fast path
-        length = len(data)
-        if data._is_ascii:
-            # This is an approximate translation of:
-            # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/bytes_methods.c#L247-L255    # noqa: E501
-            res = _empty_string(data._kind, length, data._is_ascii)
-            for idx in range(length):
-                code_point = _get_code_point(data, idx)
-                _set_code_point(res, idx, _Py_TOLOWER(code_point))
-            return res
-        else:
-            # This is an approximate translation of:
-            # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L10023-L10069    # noqa: E501
-            tmp = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * length,
-                                data._is_ascii)
-            # maxchar is inside of a list to be pass as argument by reference
-            maxchars = [0]
-            newlength = _do_upper_or_lower(data, length, tmp, maxchars,
-                                           lower=True)
-            maxchar = maxchars[0]
-            newkind = _codepoint_to_kind(maxchar)
-            res = _empty_string(newkind, newlength,
-                                _codepoint_is_ascii(maxchar))
-            for i in range(newlength):
-                _set_code_point(res, i, _get_code_point(tmp, i))
-            return res
 
-    return impl
+@overload_method(types.UnicodeType, 'upper')
+def unicode_upper(data):
+    """Implements .upper()"""
+    return case_operation(_ascii_upper, _unicode_upper)
+
 
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9819-L9834    # noqa: E501
 @register_jitable
@@ -2131,40 +2126,6 @@ def unicode_casefold(data):
     return case_operation(_ascii_casefold, _unicode_casefold)
 
 
-# https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9856-L9883    # noqa: E501
-@register_jitable
-def _handle_capital_sigma(data, length, idx):
-    """This is a translation of the function that handles the capital sigma."""
-    c = 0
-    j = idx - 1
-    while j >= 0:
-        c = _get_code_point(data, j)
-        if not _PyUnicode_IsCaseIgnorable(c):
-            break
-        j -= 1
-    final_sigma = (j >= 0 and _PyUnicode_IsCased(c))
-    if final_sigma:
-        j = idx + 1
-        while j < length:
-            c = _get_code_point(data, j)
-            if not _PyUnicode_IsCaseIgnorable(c):
-                break
-            j += 1
-        final_sigma = (j == length or (not _PyUnicode_IsCased(c)))
-
-    return 0x3c2 if final_sigma else 0x3c3
-
-
-# https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9885-L9895    # noqa: E501
-@register_jitable
-def _lower_ucs4(code_point, data, length, idx, mapped):
-    """This is a translation of the function that lowers a character."""
-    if code_point == 0x3A3:
-        mapped[0] = _handle_capital_sigma(data, length, idx)
-        return 1
-    return _PyUnicode_ToLowerFull(code_point, mapped)
-
-
 # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9737-L9759    # noqa: E501
 @register_jitable
 def _unicode_capitalize(data, length, res, maxchars):
@@ -2302,24 +2263,9 @@ def _unicode_swapcase(data, length, res, maxchars):
 def unicode_swapcase(data):
     return case_operation(_ascii_swapcase, _unicode_swapcase)
 
-
-# https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9946-L9965    # noqa: E501
-@register_jitable
-def _do_upper_or_lower(data, length, res, maxchars, lower):
-    k = 0
-    for idx in range(length):
-        mapped = np.zeros(3, dtype=_Py_UCS4)
-        code_point = _get_code_point(data, idx)
-        if lower:
-            n_res = _lower_ucs4(code_point, data, length, idx, mapped)
-        else:
-            # might be needed if call _do_upper_or_lower in unicode_upper
-            n_res = _PyUnicode_ToUpperFull(code_point, mapped)
-        for m in mapped[:n_res]:
-            maxchars[0] = max(maxchars[0], m)
-            _set_code_point(res, k, m)
-            k += 1
-    return k
+# ------------------------------------------------------------------------------
+# iteration
+# ------------------------------------------------------------------------------
 
 
 @lower_builtin('getiter', types.UnicodeType)