From 6c996e425b0306c4ad24c32635fb297807b0742f Mon Sep 17 00:00:00 2001 From: mkrastev Date: Fri, 21 Mar 2025 16:41:12 +0100 Subject: [PATCH 1/2] version bump to make this compatible with pyproject.toml --- .gitignore | 3 + .idea/.gitignore | 3 - .idea/fast-slic.iml | 2 - .idea/misc.xml | 7 -- .idea/modules.xml | 8 -- .idea/vcs.xml | 6 -- Pipfile | 14 --- Pipfile.lock | 134 ------------------------- cpuid/LICENSE | 20 ---- cpuid/__init__.py | 0 cpuid/cpuid.py | 154 ----------------------------- pyproject.toml | 30 ++++++ setup.py | 231 +++++++++++++++++++++++++++++++++++++++++--- 13 files changed, 251 insertions(+), 361 deletions(-) delete mode 100644 .idea/.gitignore delete mode 100644 .idea/fast-slic.iml delete mode 100644 .idea/misc.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/vcs.xml delete mode 100644 Pipfile delete mode 100644 Pipfile.lock delete mode 100644 cpuid/LICENSE delete mode 100644 cpuid/__init__.py delete mode 100644 cpuid/cpuid.py create mode 100644 pyproject.toml diff --git a/.gitignore b/.gitignore index 953864b..00d56bb 100644 --- a/.gitignore +++ b/.gitignore @@ -173,3 +173,6 @@ prof/ [._]*.un~ *.dSYM/ + +.idea +.vscode \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index 0e40fe8..0000000 --- a/.idea/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ - -# Default ignored files -/workspace.xml \ No newline at end of file diff --git a/.idea/fast-slic.iml b/.idea/fast-slic.iml deleted file mode 100644 index f08604b..0000000 --- a/.idea/fast-slic.iml +++ /dev/null @@ -1,2 +0,0 @@ - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index 8822db8..0000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index 93884b6..0000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 94a25f7..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/Pipfile b/Pipfile deleted file mode 100644 index 4eff8a5..0000000 --- a/Pipfile +++ /dev/null @@ -1,14 +0,0 @@ -[[source]] -url = "https://pypi.org/simple" -verify_ssl = true -name = "pypi" - -[packages] -numpy = "*" - -[dev-packages] -pytest = "*" -pillow = "*" - -[requires] -python_version = "3.6" diff --git a/Pipfile.lock b/Pipfile.lock deleted file mode 100644 index 7a081b0..0000000 --- a/Pipfile.lock +++ /dev/null @@ -1,134 +0,0 @@ -{ - "_meta": { - "hash": { - "sha256": "0d52e91b535579d724f14d3549f20a234182057117703fc15619c7191cd8f53b" - }, - "pipfile-spec": 6, - "requires": { - "python_version": "3.6" - }, - "sources": [ - { - "name": "pypi", - "url": "https://pypi.org/simple", - "verify_ssl": true - } - ] - }, - "default": { - "numpy": { - "hashes": [ - "sha256:0e2eed77804b2a6a88741f8fcac02c5499bba3953ec9c71e8b217fad4912c56c", - "sha256:1c666f04553ef70fda54adf097dbae7080645435fc273e2397f26bbf1d127bbb", - "sha256:1f46532afa7b2903bfb1b79becca2954c0a04389d19e03dc73f06b039048ac40", - "sha256:315fa1b1dfc16ae0f03f8fd1c55f23fd15368710f641d570236f3d78af55e340", - "sha256:3d5fcea4f5ed40c3280791d54da3ad2ecf896f4c87c877b113576b8280c59441", - "sha256:48241759b99d60aba63b0e590332c600fc4b46ad597c9b0a53f350b871ef0634", - "sha256:4b4f2924b36d857cf302aec369caac61e43500c17eeef0d7baacad1084c0ee84", - "sha256:54fe3b7ed9e7eb928bbc4318f954d133851865f062fa4bbb02ef8940bc67b5d2", - "sha256:5a8f021c70e6206c317974c93eaaf9bc2b56295b6b1cacccf88846e44a1f33fc", - "sha256:754a6be26d938e6ca91942804eb209307b73f806a1721176278a6038869a1686", - "sha256:771147e654e8b95eea1293174a94f34e2e77d5729ad44aefb62fbf8a79747a15", - "sha256:78a6f89da87eeb48014ec652a65c4ffde370c036d780a995edaeb121d3625621", - "sha256:7fde5c2a3a682a9e101e61d97696687ebdba47637611378b4127fe7e47fdf2bf", - "sha256:80d99399c97f646e873dd8ce87c38cfdbb668956bbc39bc1e6cac4b515bba2a0", - "sha256:88a72c1e45a0ae24d1f249a529d9f71fe82e6fa6a3fd61414b829396ec585900", - "sha256:a4f4460877a16ac73302a9c077ca545498d9fe64e6a81398d8e1a67e4695e3df", - "sha256:a61255a765b3ac73ee4b110b28fccfbf758c985677f526c2b4b39c48cc4b509d", - "sha256:ab4896a8c910b9a04c0142871d8800c76c8a2e5ff44763513e1dd9d9631ce897", - "sha256:abbd6b1c2ef6199f4b7ca9f818eb6b31f17b73a6110aadc4e4298c3f00fab24e", - "sha256:b16d88da290334e33ea992c56492326ea3b06233a00a1855414360b77ca72f26", - "sha256:b78a1defedb0e8f6ae1eb55fa6ac74ab42acc4569c3a2eacc2a407ee5d42ebcb", - "sha256:cfef82c43b8b29ca436560d51b2251d5117818a8d1fb74a8384a83c096745dad", - "sha256:d160e57731fcdec2beda807ebcabf39823c47e9409485b5a3a1db3a8c6ce763e" - ], - "index": "pypi", - "version": "==1.16.3" - } - }, - "develop": { - "atomicwrites": { - "hashes": [ - "sha256:03472c30eb2c5d1ba9227e4c2ca66ab8287fbfbbda3888aa93dc2e28fc6811b4", - "sha256:75a9445bac02d8d058d5e1fe689654ba5a6556a1dfd8ce6ec55a0ed79866cfa6" - ], - "version": "==1.3.0" - }, - "attrs": { - "hashes": [ - "sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79", - "sha256:f0b870f674851ecbfbbbd364d6b5cbdff9dcedbc7f3f5e18a6891057f21fe399" - ], - "version": "==19.1.0" - }, - "more-itertools": { - "hashes": [ - "sha256:2112d2ca570bb7c3e53ea1a35cd5df42bb0fd10c45f0fb97178679c3c03d64c7", - "sha256:c3e4748ba1aad8dba30a4886b0b1a2004f9a863837b8654e7059eebf727afa5a" - ], - "markers": "python_version > '2.7'", - "version": "==7.0.0" - }, - "pillow": { - "hashes": [ - "sha256:15c056bfa284c30a7f265a41ac4cbbc93bdbfc0dfe0613b9cb8a8581b51a9e55", - "sha256:1a4e06ba4f74494ea0c58c24de2bb752818e9d504474ec95b0aa94f6b0a7e479", - "sha256:1c3c707c76be43c9e99cb7e3d5f1bee1c8e5be8b8a2a5eeee665efbf8ddde91a", - "sha256:1fd0b290203e3b0882d9605d807b03c0f47e3440f97824586c173eca0aadd99d", - "sha256:24114e4a6e1870c5a24b1da8f60d0ba77a0b4027907860188ea82bd3508c80eb", - "sha256:258d886a49b6b058cd7abb0ab4b2b85ce78669a857398e83e8b8e28b317b5abb", - "sha256:33c79b6dd6bc7f65079ab9ca5bebffb5f5d1141c689c9c6a7855776d1b09b7e8", - "sha256:367385fc797b2c31564c427430c7a8630db1a00bd040555dfc1d5c52e39fcd72", - "sha256:3c1884ff078fb8bf5f63d7d86921838b82ed4a7d0c027add773c2f38b3168754", - "sha256:44e5240e8f4f8861d748f2a58b3f04daadab5e22bfec896bf5434745f788f33f", - "sha256:46aa988e15f3ea72dddd81afe3839437b755fffddb5e173886f11460be909dce", - "sha256:74d90d499c9c736d52dd6d9b7221af5665b9c04f1767e35f5dd8694324bd4601", - "sha256:809c0a2ce9032cbcd7b5313f71af4bdc5c8c771cb86eb7559afd954cab82ebb5", - "sha256:85d1ef2cdafd5507c4221d201aaf62fc9276f8b0f71bd3933363e62a33abc734", - "sha256:8c3889c7681af77ecfa4431cd42a2885d093ecb811e81fbe5e203abc07e0995b", - "sha256:9218d81b9fca98d2c47d35d688a0cea0c42fd473159dfd5612dcb0483c63e40b", - "sha256:9aa4f3827992288edd37c9df345783a69ef58bd20cc02e64b36e44bcd157bbf1", - "sha256:9d80f44137a70b6f84c750d11019a3419f409c944526a95219bea0ac31f4dd91", - "sha256:b7ebd36128a2fe93991293f997e44be9286503c7530ace6a55b938b20be288d8", - "sha256:c4c78e2c71c257c136cdd43869fd3d5e34fc2162dc22e4a5406b0ebe86958239", - "sha256:c6a842537f887be1fe115d8abb5daa9bc8cc124e455ff995830cc785624a97af", - "sha256:cf0a2e040fdf5a6d95f4c286c6ef1df6b36c218b528c8a9158ec2452a804b9b8", - "sha256:cfd28aad6fc61f7a5d4ee556a997dc6e5555d9381d1390c00ecaf984d57e4232", - "sha256:dca5660e25932771460d4688ccbb515677caaf8595f3f3240ec16c117deff89a", - "sha256:de7aedc85918c2f887886442e50f52c1b93545606317956d65f342bd81cb4fc3", - "sha256:e6c0bbf8e277b74196e3140c35f9a1ae3eafd818f7f2d3a15819c49135d6c062" - ], - "index": "pypi", - "version": "==6.0.0" - }, - "pluggy": { - "hashes": [ - "sha256:19ecf9ce9db2fce065a7a0586e07cfb4ac8614fe96edf628a264b1c70116cf8f", - "sha256:84d306a647cc805219916e62aab89caa97a33a1dd8c342e87a37f91073cd4746" - ], - "version": "==0.9.0" - }, - "py": { - "hashes": [ - "sha256:64f65755aee5b381cea27766a3a147c3f15b9b6b9ac88676de66ba2ae36793fa", - "sha256:dc639b046a6e2cff5bbe40194ad65936d6ba360b52b3c3fe1d08a82dd50b5e53" - ], - "version": "==1.8.0" - }, - "pytest": { - "hashes": [ - "sha256:3773f4c235918987d51daf1db66d51c99fac654c81d6f2f709a046ab446d5e5d", - "sha256:b7802283b70ca24d7119b32915efa7c409982f59913c1a6c0640aacf118b95f5" - ], - "index": "pypi", - "version": "==4.4.1" - }, - "six": { - "hashes": [ - "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", - "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" - ], - "version": "==1.12.0" - } - } -} diff --git a/cpuid/LICENSE b/cpuid/LICENSE deleted file mode 100644 index af36480..0000000 --- a/cpuid/LICENSE +++ /dev/null @@ -1,20 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2014 Anders Høst - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/cpuid/__init__.py b/cpuid/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/cpuid/cpuid.py b/cpuid/cpuid.py deleted file mode 100644 index 9abbf84..0000000 --- a/cpuid/cpuid.py +++ /dev/null @@ -1,154 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (c) 2018 Anders Høst -# - -from __future__ import print_function - -import platform -import os -import ctypes -from ctypes import c_uint32, c_int, c_long, c_ulong, c_size_t, c_void_p, POINTER, CFUNCTYPE - -# Posix x86_64: -# Three first call registers : RDI, RSI, RDX -# Volatile registers : RAX, RCX, RDX, RSI, RDI, R8-11 - -# Windows x86_64: -# Three first call registers : RCX, RDX, R8 -# Volatile registers : RAX, RCX, RDX, R8-11 - -# cdecl 32 bit: -# Three first call registers : Stack (%esp) -# Volatile registers : EAX, ECX, EDX - -_POSIX_64_OPC = [ - 0x53, # push %rbx - 0x89, 0xf0, # mov %esi,%eax - 0x89, 0xd1, # mov %edx,%ecx - 0x0f, 0xa2, # cpuid - 0x89, 0x07, # mov %eax,(%rdi) - 0x89, 0x5f, 0x04, # mov %ebx,0x4(%rdi) - 0x89, 0x4f, 0x08, # mov %ecx,0x8(%rdi) - 0x89, 0x57, 0x0c, # mov %edx,0xc(%rdi) - 0x5b, # pop %rbx - 0xc3 # retq -] - -_WINDOWS_64_OPC = [ - 0x53, # push %rbx - 0x89, 0xd0, # mov %edx,%eax - 0x49, 0x89, 0xc9, # mov %rcx,%r9 - 0x44, 0x89, 0xc1, # mov %r8d,%ecx - 0x0f, 0xa2, # cpuid - 0x41, 0x89, 0x01, # mov %eax,(%r9) - 0x41, 0x89, 0x59, 0x04, # mov %ebx,0x4(%r9) - 0x41, 0x89, 0x49, 0x08, # mov %ecx,0x8(%r9) - 0x41, 0x89, 0x51, 0x0c, # mov %edx,0xc(%r9) - 0x5b, # pop %rbx - 0xc3 # retq -] - -_CDECL_32_OPC = [ - 0x53, # push %ebx - 0x57, # push %edi - 0x8b, 0x7c, 0x24, 0x0c, # mov 0xc(%esp),%edi - 0x8b, 0x44, 0x24, 0x10, # mov 0x10(%esp),%eax - 0x8b, 0x4c, 0x24, 0x14, # mov 0x14(%esp),%ecx - 0x0f, 0xa2, # cpuid - 0x89, 0x07, # mov %eax,(%edi) - 0x89, 0x5f, 0x04, # mov %ebx,0x4(%edi) - 0x89, 0x4f, 0x08, # mov %ecx,0x8(%edi) - 0x89, 0x57, 0x0c, # mov %edx,0xc(%edi) - 0x5f, # pop %edi - 0x5b, # pop %ebx - 0xc3 # ret -] - -is_windows = os.name == "nt" -is_64bit = ctypes.sizeof(ctypes.c_voidp) == 8 - -class CPUID_struct(ctypes.Structure): - _fields_ = [(r, c_uint32) for r in ("eax", "ebx", "ecx", "edx")] - -class CPUID(object): - def __init__(self): - if platform.machine() not in ("AMD64", "x86_64", "x86", "i686"): - raise SystemError("Only available for x86") - - if is_windows: - if is_64bit: - # VirtualAlloc seems to fail under some weird - # circumstances when ctypes.windll.kernel32 is - # used under 64 bit Python. CDLL fixes this. - self.win = ctypes.CDLL("kernel32.dll") - opc = _WINDOWS_64_OPC - else: - # Here ctypes.windll.kernel32 is needed to get the - # right DLL. Otherwise it will fail when running - # 32 bit Python on 64 bit Windows. - self.win = ctypes.windll.kernel32 - opc = _CDECL_32_OPC - else: - opc = _POSIX_64_OPC if is_64bit else _CDECL_32_OPC - - size = len(opc) - code = (ctypes.c_ubyte * size)(*opc) - - if is_windows: - self.win.VirtualAlloc.restype = c_void_p - self.win.VirtualAlloc.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_ulong, ctypes.c_ulong] - self.addr = self.win.VirtualAlloc(None, size, 0x1000, 0x40) - if not self.addr: - raise MemoryError("Could not allocate RWX memory") - else: - self.libc = ctypes.cdll.LoadLibrary(None) - self.libc.valloc.restype = ctypes.c_void_p - self.libc.valloc.argtypes = [ctypes.c_size_t] - self.addr = self.libc.valloc(size) - if not self.addr: - raise MemoryError("Could not allocate memory") - - self.libc.mprotect.restype = c_int - self.libc.mprotect.argtypes = [c_void_p, c_size_t, c_int] - ret = self.libc.mprotect(self.addr, size, 1 | 2 | 4) - if ret != 0: - raise OSError("Failed to set RWX") - - - ctypes.memmove(self.addr, code, size) - - func_type = CFUNCTYPE(None, POINTER(CPUID_struct), c_uint32, c_uint32) - self.func_ptr = func_type(self.addr) - - def __call__(self, eax, ecx=0): - struct = CPUID_struct() - self.func_ptr(struct, eax, ecx) - return struct.eax, struct.ebx, struct.ecx, struct.edx - - def __del__(self): - if is_windows: - self.win.VirtualFree.restype = c_long - self.win.VirtualFree.argtypes = [c_void_p, c_size_t, c_ulong] - self.win.VirtualFree(self.addr, 0, 0x8000) - elif self.libc: - # Seems to throw exception when the program ends and - # libc is cleaned up before the object? - self.libc.free.restype = None - self.libc.free.argtypes = [c_void_p] - self.libc.free(self.addr) - -if __name__ == "__main__": - def valid_inputs(): - cpuid = CPUID() - for eax in (0x0, 0x80000000): - highest, _, _, _ = cpuid(eax) - while eax <= highest: - regs = cpuid(eax) - yield (eax, regs) - eax += 1 - - print(" ".join(x.ljust(8) for x in ("CPUID", "A", "B", "C", "D")).strip()) - for eax, regs in valid_inputs(): - print("%08x" % eax, " ".join("%08x" % reg for reg in regs)) - diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..b1ce3db --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,30 @@ +[project] +name = "fast-slic" +version = "0.4.1" +description = "Fast Slic Superpixel Implementation" +requires-python = ">=3.9" +authors = [ + { name = "Alchan Kim", email = "a9413miky@gmail.com" } +] +license = { text = "MIT" } +classifiers = [ + "License :: OSI Approved :: MIT License", + "Programming Language :: Python", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation :: CPython" +] +dependencies = [ + "numpy" +] + +[build-system] +build-backend = "setuptools.build_meta" +requires = [ + "setuptools", + "wheel", + "cython", + "numpy", +] diff --git a/setup.py b/setup.py index 2dbe486..ffe342b 100755 --- a/setup.py +++ b/setup.py @@ -91,22 +91,224 @@ def _compile_and_check(c_content, compiler_args = []): return result == 0 def _check_openmp(): - import os, tempfile, subprocess, shutil - # see http://openmp.org/wp/openmp-compilers/ omp_test = \ r""" #include #include - int main() { +int main() { #pragma omp parallel - printf("Hello from thread %d, nthreads %d\n", omp_get_thread_num(), omp_get_num_threads()); - } - """ +printf("Hello from thread %d, nthreads %d\n", omp_get_thread_num(), omp_get_num_threads()); +} +""" return _compile_and_check(omp_test, ['-fopenmp', '-lgomp']) def _check_avx2(): - from cpuid.cpuid import CPUID + # The MIT License (MIT) + + # Copyright (c) 2014 Anders Høst + + # Permission is hereby granted, free of charge, to any person obtaining a copy of + # this software and associated documentation files (the "Software"), to deal in + # the Software without restriction, including without limitation the rights to + # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + # the Software, and to permit persons to whom the Software is furnished to do so, + # subject to the following conditions: + + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + + import platform + import os + import ctypes + from ctypes import c_uint32, c_int, c_long, c_ulong, c_size_t, c_void_p, POINTER, CFUNCTYPE + + # Posix x86_64: + # Three first call registers : RDI, RSI, RDX + # Volatile registers : RAX, RCX, RDX, RSI, RDI, R8-11 + + # Windows x86_64: + # Three first call registers : RCX, RDX, R8 + # Volatile registers : RAX, RCX, RDX, R8-11 + + # cdecl 32 bit: + # Three first call registers : Stack (%esp) + # Volatile registers : EAX, ECX, EDX + + _POSIX_64_OPC = [ + 0x53, # push %rbx + 0x89, + 0xF0, # mov %esi,%eax + 0x89, + 0xD1, # mov %edx,%ecx + 0x0F, + 0xA2, # cpuid + 0x89, + 0x07, # mov %eax,(%rdi) + 0x89, + 0x5F, + 0x04, # mov %ebx,0x4(%rdi) + 0x89, + 0x4F, + 0x08, # mov %ecx,0x8(%rdi) + 0x89, + 0x57, + 0x0C, # mov %edx,0xc(%rdi) + 0x5B, # pop %rbx + 0xC3, # retq + ] + + _WINDOWS_64_OPC = [ + 0x53, # push %rbx + 0x89, + 0xD0, # mov %edx,%eax + 0x49, + 0x89, + 0xC9, # mov %rcx,%r9 + 0x44, + 0x89, + 0xC1, # mov %r8d,%ecx + 0x0F, + 0xA2, # cpuid + 0x41, + 0x89, + 0x01, # mov %eax,(%r9) + 0x41, + 0x89, + 0x59, + 0x04, # mov %ebx,0x4(%r9) + 0x41, + 0x89, + 0x49, + 0x08, # mov %ecx,0x8(%r9) + 0x41, + 0x89, + 0x51, + 0x0C, # mov %edx,0xc(%r9) + 0x5B, # pop %rbx + 0xC3, # retq + ] + + _CDECL_32_OPC = [ + 0x53, # push %ebx + 0x57, # push %edi + 0x8B, + 0x7C, + 0x24, + 0x0C, # mov 0xc(%esp),%edi + 0x8B, + 0x44, + 0x24, + 0x10, # mov 0x10(%esp),%eax + 0x8B, + 0x4C, + 0x24, + 0x14, # mov 0x14(%esp),%ecx + 0x0F, + 0xA2, # cpuid + 0x89, + 0x07, # mov %eax,(%edi) + 0x89, + 0x5F, + 0x04, # mov %ebx,0x4(%edi) + 0x89, + 0x4F, + 0x08, # mov %ecx,0x8(%edi) + 0x89, + 0x57, + 0x0C, # mov %edx,0xc(%edi) + 0x5F, # pop %edi + 0x5B, # pop %ebx + 0xC3, # ret + ] + + is_windows = os.name == "nt" + is_64bit = ctypes.sizeof(ctypes.c_voidp) == 8 + + + class CPUID_struct(ctypes.Structure): + _fields_ = [(r, c_uint32) for r in ("eax", "ebx", "ecx", "edx")] + + + class CPUID(object): + def __init__(self): + if platform.machine() not in ("AMD64", "x86_64", "x86", "i686"): + raise SystemError("Only available for x86") + + if is_windows: + if is_64bit: + # VirtualAlloc seems to fail under some weird + # circumstances when ctypes.windll.kernel32 is + # used under 64 bit Python. CDLL fixes this. + self.win = ctypes.CDLL("kernel32.dll") + opc = _WINDOWS_64_OPC + else: + # Here ctypes.windll.kernel32 is needed to get the + # right DLL. Otherwise it will fail when running + # 32 bit Python on 64 bit Windows. + self.win = ctypes.windll.kernel32 + opc = _CDECL_32_OPC + else: + opc = _POSIX_64_OPC if is_64bit else _CDECL_32_OPC + + size = len(opc) + code = (ctypes.c_ubyte * size)(*opc) + + if is_windows: + self.win.VirtualAlloc.restype = c_void_p + self.win.VirtualAlloc.argtypes = [ + ctypes.c_void_p, + ctypes.c_size_t, + ctypes.c_ulong, + ctypes.c_ulong, + ] + self.addr = self.win.VirtualAlloc(None, size, 0x1000, 0x40) + if not self.addr: + raise MemoryError("Could not allocate RWX memory") + else: + self.libc = ctypes.cdll.LoadLibrary(None) + self.libc.valloc.restype = ctypes.c_void_p + self.libc.valloc.argtypes = [ctypes.c_size_t] + self.addr = self.libc.valloc(size) + if not self.addr: + raise MemoryError("Could not allocate memory") + + self.libc.mprotect.restype = c_int + self.libc.mprotect.argtypes = [c_void_p, c_size_t, c_int] + ret = self.libc.mprotect(self.addr, size, 1 | 2 | 4) + if ret != 0: + raise OSError("Failed to set RWX") + + ctypes.memmove(self.addr, code, size) + + func_type = CFUNCTYPE(None, POINTER(CPUID_struct), c_uint32, c_uint32) + self.func_ptr = func_type(self.addr) + + def __call__(self, eax, ecx=0): + struct = CPUID_struct() + self.func_ptr(struct, eax, ecx) + return struct.eax, struct.ebx, struct.ecx, struct.edx + + def __del__(self): + if is_windows: + self.win.VirtualFree.restype = c_long + self.win.VirtualFree.argtypes = [c_void_p, c_size_t, c_ulong] + self.win.VirtualFree(self.addr, 0, 0x8000) + elif self.libc: + # Seems to throw exception when the program ends and + # libc is cleaned up before the object? + self.libc.free.restype = None + self.libc.free.argtypes = [c_void_p] + self.libc.free(self.addr) + try: # Invoke CPUID instruction with eax 0x7 # ECX bit 5: AVX2 support @@ -116,17 +318,20 @@ def _check_avx2(): bits = bin(output_ebx)[::-1] avx2_support = bits[5] return avx2_support == '1' - except: + + except Exception as e: + with open('error.log', 'w') as f: + f.write(f"Failed to check AVX2 support: {e}") return False def _check_neon(): neon_test = r""" #include - int main() { - const uint16x8_t constant = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF }; - const uint32x4_t vadded = vpaddlq_u16(constant); - return 0; - } +int main() { + const uint16x8_t constant = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF }; + const uint32x4_t vadded = vpaddlq_u16(constant); + return 0; +} """ return _compile_and_check(neon_test, ["-mfpu=neon"]) From 1198a8154602d1d1ace86a2663b38e33698ed370 Mon Sep 17 00:00:00 2001 From: mkrastev Date: Fri, 21 Mar 2025 17:19:11 +0100 Subject: [PATCH 2/2] clean up the repo --- MANIFEST.in | 8 -------- cfast_slic.pxd => fast_slic/_cython/cfast_slic.pxd | 10 +++++----- cfast_slic.pyx => fast_slic/_cython/cfast_slic.pyx | 0 csimple_crf.pyx => fast_slic/_cython/csimple_crf.pyx | 2 +- pyproject.toml | 6 ++++++ setup.py | 4 ++-- 6 files changed, 14 insertions(+), 16 deletions(-) delete mode 100644 MANIFEST.in rename cfast_slic.pxd => fast_slic/_cython/cfast_slic.pxd (95%) rename cfast_slic.pyx => fast_slic/_cython/cfast_slic.pyx (100%) rename csimple_crf.pyx => fast_slic/_cython/csimple_crf.pyx (99%) diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 977da36..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,8 +0,0 @@ -global-include *.pyx -global-include *.pxd -global-include *.cpp -global-include *.h -global-include *.hpp -exclude cfast_slic.c -exclude cfast_slic.cpp -exclude csimple_crf.cpp diff --git a/cfast_slic.pxd b/fast_slic/_cython/cfast_slic.pxd similarity index 95% rename from cfast_slic.pxd rename to fast_slic/_cython/cfast_slic.pxd index 0b3de23..32c51d2 100644 --- a/cfast_slic.pxd +++ b/fast_slic/_cython/cfast_slic.pxd @@ -4,7 +4,7 @@ from libc.stdint cimport uint8_t, uint32_t, uint16_t, int16_t from libcpp cimport bool from libcpp.string cimport string -cdef extern from "src/fast-slic-common.h": +cdef extern from "../../src/fast-slic-common.h": ctypedef struct Cluster: float y float x @@ -23,14 +23,14 @@ cdef extern from "src/fast-slic-common.h": uint32_t **neighbors; -cdef extern from "src/fast-slic.h": +cdef extern from "../../src/fast-slic.h": Connectivity* fast_slic_get_connectivity(int H, int W, int K, const uint16_t *assignment) nogil Connectivity* fast_slic_knn_connectivity(int H, int W, int K, const Cluster* clusters, int num_neighbors) nogil void fast_slic_free_connectivity(Connectivity* conn) nogil void fast_slic_get_mask_density(int H, int W, int K, const Cluster* clusters, const uint16_t* assignment, const uint8_t *mask, uint8_t *cluster_densities) nogil void fast_slic_cluster_density_to_mask(int H, int W, int K, const Cluster *clusters, const uint16_t* assignment, const uint8_t *cluster_densities, uint8_t *result) nogil -cdef extern from "src/context.h" namespace "fslic": +cdef extern from "../../src/context.h" namespace "fslic": cdef cppclass BaseContext[DistType]: int16_t subsample_stride_config int num_threads @@ -74,7 +74,7 @@ cdef extern from "src/context.h" namespace "fslic": void set_arch(const char* arch) Context* build(int H, int W, int K, const uint8_t* image, Cluster *clusters) -cdef extern from "src/lsc.h" namespace "fslic": +cdef extern from "../../src/lsc.h" namespace "fslic": cdef cppclass ContextLSC(ContextRealDist): ContextLSC(int H, int W, int K, const uint8_t* image, Cluster *clusters) except + @@ -88,7 +88,7 @@ cdef extern from "src/lsc.h" namespace "fslic": ContextLSC* build(int H, int W, int K, const uint8_t* image, Cluster *clusters) -cdef extern from "src/cca.h" namespace "cca": +cdef extern from "../../src/cca.h" namespace "cca": cdef cppclass ConnectivityEnforcer: ConnectivityEnforcer(const uint16_t *labels, int H, int W, int K, int min_threshold) void execute(uint16_t *out) diff --git a/cfast_slic.pyx b/fast_slic/_cython/cfast_slic.pyx similarity index 100% rename from cfast_slic.pyx rename to fast_slic/_cython/cfast_slic.pyx diff --git a/csimple_crf.pyx b/fast_slic/_cython/csimple_crf.pyx similarity index 99% rename from csimple_crf.pyx rename to fast_slic/_cython/csimple_crf.pyx index 34c693b..f7772e2 100644 --- a/csimple_crf.pyx +++ b/fast_slic/_cython/csimple_crf.pyx @@ -11,7 +11,7 @@ from libc.stdint cimport uint32_t, int32_t from libcpp.vector cimport vector from cython.operator cimport dereference as deref, preincrement -cdef extern from "src/simple-crf.hpp": +cdef extern from "../../src/simple-crf.hpp": ctypedef int simple_crf_time_t ctypedef struct SimpleCRFParams: diff --git a/pyproject.toml b/pyproject.toml index b1ce3db..428c480 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,3 +28,9 @@ requires = [ "cython", "numpy", ] + +[tool.setuptools.package-data] +"fast_slic" = ["*.pyx", "*.pxd", "*.cpp", "*.h", "*.hpp"] + +[tool.setuptools.exclude-package-data] +"fast_slic" = ["_cython/cfast_slic.c", "_cython/cfast_slic.cpp", "_cython/csimple_crf.cpp"] diff --git a/setup.py b/setup.py index ffe342b..226ff00 100755 --- a/setup.py +++ b/setup.py @@ -397,7 +397,7 @@ def _check_neon(): Extension( "cfast_slic", include_dirs=[np.get_include()], - sources=cpp_sources + ["cfast_slic.pyx"], + sources=cpp_sources + ["fast_slic/_cython/cfast_slic.pyx"], extra_compile_args=extra_compile_args, extra_link_args=extra_link_args, language="c++", @@ -405,7 +405,7 @@ def _check_neon(): Extension( "csimple_crf", include_dirs=[np.get_include()], - sources=["src/simple-crf.cpp", "csimple_crf.pyx"], + sources=["src/simple-crf.cpp", "fast_slic/_cython/csimple_crf.pyx"], extra_compile_args=extra_compile_args, extra_link_args=extra_link_args, language="c++",