[neon/backports-noble/xsimd/Neon/release] debian/patches: patches recommended by krita

Jonathan Riddell null at kde.org
Wed Oct 30 11:47:44 GMT 2024


Git commit 359fc483ce89f09dd7efb2a110d3c5087cc3caa2 by Jonathan Riddell.
Committed on 30/10/2024 at 11:47.
Pushed by jriddell into branch 'Neon/release'.

patches recommended by krita

A  +80   -0    debian/patches/80a59235e3ffa51659aaa06f002bfd088b77023c.diff
A  +141  -0    debian/patches/96edf0340492fa9c080f5182b38358ca85baef5e.diff
A  +2    -0    debian/patches/series

https://invent.kde.org/neon/backports-noble/xsimd/-/commit/359fc483ce89f09dd7efb2a110d3c5087cc3caa2

diff --git a/debian/patches/80a59235e3ffa51659aaa06f002bfd088b77023c.diff b/debian/patches/80a59235e3ffa51659aaa06f002bfd088b77023c.diff
new file mode 100644
index 0000000..290447f
--- /dev/null
+++ b/debian/patches/80a59235e3ffa51659aaa06f002bfd088b77023c.diff
@@ -0,0 +1,80 @@
+commit 80a59235e3ffa51659aaa06f002bfd088b77023c
+Author: Dmitry Kazakov <dimula73 at gmail.com>
+Date:   Fri Jun 14 10:19:55 2024 +0200
+
+    Fix xsimd::available_architectures().has() for sve and rvv archs
+    
+    Ideally the patched CPU detection code should also check if the length
+    of SVE and RVV is actually supported by the current CPU implementation
+    (i.e. ZCR_Elx.LEN register for SVE and something else for RVV), but
+    I don't have such CPUs/emulators handy, so I cannot add such checks.
+    
+    Given that xsimd::available_architectures().has() is a new feature
+    of XSIMD13 and the length check has never been present in XSIMD, this
+    bug is not a regression at least.
+    
+    The patch also adds a unittest that reproduces the error the patch fixes
+
+diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp
+index 6dda3be..8021fce 100644
+--- a/include/xsimd/config/xsimd_cpuid.hpp
++++ b/include/xsimd/config/xsimd_cpuid.hpp
+@@ -42,6 +42,10 @@ namespace xsimd
+ #define ARCH_FIELD_EX(arch, field_name) \
+     unsigned field_name;                \
+     XSIMD_INLINE bool has(::xsimd::arch) const { return this->field_name; }
++
++#define ARCH_FIELD_EX_REUSE(arch, field_name) \
++    XSIMD_INLINE bool has(::xsimd::arch) const { return this->field_name; }
++
+ #define ARCH_FIELD(name) ARCH_FIELD_EX(name, name)
+ 
+             ARCH_FIELD(sse2)
+@@ -72,8 +76,12 @@ namespace xsimd
+             ARCH_FIELD(neon)
+             ARCH_FIELD(neon64)
+             ARCH_FIELD_EX(i8mm<::xsimd::neon64>, i8mm_neon64)
+-            ARCH_FIELD(sve)
+-            ARCH_FIELD(rvv)
++            ARCH_FIELD_EX(detail::sve<512>, sve)
++            ARCH_FIELD_EX_REUSE(detail::sve<256>, sve)
++            ARCH_FIELD_EX_REUSE(detail::sve<128>, sve)
++            ARCH_FIELD_EX(detail::rvv<512>, rvv)
++            ARCH_FIELD_EX_REUSE(detail::rvv<256>, rvv)
++            ARCH_FIELD_EX_REUSE(detail::rvv<128>, rvv)
+             ARCH_FIELD(wasm)
+ 
+ #undef ARCH_FIELD
+diff --git a/test/test_arch.cpp b/test/test_arch.cpp
+index b420733..f1f50d5 100644
+--- a/test/test_arch.cpp
++++ b/test/test_arch.cpp
+@@ -38,6 +38,16 @@ struct check_supported
+     }
+ };
+ 
++struct check_cpu_has_intruction_set
++{
++    template <class Arch>
++    void operator()(Arch arch) const
++    {
++        static_assert(std::is_same<decltype(xsimd::available_architectures().has(arch)), bool>::value,
++                      "cannot test instruction set availability on CPU");
++    }
++};
++
+ struct check_available
+ {
+     template <class Arch>
+@@ -71,6 +81,11 @@ TEST_CASE("[multi arch support]")
+         xsimd::supported_architectures::for_each(check_supported {});
+     }
+ 
++    SUBCASE("xsimd::available_architectures::has")
++    {
++        xsimd::all_architectures::for_each(check_cpu_has_intruction_set {});
++    }
++
+     SUBCASE("xsimd::default_arch::name")
+     {
+         constexpr char const* name = xsimd::default_arch::name();
diff --git a/debian/patches/96edf0340492fa9c080f5182b38358ca85baef5e.diff b/debian/patches/96edf0340492fa9c080f5182b38358ca85baef5e.diff
new file mode 100644
index 0000000..25ee834
--- /dev/null
+++ b/debian/patches/96edf0340492fa9c080f5182b38358ca85baef5e.diff
@@ -0,0 +1,141 @@
+commit 96edf0340492fa9c080f5182b38358ca85baef5e
+Author: Dmitry Kazakov <dimula73 at gmail.com>
+Date:   Tue May 28 22:21:08 2024 +0200
+
+    Fix detection of SSE/AVX/AVX512 when they are explicitly disabled by OS
+    
+    Some CPU vulnerability mitigations may disable AVX functionality
+    on the hardware level via the XCR0 register. We should check that
+    manually to verify that OS actually allows us to use this feature.
+    
+    See https://bugs.kde.org/show_bug.cgi?id=484622
+    
+    Fix #1025
+
+diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp
+index f22089b..6dda3be 100644
+--- a/include/xsimd/config/xsimd_cpuid.hpp
++++ b/include/xsimd/config/xsimd_cpuid.hpp
+@@ -114,6 +114,35 @@ namespace xsimd
+ #endif
+ 
+ #elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86)
++
++                auto get_xcr0_low = []() noexcept
++                {
++                    uint32_t xcr0;
++
++#if defined(_MSC_VER) && _MSC_VER >= 1400
++
++                    xcr0 = (uint32_t)_xgetbv(0);
++
++#elif defined(__GNUC__)
++
++                    __asm__(
++                        "xorl %%ecx, %%ecx\n"
++                        "xgetbv\n"
++                        : "=a"(xcr0)
++                        :
++#if defined(__i386__)
++                        : "ecx", "edx"
++#else
++                        : "rcx", "rdx"
++#endif
++                    );
++
++#else /* _MSC_VER < 1400 */
++#error "_MSC_VER < 1400 is not supported"
++#endif /* _MSC_VER && _MSC_VER >= 1400 */
++                    return xcr0;
++                };
++
+                 auto get_cpuid = [](int reg[4], int level, int count = 0) noexcept
+                 {
+ 
+@@ -148,19 +177,43 @@ namespace xsimd
+ 
+                 get_cpuid(regs1, 0x1);
+ 
+-                sse2 = regs1[3] >> 26 & 1;
+-                sse3 = regs1[2] >> 0 & 1;
+-                ssse3 = regs1[2] >> 9 & 1;
+-                sse4_1 = regs1[2] >> 19 & 1;
+-                sse4_2 = regs1[2] >> 20 & 1;
+-                fma3_sse42 = regs1[2] >> 12 & 1;
++                // OS can explicitly disable the usage of SSE/AVX extensions
++                // by setting an appropriate flag in the XCR0 register
++                //
++                // https://docs.kernel.org/admin-guide/hw-vuln/gather_data_sampling.html
++
++                unsigned sse_state_os_enabled = 1;
++                unsigned avx_state_os_enabled = 1;
++                unsigned avx512_state_os_enabled = 1;
++
++                // OSXSAVE: A value of 1 indicates that the OS has set CR4.OSXSAVE[bit
++                // 18] to enable XSETBV/XGETBV instructions to access XCR0 and
++                // to support processor extended state management using
++                // XSAVE/XRSTOR.
++                bool osxsave = regs1[2] >> 27 & 1;
++                if (osxsave)
++                {
++
++                    uint32_t xcr0 = get_xcr0_low();
++
++                    sse_state_os_enabled = xcr0 >> 1 & 1;
++                    avx_state_os_enabled = xcr0 >> 2 & sse_state_os_enabled;
++                    avx512_state_os_enabled = xcr0 >> 6 & avx_state_os_enabled;
++                }
++
++                sse2 = regs1[3] >> 26 & sse_state_os_enabled;
++                sse3 = regs1[2] >> 0 & sse_state_os_enabled;
++                ssse3 = regs1[2] >> 9 & sse_state_os_enabled;
++                sse4_1 = regs1[2] >> 19 & sse_state_os_enabled;
++                sse4_2 = regs1[2] >> 20 & sse_state_os_enabled;
++                fma3_sse42 = regs1[2] >> 12 & sse_state_os_enabled;
+ 
+-                avx = regs1[2] >> 28 & 1;
++                avx = regs1[2] >> 28 & avx_state_os_enabled;
+                 fma3_avx = avx && fma3_sse42;
+ 
+                 int regs8[4];
+                 get_cpuid(regs8, 0x80000001);
+-                fma4 = regs8[2] >> 16 & 1;
++                fma4 = regs8[2] >> 16 & avx_state_os_enabled;
+ 
+                 // sse4a = regs[2] >> 6 & 1;
+ 
+@@ -168,23 +221,23 @@ namespace xsimd
+ 
+                 int regs7[4];
+                 get_cpuid(regs7, 0x7);
+-                avx2 = regs7[1] >> 5 & 1;
++                avx2 = regs7[1] >> 5 & avx_state_os_enabled;
+ 
+                 int regs7a[4];
+                 get_cpuid(regs7a, 0x7, 0x1);
+-                avxvnni = regs7a[0] >> 4 & 1;
++                avxvnni = regs7a[0] >> 4 & avx_state_os_enabled;
+ 
+                 fma3_avx2 = avx2 && fma3_sse42;
+ 
+-                avx512f = regs7[1] >> 16 & 1;
+-                avx512cd = regs7[1] >> 28 & 1;
+-                avx512dq = regs7[1] >> 17 & 1;
+-                avx512bw = regs7[1] >> 30 & 1;
+-                avx512er = regs7[1] >> 27 & 1;
+-                avx512pf = regs7[1] >> 26 & 1;
+-                avx512ifma = regs7[1] >> 21 & 1;
+-                avx512vbmi = regs7[2] >> 1 & 1;
+-                avx512vnni_bw = regs7[2] >> 11 & 1;
++                avx512f = regs7[1] >> 16 & avx512_state_os_enabled;
++                avx512cd = regs7[1] >> 28 & avx512_state_os_enabled;
++                avx512dq = regs7[1] >> 17 & avx512_state_os_enabled;
++                avx512bw = regs7[1] >> 30 & avx512_state_os_enabled;
++                avx512er = regs7[1] >> 27 & avx512_state_os_enabled;
++                avx512pf = regs7[1] >> 26 & avx512_state_os_enabled;
++                avx512ifma = regs7[1] >> 21 & avx512_state_os_enabled;
++                avx512vbmi = regs7[2] >> 1 & avx512_state_os_enabled;
++                avx512vnni_bw = regs7[2] >> 11 & avx512_state_os_enabled;
+                 avx512vnni_vbmi = avx512vbmi && avx512vnni_bw;
+ #endif
+             }
diff --git a/debian/patches/series b/debian/patches/series
new file mode 100644
index 0000000..762a4a3
--- /dev/null
+++ b/debian/patches/series
@@ -0,0 +1,2 @@
+80a59235e3ffa51659aaa06f002bfd088b77023c.diff
+96edf0340492fa9c080f5182b38358ca85baef5e.diff


More information about the Neon-commits mailing list