diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6cb92b2..08ecc01 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,11 +1,12 @@
-cmake_minimum_required(VERSION 3.8)
+cmake_minimum_required(VERSION 3.12)
 project(fbitset VERSION 0.1.0 LANGUAGES CXX)
 
 # OPTIONS
 option(BUILD_TESTS "Build unit tests" ON)
 
 # Set the building options.
-set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
 include_directories("${PROJECT_SOURCE_DIR}/include")
 
diff --git a/README.md b/README.md
index c889a7a..5a54a61 100644
--- a/README.md
+++ b/README.md
@@ -27,11 +27,11 @@ latest C++17 compile-time features and template programming, it is ensured that
 loops can be correctly unrolled for internal storage, which can be verified by
 the assembly output from G++ and Clang++.
 
-Since this library is designed for use cases with shear demand on performance,
+Since this library is designed for use cases with sheer demand on performance,
 in addition to making sure the unrolling of the loops for internal storage,
-some very portable GCC compiler intrinsics are also used.  This could utilize
-native instructions for some bitwise operations when they are available, like
-the `popcnt`, `bsf`, and `bsr` x86-64 instructions.
+the functions of the C++20 standard `<bit>` header are used for bit operations.
+These compile to efficient native instructions when the target provides them,
+like the `popcnt`, `bsf`, and `bsr` x86-64 instructions.
 
 In addition to being highly optimized, by modern C++ template programming, the
 bit set is also very tunable.  Things like the internal integral type used to
diff --git a/include/fbitset.hpp b/include/fbitset.hpp
index 5f10286..82aaa63 100644
--- a/include/fbitset.hpp
+++ b/include/fbitset.hpp
@@ -8,6 +8,7 @@
 #define FBITSET_FBITSET_H
 
 #include <array>
+#include <bit>
 #include <cassert>
 #include <functional>
 #include <initializer_list>
@@ -36,30 +37,25 @@ namespace internal {
     template <> constexpr bool is_no_ext<No_ext> = true;
 
     //
-    // Misc bit operations
+    // Bit operations using the C++20 <bit> header
     //
-    // TODO: Make these operations cross-platform by conditional compilation of
-    // intrinsic functions and possibly a fallback mode.
-    //
-    // TODO: Investigate a SIMD-based solution to these problems for
-    // SSE/AVX/Neon instructions.
+    // These helpers wrap the standard library implementations, which
+    // compile to efficient hardware instructions on supporting platforms.
     //
 
     /** Counts the number of leading zeros.
      *
      * The input cannot be zero.
      */
-    inline Size clz(unsigned int x) { return __builtin_clz(x); }
-    inline Size clz(unsigned long x) { return __builtin_clzl(x); }
-    inline Size clz(unsigned long long x) { return __builtin_clzll(x); }
+    template <typename T>
+    inline Size clz(T x) { return static_cast<Size>(std::countl_zero(x)); }
 
     /** Counts the number of trailing zeros.
      *
      * The input cannot be zero.
      */
-    inline Size ctz(unsigned int x) { return __builtin_ctz(x); }
-    inline Size ctz(unsigned long x) { return __builtin_ctzl(x); }
-    inline Size ctz(unsigned long long x) { return __builtin_ctzll(x); }
+    template <typename T>
+    inline Size ctz(T x) { return static_cast<Size>(std::countr_zero(x)); }
 
     /** Finds the index of the first set bit.
      *
@@ -68,17 +64,13 @@ namespace internal {
     template <typename T> Size fls(T x)
     {
         assert(x != 0);
-        return std::numeric_limits<T>::digits - clz(x) - 1;
+        return static_cast<Size>(std::bit_width(x) - 1);
     }
 
     /** Counts the number of set bits.
      */
-    inline Size popcount(unsigned int x) { return __builtin_popcount(x); }
-    inline Size popcount(unsigned long x) { return __builtin_popcountl(x); }
-    inline Size popcount(unsigned long long x)
-    {
-        return __builtin_popcountll(x);
-    }
+    template <typename T>
+    inline Size popcount(T x) { return static_cast<Size>(std::popcount(x)); }
 }
 
 /** The core base class.
@@ -755,6 +747,9 @@ class Fbitset : public Fbitset_base<N, L, E> {
     //
 
     /** Finds the index of the last (highest) set bit.
+     *
+     * @return The index of the highest set bit, or
+     *         std::numeric_limits<Size>::max() if no bit is set.
      */
     Size find_last() const noexcept
     {
@@ -766,7 +761,7 @@ class Fbitset : public Fbitset_base<N, L, E> {
                 return idx + LIMB_BITS * (i - 1);
             }
         }
-        return -1;
+        return std::numeric_limits<Size>::max();
     }
 
     /** Counts the number of all set bits inside the set.
diff --git a/test/basic.cpp b/test/basic.cpp
index 2ccb5f9..ada996f 100644
--- a/test/basic.cpp
+++ b/test/basic.cpp
@@ -70,7 +70,7 @@ TEST_CASE("Fbitset has basic behaviour")
             using Curr = std::decay_t<decltype(i)>;
 
             // This could give very good coverage of different branches.
-            for (Size n_set : { 0, 16, 32, 48, 64 }) {
+            for (Size n_set : { Size(0), Size(16), Size(32), Size(48), Size(64) }) {
                 Curr curr(n_set, true);
                 CHECK(curr.size() >= n_set);
                 for (Size j = 0; j < curr.size(); ++j) {
@@ -286,7 +286,7 @@ TEST_CASE("Fbitset has basic behaviour")
             i.flip(2);
 
             auto fin = i.find_last();
-            CHECK(fin == -1);
+            CHECK(fin == std::numeric_limits<Size>::max());
         });
     }