diff --git a/arrow-arith/Cargo.toml b/arrow-arith/Cargo.toml
index f2a4604c116e..6a02f543ecda 100644
--- a/arrow-arith/Cargo.toml
+++ b/arrow-arith/Cargo.toml
@@ -42,3 +42,6 @@ arrow-data = { workspace = true }
 arrow-schema = { workspace = true }
 chrono = { workspace = true }
 num-traits = { version = "0.2.19", default-features = false, features = ["std"] }
+
+[dev-dependencies]
+rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] }
diff --git a/arrow-arith/src/bitwise.rs b/arrow-arith/src/bitwise.rs
index aedeecd5b835..1daefa6cfd86 100644
--- a/arrow-arith/src/bitwise.rs
+++ b/arrow-arith/src/bitwise.rs
@@ -390,3 +390,244 @@ mod tests {
         assert_eq!(expected, result);
     }
 }
+
+// Helper functions for reference implementations
+fn ref_bitwise_and<T: std::ops::BitAnd<Output = T>>(a: Option<T>, b: Option<T>) -> Option<T> {
+    match (a, b) {
+        (Some(a), Some(b)) => Some(a & b),
+        _ => None,
+    }
+}
+
+fn ref_bitwise_or<T: std::ops::BitOr<Output = T>>(a: Option<T>, b: Option<T>) -> Option<T> {
+    match (a, b) {
+        (Some(a), Some(b)) => Some(a | b),
+        _ => None,
+    }
+}
+
+fn ref_bitwise_xor<T: std::ops::BitXor<Output = T>>(a: Option<T>, b: Option<T>) -> Option<T> {
+    match (a, b) {
+        (Some(a), Some(b)) => Some(a ^ b),
+        _ => None,
+    }
+}
+
+fn ref_bitwise_shift_left<T>(a: Option<T>, b: Option<T>) -> Option<T>
+where
+    T: num_traits::WrappingShl<Output = T> + num_traits::AsPrimitive<usize>,
+{
+    match (a, b) {
+        (Some(a), Some(b)) => {
+            let b = b.as_();
+            Some(a.wrapping_shl(b as u32))
+        }
+        _ => None,
+    }
+}
+
+fn ref_bitwise_shift_right<T>(a: Option<T>, b: Option<T>) -> Option<T>
+where
+    T: num_traits::WrappingShr<Output = T> + num_traits::AsPrimitive<usize>,
+{
+    match (a, b) {
+        (Some(a), Some(b)) => {
+            let b = b.as_();
+            Some(a.wrapping_shr(b as u32))
+        }
+        _ => None,
+    }
+}
+
+fn ref_bitwise_and_not<T>(a: Option<T>, b: Option<T>) -> Option<T>
+where
+    T: std::ops::BitAnd<Output = T> + std::ops::Not<Output = T>,
+{
+    match (a, b) {
+        (Some(a), Some(b)) => Some(a & !b),
+        _ => None,
+    }
+}
+
+fn ref_bitwise_not<T: std::ops::Not<Output = T>>(a: Option<T>) -> Option<T> {
+    a.map(|x| !x)
+}
+
+#[test]
+fn test_primitive_bitwise_binary_random_equivalence() {
+    use rand::{Rng, SeedableRng};
+
+    // Use a fixed seed for reproducible tests
+    let mut rng = rand::rngs::StdRng::from_seed([42u8; 32]);
+
+    for _ in 0..10 { // 10 iterations
+        let len = rng.random_range(1..=64);
+
+        // Generate for i32
+        let mut left_vec_i32 = Vec::with_capacity(len);
+        let mut right_vec_i32 = Vec::with_capacity(len);
+        for _ in 0..len {
+            let is_null = rng.random_bool(0.2);
+            let val = if is_null { None } else { Some(rng.random::<i32>()) };
+            left_vec_i32.push(val);
+            let is_null = rng.random_bool(0.2);
+            let val = if is_null { None } else { Some(rng.random::<i32>()) };
+            right_vec_i32.push(val);
+        }
+        let left_i32 = Int32Array::from(left_vec_i32.clone());
+        let right_i32 = Int32Array::from(right_vec_i32.clone());
+        let (left_slice_i32, right_slice_i32) = if len > 1 {
+            let slice_len = len - 1;
+            (left_i32.slice(1, slice_len), right_i32.slice(1, slice_len))
+        } else {
+            (left_i32.clone(), right_i32.clone())
+        };
+
+        // Test bitwise_and for i32
+        let result = bitwise_and(&left_i32, &right_i32).unwrap();
+        let expected: Vec<Option<i32>> = left_vec_i32.iter().zip(&right_vec_i32).map(|(a, b)| ref_bitwise_and(*a, *b)).collect();
+        let result_vec: Vec<Option<i32>> = result.iter().collect();
+        assert_eq!(result_vec, expected, "bitwise_and full i32 mismatch");
+
+        if len > 1 {
+            let result_slice = bitwise_and(&left_slice_i32, &right_slice_i32).unwrap();
+            let expected_slice: Vec<Option<i32>> = left_vec_i32[1..].iter().zip(&right_vec_i32[1..]).map(|(a, b)| ref_bitwise_and(*a, *b)).collect();
+            let result_slice_vec: Vec<Option<i32>> = result_slice.iter().collect();
+            assert_eq!(result_slice_vec, expected_slice, "bitwise_and sliced i32 mismatch");
+        }
+
+        // Test bitwise_or for i32
+        let result = bitwise_or(&left_i32, &right_i32).unwrap();
+        let expected: Vec<Option<i32>> = left_vec_i32.iter().zip(&right_vec_i32).map(|(a, b)| ref_bitwise_or(*a, *b)).collect();
+        let result_vec: Vec<Option<i32>> = result.iter().collect();
+        assert_eq!(result_vec, expected, "bitwise_or full i32 mismatch");
+
+        // Test bitwise_xor for i32
+        let result = bitwise_xor(&left_i32, &right_i32).unwrap();
+        let expected: Vec<Option<i32>> = left_vec_i32.iter().zip(&right_vec_i32).map(|(a, b)| ref_bitwise_xor(*a, *b)).collect();
+        let result_vec: Vec<Option<i32>> = result.iter().collect();
+        assert_eq!(result_vec, expected, "bitwise_xor full i32 mismatch");
+
+        // Test bitwise_shift_left for i32
+        let result = bitwise_shift_left(&left_i32, &right_i32).unwrap();
+        let expected: Vec<Option<i32>> = left_vec_i32.iter().zip(&right_vec_i32).map(|(a, b)| ref_bitwise_shift_left(*a, *b)).collect();
+        let result_vec: Vec<Option<i32>> = result.iter().collect();
+        assert_eq!(result_vec, expected, "bitwise_shift_left full i32 mismatch");
+
+        // Test bitwise_shift_right for i32
+        let result = bitwise_shift_right(&left_i32, &right_i32).unwrap();
+        let expected: Vec<Option<i32>> = left_vec_i32.iter().zip(&right_vec_i32).map(|(a, b)| ref_bitwise_shift_right(*a, *b)).collect();
+        let result_vec: Vec<Option<i32>> = result.iter().collect();
+        assert_eq!(result_vec, expected, "bitwise_shift_right full i32 mismatch");
+
+        // Test bitwise_and_not for i32
+        let result = bitwise_and_not(&left_i32, &right_i32).unwrap();
+        let expected: Vec<Option<i32>> = left_vec_i32.iter().zip(&right_vec_i32).map(|(a, b)| ref_bitwise_and_not(*a, *b)).collect();
+        let result_vec: Vec<Option<i32>> = result.iter().collect();
+        assert_eq!(result_vec, expected, "bitwise_and_not full i32 mismatch");
+
+        // Generate for u32
+        let mut left_vec_u32 = Vec::with_capacity(len);
+        let mut right_vec_u32 = Vec::with_capacity(len);
+        for _ in 0..len {
+            let is_null = rng.random_bool(0.2);
+            let val = if is_null { None } else { Some(rng.random::<u32>()) };
+            left_vec_u32.push(val);
+            let is_null = rng.random_bool(0.2);
+            let val = if is_null { None } else { Some(rng.random::<u32>()) };
+            right_vec_u32.push(val);
+        }
+        let left_u32 = UInt32Array::from(left_vec_u32.clone());
+        let right_u32 = UInt32Array::from(right_vec_u32.clone());
+
+        // Test bitwise_and for u32
+        let result = bitwise_and(&left_u32, &right_u32).unwrap();
+        let expected: Vec<Option<u32>> = left_vec_u32.iter().zip(&right_vec_u32).map(|(a, b)| ref_bitwise_and(*a, *b)).collect();
+        let result_vec: Vec<Option<u32>> = result.iter().collect();
+        assert_eq!(result_vec, expected, "bitwise_and full u32 mismatch");
+
+        // Test bitwise_or for u32
+        let result = bitwise_or(&left_u32, &right_u32).unwrap();
+        let expected: Vec<Option<u32>> = left_vec_u32.iter().zip(&right_vec_u32).map(|(a, b)| ref_bitwise_or(*a, *b)).collect();
+        let result_vec: Vec<Option<u32>> = result.iter().collect();
+        assert_eq!(result_vec, expected, "bitwise_or full u32 mismatch");
+
+        // Test bitwise_xor for u32
+        let result = bitwise_xor(&left_u32, &right_u32).unwrap();
+        let expected: Vec<Option<u32>> = left_vec_u32.iter().zip(&right_vec_u32).map(|(a, b)| ref_bitwise_xor(*a, *b)).collect();
+        let result_vec: Vec<Option<u32>> = result.iter().collect();
+        assert_eq!(result_vec, expected, "bitwise_xor full u32 mismatch");
+
+        // Test bitwise_shift_left for u32
+        let result = bitwise_shift_left(&left_u32, &right_u32).unwrap();
+        let expected: Vec<Option<u32>> = left_vec_u32.iter().zip(&right_vec_u32).map(|(a, b)| ref_bitwise_shift_left(*a, *b)).collect();
+        let result_vec: Vec<Option<u32>> = result.iter().collect();
+        assert_eq!(result_vec, expected, "bitwise_shift_left full u32 mismatch");
+
+        // Test bitwise_shift_right for u32
+        let result = bitwise_shift_right(&left_u32, &right_u32).unwrap();
+        let expected: Vec<Option<u32>> = left_vec_u32.iter().zip(&right_vec_u32).map(|(a, b)| ref_bitwise_shift_right(*a, *b)).collect();
+        let result_vec: Vec<Option<u32>> = result.iter().collect();
+        assert_eq!(result_vec, expected, "bitwise_shift_right full u32 mismatch");
+
+        // Test bitwise_and_not for u32
+        let result = bitwise_and_not(&left_u32, &right_u32).unwrap();
+        let expected: Vec<Option<u32>> = left_vec_u32.iter().zip(&right_vec_u32).map(|(a, b)| ref_bitwise_and_not(*a, *b)).collect();
+        let result_vec: Vec<Option<u32>> = result.iter().collect();
+        assert_eq!(result_vec, expected, "bitwise_and_not full u32 mismatch");
+    }
+}
+
+#[test]
+fn test_primitive_bitwise_unary_random_equivalence() {
+    use rand::{Rng, SeedableRng};
+
+    // Use a fixed seed for reproducible tests
+    let mut rng = rand::rngs::StdRng::from_seed([43u8; 32]);
+
+    for _ in 0..10 { // 10 iterations
+        let len = rng.random_range(1..=64);
+
+        // Generate for i32
+        let mut vec_i32 = Vec::with_capacity(len);
+        for _ in 0..len {
+            let is_null = rng.random_bool(0.2);
+            let val = if is_null { None } else { Some(rng.random::<i32>()) };
+            vec_i32.push(val);
+        }
+        let array_i32 = Int32Array::from(vec_i32.clone());
+        let array_slice_i32 = if len > 1 {
+            array_i32.slice(1, len - 1)
+        } else {
+            array_i32.clone()
+        };
+
+        // Test bitwise_not for i32
+        let result = bitwise_not(&array_i32).unwrap();
+        let expected: Vec<Option<i32>> = vec_i32.iter().map(|a| ref_bitwise_not(*a)).collect();
+        let result_vec: Vec<Option<i32>> = result.iter().collect();
+        assert_eq!(result_vec, expected, "bitwise_not full i32 mismatch");
+
+        if len > 1 {
+            let result_slice = bitwise_not(&array_slice_i32).unwrap();
+            let expected_slice: Vec<Option<i32>> = vec_i32[1..].iter().map(|a| ref_bitwise_not(*a)).collect();
+            let result_slice_vec: Vec<Option<i32>> = result_slice.iter().collect();
+            assert_eq!(result_slice_vec, expected_slice, "bitwise_not sliced i32 mismatch");
+        }
+
+        // Generate for u32
+        let mut vec_u32 = Vec::with_capacity(len);
+        for _ in 0..len {
+            let is_null = rng.random_bool(0.2);
+            let val = if is_null { None } else { Some(rng.random::<u32>()) };
+            vec_u32.push(val);
+        }
+        let array_u32 = UInt32Array::from(vec_u32.clone());
+
+        // Test bitwise_not for u32
+        let result = bitwise_not(&array_u32).unwrap();
+        let expected: Vec<Option<u32>> = vec_u32.iter().map(|a| ref_bitwise_not(*a)).collect();
+        let result_vec: Vec<Option<u32>> = result.iter().collect();
+        assert_eq!(result_vec, expected, "bitwise_not full u32 mismatch");
+    }
+}
diff --git a/arrow-arith/src/boolean.rs b/arrow-arith/src/boolean.rs
index d94df49de256..ef9f085284a6 100644
--- a/arrow-arith/src/boolean.rs
+++ b/arrow-arith/src/boolean.rs
@@ -24,7 +24,7 @@
 
 use arrow_array::*;
 use arrow_buffer::buffer::{bitwise_bin_op_helper, bitwise_quaternary_op_helper};
-use arrow_buffer::{BooleanBuffer, NullBuffer, buffer_bin_and_not};
+use arrow_buffer::{BooleanBuffer, NullBuffer};
 use arrow_schema::ArrowError;
 
 /// Logical 'and' boolean values with Kleene logic
@@ -252,7 +252,7 @@ where
 /// assert_eq!(and_ab, BooleanArray::from(vec![Some(false), Some(true), None]));
 /// ```
 pub fn and(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray, ArrowError> {
-    binary_boolean_kernel(left, right, |a, b| a & b)
+    left.binary(right, |a, b| a & b)
 }
 
 /// Performs `OR` operation on two arrays. If either left or right value is null then the
@@ -269,7 +269,7 @@ pub fn and(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray, Ar
 /// assert_eq!(or_ab, BooleanArray::from(vec![Some(true), Some(true), None]));
 /// ```
 pub fn or(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray, ArrowError> {
-    binary_boolean_kernel(left, right, |a, b| a | b)
+    left.binary(right, |a, b| a | b)
 }
 
 /// Performs `AND_NOT` operation on two arrays. If either left or right value is null then the
@@ -287,10 +287,7 @@ pub fn or(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray, Arr
 /// // It's equal to and(left, not(right))
 /// assert_eq!(andn_ab, and(&a, &not(&b).unwrap()).unwrap());
 pub fn and_not(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray, ArrowError> {
-    binary_boolean_kernel(left, right, |a, b| {
-        let buffer = buffer_bin_and_not(a.inner(), b.offset(), b.inner(), a.offset(), a.len());
-        BooleanBuffer::new(buffer, left.offset(), left.len())
-    })
+    left.binary(right, |a, b| a & !b)
 }
 
 /// Performs unary `NOT` operation on an arrays. If value is null then the result is also
@@ -306,9 +303,7 @@ pub fn and_not(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray
 /// assert_eq!(not_a, BooleanArray::from(vec![Some(true), Some(false), None]));
 /// ```
 pub fn not(left: &BooleanArray) -> Result<BooleanArray, ArrowError> {
-    let nulls = left.nulls().cloned();
-    let values = !left.values();
-    Ok(BooleanArray::new(values, nulls))
+    Ok(left.unary(|a| !a))
 }
 
 /// Returns a non-null [BooleanArray] with whether each value of the array is null.
@@ -971,4 +966,336 @@ mod tests {
         .into_iter()
         .collect()
     }
+
+    #[test]
+    fn test_boolean_kernels_with_nulls_and_offsets() {
+        // Construct BooleanArrays with mixed values and nulls
+        let left = BooleanArray::from(vec![
+            Some(true), Some(false), None, Some(true), Some(false), None, Some(true)
+        ]);
+        let right = BooleanArray::from(vec![
+            None, Some(true), Some(false), None, Some(true), Some(false), Some(true)
+        ]);
+
+        // Create sliced views with non-zero offsets
+        let left_sliced = left.slice(1, 5); // Some(false), None, Some(true), Some(false), None
+        let right_sliced = right.slice(2, 5); // Some(false), None, Some(true), Some(false), Some(true)
+
+        // Test and
+        let result_full = and(&left, &right).unwrap();
+        let result_sliced = and(&left_sliced, &right_sliced).unwrap();
+
+        let expected_full = BooleanArray::from(vec![
+            None, Some(false), None, None, Some(false), None, Some(true)
+        ]);
+        let expected_sliced = BooleanArray::from(vec![
+            Some(false), None, Some(true), Some(false), None
+        ]);
+
+        assert_eq!(result_full, expected_full);
+        assert_eq!(result_sliced, expected_sliced);
+
+        // Test or
+        let result_full = or(&left, &right).unwrap();
+        let result_sliced = or(&left_sliced, &right_sliced).unwrap();
+
+        let expected_full = BooleanArray::from(vec![
+            None, Some(true), None, None, Some(true), None, Some(true)
+        ]);
+        let expected_sliced = BooleanArray::from(vec![
+            Some(false), None, Some(true), Some(false), None
+        ]);
+
+        assert_eq!(result_full, expected_full);
+        assert_eq!(result_sliced, expected_sliced);
+
+        // Test and_kleene: true if both true, false if either false, null otherwise
+        let result_full = and_kleene(&left, &right).unwrap();
+        let result_sliced = and_kleene(&left_sliced, &right_sliced).unwrap();
+
+        let expected_full = BooleanArray::from(vec![
+            None, Some(false), Some(false), None, Some(false), Some(false), Some(true)
+        ]);
+        let expected_sliced = BooleanArray::from(vec![
+            Some(false), None, Some(true), Some(false), None
+        ]);
+
+        assert_eq!(result_full, expected_full);
+        assert_eq!(result_sliced, expected_sliced);
+
+        // Test or_kleene: false if both false, true if either true, null otherwise
+        let result_full = or_kleene(&left, &right).unwrap();
+        let result_sliced = or_kleene(&left_sliced, &right_sliced).unwrap();
+
+        let expected_full = BooleanArray::from(vec![
+            Some(true), Some(true), None, Some(true), Some(true), None, Some(true)
+        ]);
+        let expected_sliced = BooleanArray::from(vec![
+            Some(false), None, Some(true), Some(false), Some(true)
+        ]);
+
+        assert_eq!(result_full, expected_full);
+        assert_eq!(result_sliced, expected_sliced);
+
+        // Test not
+        let result_full = not(&left).unwrap();
+        let result_sliced = not(&left_sliced).unwrap();
+
+        let expected_full = BooleanArray::from(vec![
+            Some(false), Some(true), None, Some(false), Some(true), None, Some(false)
+        ]);
+        let expected_sliced = BooleanArray::from(vec![
+            Some(true), None, Some(false), Some(true), None
+        ]);
+
+        assert_eq!(result_full, expected_full);
+        assert_eq!(result_sliced, expected_sliced);
+    }
+
+    #[test]
+    fn test_boolean_kernels_zero_length_and_all_null() {
+        // Empty arrays
+        let empty = BooleanArray::from(Vec::<Option<bool>>::new());
+        let result_and = and(&empty, &empty).unwrap();
+        let result_or = or(&empty, &empty).unwrap();
+        let result_not = not(&empty).unwrap();
+        let result_and_kleene = and_kleene(&empty, &empty).unwrap();
+        let result_or_kleene = or_kleene(&empty, &empty).unwrap();
+
+        assert_eq!(result_and.len(), 0);
+        assert_eq!(result_or.len(), 0);
+        assert_eq!(result_not.len(), 0);
+        assert_eq!(result_and_kleene.len(), 0);
+        assert_eq!(result_or_kleene.len(), 0);
+
+        // All-null arrays
+        let all_null = BooleanArray::new_null(5);
+        let result_and = and(&all_null, &all_null).unwrap();
+        let result_or = or(&all_null, &all_null).unwrap();
+        let result_not = not(&all_null).unwrap();
+        let result_and_kleene = and_kleene(&all_null, &all_null).unwrap();
+        let result_or_kleene = or_kleene(&all_null, &all_null).unwrap();
+
+        assert_eq!(result_and, all_null);
+        assert_eq!(result_or, all_null);
+        assert_eq!(result_not, all_null);
+        assert_eq!(result_and_kleene, all_null);
+        assert_eq!(result_or_kleene, all_null);
+
+        // Array with only first element non-null
+        let partial = BooleanArray::from(vec![Some(true), None, None, None, None]);
+        let result_not = not(&partial).unwrap();
+        let expected_not = BooleanArray::from(vec![Some(false), None, None, None, None]);
+        assert_eq!(result_not, expected_not);
+
+        // Array with only last element non-null
+        let partial = BooleanArray::from(vec![None, None, None, None, Some(false)]);
+        let result_not = not(&partial).unwrap();
+        let expected_not = BooleanArray::from(vec![None, None, None, None, Some(true)]);
+        assert_eq!(result_not, expected_not);
+    }
+
+    // Helper functions for reference implementations
+    fn ref_and_sql(a: Option<bool>, b: Option<bool>) -> Option<bool> {
+        match (a, b) {
+            (Some(a), Some(b)) => Some(a & b),
+            _ => None,
+        }
+    }
+
+    fn ref_or_sql(a: Option<bool>, b: Option<bool>) -> Option<bool> {
+        match (a, b) {
+            (Some(a), Some(b)) => Some(a | b),
+            _ => None,
+        }
+    }
+
+    fn ref_and_kleene(a: Option<bool>, b: Option<bool>) -> Option<bool> {
+        match (a, b) {
+            (Some(a), Some(b)) => Some(a & b),
+            (None, Some(b)) => if !b { Some(false) } else { None },
+            (Some(a), None) => if !a { Some(false) } else { None },
+            (None, None) => None,
+        }
+    }
+
+    fn ref_or_kleene(a: Option<bool>, b: Option<bool>) -> Option<bool> {
+        match (a, b) {
+            (Some(a), Some(b)) => Some(a | b),
+            (None, Some(b)) => if b { Some(true) } else { None },
+            (Some(a), None) => if a { Some(true) } else { None },
+            (None, None) => None,
+        }
+    }
+
+    fn ref_not(a: Option<bool>) -> Option<bool> {
+        a.map(|x| !x)
+    }
+
+    #[test]
+    fn test_boolean_kernels_random_equivalence() {
+        use rand::{Rng, SeedableRng};
+
+        // Use a fixed seed for reproducible tests
+        let mut rng = rand::rngs::StdRng::from_seed([48u8; 32]);
+
+        for _ in 0..20 { // 20 random iterations
+            // Pick random length 1..64
+            let len = rng.random_range(1..=64);
+
+            // Generate random Vec<Option<bool>> for left and right
+            let mut left_vec = Vec::with_capacity(len);
+            let mut right_vec = Vec::with_capacity(len);
+            for _ in 0..len {
+                let is_null = rng.random_bool(0.2); // 20% chance of null
+                let val = if is_null { None } else { Some(rng.random_bool(0.5)) };
+                left_vec.push(val);
+                let is_null = rng.random_bool(0.2);
+                let val = if is_null { None } else { Some(rng.random_bool(0.5)) };
+                right_vec.push(val);
+            }
+
+            // Construct BooleanArrays
+            let left = BooleanArray::from(left_vec.clone());
+            let right = BooleanArray::from(right_vec.clone());
+
+            // Construct sliced variants if possible
+            let (left_slice, right_slice) = if len > 1 {
+                let slice_len = len - 1;
+                (left.slice(1, slice_len), right.slice(1, slice_len))
+            } else {
+                (left.clone(), right.clone()) // fallback for len=1
+            };
+
+            // Test each kernel
+            let kernels = vec![
+                ("and", Box::new(|l: &BooleanArray, r: &BooleanArray| and(l, r).unwrap()) as Box<dyn Fn(&BooleanArray, &BooleanArray) -> BooleanArray>),
+                ("or", Box::new(|l, r| or(l, r).unwrap())),
+                ("and_kleene", Box::new(|l, r| and_kleene(l, r).unwrap())),
+                ("or_kleene", Box::new(|l, r| or_kleene(l, r).unwrap())),
+            ];
+
+            for (name, kernel) in kernels {
+                // Full arrays
+                let result = kernel(&left, &right);
+                let expected: Vec<Option<bool>> = left_vec.iter().zip(&right_vec).map(|(a, b)| match name {
+                    "and" => ref_and_sql(*a, *b),
+                    "or" => ref_or_sql(*a, *b),
+                    "and_kleene" => ref_and_kleene(*a, *b),
+                    "or_kleene" => ref_or_kleene(*a, *b),
+                    _ => unreachable!(),
+                }).collect();
+                let result_vec: Vec<Option<bool>> = result.iter().collect();
+                assert_eq!(result_vec, expected, "Full {} mismatch", name);
+
+                // Sliced arrays
+                if len > 1 {
+                    let result_slice = kernel(&left_slice, &right_slice);
+                    let expected_slice: Vec<Option<bool>> = left_vec[1..].iter().zip(&right_vec[1..]).map(|(a, b)| match name {
+                        "and" => ref_and_sql(*a, *b),
+                        "or" => ref_or_sql(*a, *b),
+                        "and_kleene" => ref_and_kleene(*a, *b),
+                        "or_kleene" => ref_or_kleene(*a, *b),
+                        _ => unreachable!(),
+                    }).collect();
+                    let result_slice_vec: Vec<Option<bool>> = result_slice.iter().collect();
+                    assert_eq!(result_slice_vec, expected_slice, "Sliced {} mismatch", name);
+                }
+            }
+
+            // Test not separately
+            let result_not = not(&left).unwrap();
+            let expected_not: Vec<Option<bool>> = left_vec.iter().map(|a| ref_not(*a)).collect();
+            let result_not_vec: Vec<Option<bool>> = result_not.iter().collect();
+            assert_eq!(result_not_vec, expected_not, "Full not mismatch");
+
+            if len > 1 {
+                let result_not_slice = not(&left_slice).unwrap();
+                let expected_not_slice: Vec<Option<bool>> = left_vec[1..].iter().map(|a| ref_not(*a)).collect();
+                let result_not_slice_vec: Vec<Option<bool>> = result_not_slice.iter().collect();
+                assert_eq!(result_not_slice_vec, expected_not_slice, "Sliced not mismatch");
+            }
+        }
+    }
+
+    #[test]
+    fn test_boolean_array_byte_boundary_regressions() {
+        // Test historically dangerous bitmap patterns for BooleanArray binary/unary operations
+        // Construct BooleanArray from Vec<Option<bool>> with length 10: [T, F, None, T, F, None, T, F, None, T]
+        // Underlying bitmap: bits for values and nulls
+        let data = vec![Some(true), Some(false), None, Some(true), Some(false), None, Some(true), Some(false), None, Some(true)];
+        let array = BooleanArray::from(data.clone());
+
+        // Slice cases: (slice_start, slice_len, description)
+        let slice_cases = vec![
+            (0, 5, "start=0, len=5"),
+            (1, 4, "start=1, len=4 (offset+len=5)"),
+            (3, 5, "start=3, len=5 (cross potential boundary)"),
+            (5, 5, "start=5, len=5"),
+        ];
+
+        for (start, len, desc) in slice_cases {
+            let slice = array.slice(start, len);
+            let slice_data = &data[start..start+len];
+
+            // Test unary NOT
+            let result_not = slice.unary(|a| !a);
+            let expected_not: Vec<Option<bool>> = slice_data.iter().map(|x| x.map(|b| !b)).collect();
+            let result_not_vec: Vec<Option<bool>> = result_not.iter().collect();
+            assert_eq!(result_not_vec, expected_not, "NOT {} mismatch", desc);
+
+            // For binary, need another slice; use the same slice for simplicity, but with different op
+            // Test binary AND with itself (should be identity for non-null)
+            let result_and = slice.binary(&slice, |a, b| a & b).unwrap();
+            let expected_and: Vec<Option<bool>> = slice_data.iter().map(|x| match x {
+                Some(b) => Some(b & b),
+                None => None,
+            }).collect();
+            let result_and_vec: Vec<Option<bool>> = result_and.iter().collect();
+            assert_eq!(result_and_vec, expected_and, "AND self {} mismatch", desc);
+
+            // Test binary OR with itself
+            let result_or = slice.binary(&slice, |a, b| a | b).unwrap();
+            let expected_or: Vec<Option<bool>> = slice_data.iter().map(|x| match x {
+                Some(b) => Some(b | b),
+                None => None,
+            }).collect();
+            let result_or_vec: Vec<Option<bool>> = result_or.iter().collect();
+            assert_eq!(result_or_vec, expected_or, "OR self {} mismatch", desc);
+        }
+    }
+
+    #[test]
+    fn test_and_kleene_byte_boundary_regressions() {
+        // Test and_kleene with slices that hit byte boundaries
+        let left_data = vec![Some(true), Some(false), None, Some(true), Some(false), None, Some(true), Some(false), None, Some(true)];
+        let right_data = vec![Some(false), Some(true), Some(true), Some(false), Some(true), Some(false), Some(true), Some(false), Some(true), Some(false)];
+        let left = BooleanArray::from(left_data.clone());
+        let right = BooleanArray::from(right_data.clone());
+
+        // Slice cases
+        let slice_cases = vec![
+            (0, 5),
+            (1, 4),
+            (3, 5),
+            (5, 5),
+        ];
+
+        for (start, len) in slice_cases {
+            let left_slice = left.slice(start, len);
+            let right_slice = right.slice(start, len);
+            let left_slice_data = &left_data[start..start+len];
+            let right_slice_data = &right_data[start..start+len];
+
+            let result = and_kleene(&left_slice, &right_slice).unwrap();
+            let expected: Vec<Option<bool>> = left_slice_data.iter().zip(right_slice_data).map(|(a, b)| match (a, b) {
+                (Some(a), Some(b)) => Some(a & b),
+                (None, Some(b)) => if !b { Some(false) } else { None },
+                (Some(a), None) => if !a { Some(false) } else { None },
+                (None, None) => None,
+            }).collect();
+            let result_vec: Vec<Option<bool>> = result.iter().collect();
+            assert_eq!(result_vec, expected, "and_kleene slice start={}, len={} mismatch", start, len);
+        }
+    }
 }
diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs
index 7967084aa7ab..9cc22ff242ec 100644
--- a/arrow-array/src/array/boolean_array.rs
+++ b/arrow-array/src/array/boolean_array.rs
@@ -21,7 +21,7 @@ use crate::iterator::BooleanIter;
 use crate::{Array, ArrayAccessor, ArrayRef, Scalar};
 use arrow_buffer::{BooleanBuffer, Buffer, MutableBuffer, NullBuffer, bit_util};
 use arrow_data::{ArrayData, ArrayDataBuilder};
-use arrow_schema::DataType;
+use arrow_schema::{ArrowError, DataType};
 use std::any::Any;
 use std::sync::Arc;
 
@@ -284,6 +284,52 @@ impl BooleanArray {
     pub fn into_parts(self) -> (BooleanBuffer, Option<NullBuffer>) {
         (self.values, self.nulls)
     }
+
+    /// Apply a bitwise unary operation to this array, returning a new array.
+    ///
+    /// The operation is applied to the values, and nulls are cloned.
+    ///
+    /// # Arguments
+    ///
+    /// * `op` - The unary operation to apply.
+    pub fn unary<F>(&self, op: F) -> Self
+    where
+        F: Fn(u64) -> u64 + Copy,
+    {
+        let buffer = self.values().inner().bitwise_unary(self.values().offset(), self.len(), op);
+        let values = BooleanBuffer::new(buffer, 0, self.len());
+        let nulls = self.nulls().cloned();
+        Self::new(values, nulls)
+    }
+
+    /// Apply a bitwise binary operation between this array and another, returning a new array.
+    ///
+    /// The operation is applied to the values, and nulls are combined as union.
+    ///
+    /// # Arguments
+    ///
+    /// * `other` - The other array.
+    /// * `op` - The binary operation to apply.
+    pub fn binary<F>(&self, other: &BooleanArray, op: F) -> Result<Self, ArrowError>
+    where
+        F: Fn(u64, u64) -> u64 + Copy,
+    {
+        if self.len() != other.len() {
+            return Err(ArrowError::ComputeError(
+                "Cannot perform bitwise operation on arrays of different length".to_string(),
+            ));
+        }
+        let buffer = self.values().inner().bitwise_binary(
+            other.values().inner(),
+            self.values().offset(),
+            other.values().offset(),
+            self.len(),
+            op,
+        );
+        let values = BooleanBuffer::new(buffer, 0, self.len());
+        let nulls = NullBuffer::union(self.nulls(), other.nulls());
+        Ok(Self::new(values, nulls))
+    }
 }
 
 impl Array for BooleanArray {
@@ -829,4 +875,45 @@ mod tests {
         assert_eq!(values.values(), &[0b1000_0000]);
         assert!(nulls.is_none());
     }
+
+    #[test]
+    fn test_boolean_array_binary_nulls() {
+        // Test BooleanArray::binary with nulls
+        let left = BooleanArray::from(vec![Some(true), None, Some(false), Some(true)]);
+        let right = BooleanArray::from(vec![Some(false), Some(true), None, Some(false)]);
+
+        // Test and
+        let result = BooleanArray::binary(&left, &right, |a, b| a & b).unwrap();
+        let expected = BooleanArray::from(vec![Some(false), None, None, Some(false)]);
+        assert_eq!(result.iter().collect::<Vec<_>>(), expected.iter().collect::<Vec<_>>());
+
+        // Test or
+        let result = BooleanArray::binary(&left, &right, |a, b| a | b).unwrap();
+        let expected = BooleanArray::from(vec![Some(true), None, None, Some(true)]);
+        assert_eq!(result.iter().collect::<Vec<_>>(), expected.iter().collect::<Vec<_>>());
+
+        // Test with offsets (sliced arrays)
+        let left_sliced = left.slice(1, 3); // [None, false, true]
+        let right_sliced = right.slice(1, 3); // [true, None, false]
+
+        let result = BooleanArray::binary(&left_sliced, &right_sliced, |a, b| a & b).unwrap();
+        let expected = BooleanArray::from(vec![None, None, Some(false)]);
+        assert_eq!(result.iter().collect::<Vec<_>>(), expected.iter().collect::<Vec<_>>());
+    }
+
+    #[test]
+    fn test_boolean_array_unary_nulls() {
+        // Test BooleanArray::unary with nulls
+        let array = BooleanArray::from(vec![Some(true), None, Some(false), Some(true)]);
+
+        let result = BooleanArray::unary(&array, |a| !a);
+        let expected = BooleanArray::from(vec![Some(false), None, Some(true), Some(false)]);
+        assert_eq!(result.iter().collect::<Vec<_>>(), expected.iter().collect::<Vec<_>>());
+
+        // Test with offsets
+        let sliced = array.slice(1, 3); // [None, false, true]
+        let result = BooleanArray::unary(&sliced, |a| !a);
+        let expected = BooleanArray::from(vec![None, Some(true), Some(false)]);
+        assert_eq!(result.iter().collect::<Vec<_>>(), expected.iter().collect::<Vec<_>>());
+    }
 }
diff --git a/arrow-buffer/src/buffer/boolean.rs b/arrow-buffer/src/buffer/boolean.rs
index a3bcabbfdb34..0cd169b4dff7 100644
--- a/arrow-buffer/src/buffer/boolean.rs
+++ b/arrow-buffer/src/buffer/boolean.rs
@@ -18,8 +18,7 @@
 use crate::bit_chunk_iterator::BitChunks;
 use crate::bit_iterator::{BitIndexIterator, BitIndexU32Iterator, BitIterator, BitSliceIterator};
 use crate::{
-    BooleanBufferBuilder, Buffer, MutableBuffer, bit_util, buffer_bin_and, buffer_bin_or,
-    buffer_bin_xor, buffer_unary_not,
+    BooleanBufferBuilder, Buffer, MutableBuffer, bit_util,
 };
 
 use std::ops::{BitAnd, BitOr, BitXor, Not};
@@ -224,7 +223,7 @@ impl Not for &BooleanBuffer {
 
     fn not(self) -> Self::Output {
         BooleanBuffer {
-            buffer: buffer_unary_not(&self.buffer, self.offset, self.len),
+            buffer: self.buffer.bitwise_unary(self.offset, self.len, |a| !a),
             offset: 0,
             len: self.len,
         }
@@ -237,7 +236,7 @@ impl BitAnd<&BooleanBuffer> for &BooleanBuffer {
     fn bitand(self, rhs: &BooleanBuffer) -> Self::Output {
         assert_eq!(self.len, rhs.len);
         BooleanBuffer {
-            buffer: buffer_bin_and(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
+            buffer: self.buffer.bitwise_binary(&rhs.buffer, self.offset, rhs.offset, self.len, |a, b| a & b),
             offset: 0,
             len: self.len,
         }
@@ -250,7 +249,7 @@ impl BitOr<&BooleanBuffer> for &BooleanBuffer {
     fn bitor(self, rhs: &BooleanBuffer) -> Self::Output {
         assert_eq!(self.len, rhs.len);
         BooleanBuffer {
-            buffer: buffer_bin_or(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
+            buffer: self.buffer.bitwise_binary(&rhs.buffer, self.offset, rhs.offset, self.len, |a, b| a | b),
             offset: 0,
             len: self.len,
         }
@@ -263,7 +262,7 @@ impl BitXor<&BooleanBuffer> for &BooleanBuffer {
     fn bitxor(self, rhs: &BooleanBuffer) -> Self::Output {
         assert_eq!(self.len, rhs.len);
         BooleanBuffer {
-            buffer: buffer_bin_xor(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
+            buffer: self.buffer.bitwise_binary(&rhs.buffer, self.offset, rhs.offset, self.len, |a, b| a ^ b),
             offset: 0,
             len: self.len,
         }
diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs
index 55a4621540c8..170e2740d628 100644
--- a/arrow-buffer/src/buffer/immutable.rs
+++ b/arrow-buffer/src/buffer/immutable.rs
@@ -28,7 +28,6 @@ use crate::{bit_util, bytes::Bytes, native::ArrowNativeType};
 #[cfg(feature = "pool")]
 use crate::pool::MemoryPool;
 
-use super::ops::bitwise_unary_op_helper;
 use super::{MutableBuffer, ScalarBuffer};
 
 /// A contiguous memory region that can be shared with other buffers and across
@@ -344,7 +343,7 @@ impl Buffer {
             return self.slice_with_length(offset / 8, bit_util::ceil(len, 8));
         }
 
-        bitwise_unary_op_helper(self, offset, len, |a| a)
+        self.bitwise_unary(offset, len, |a| a)
     }
 
     /// Returns a `BitChunks` instance which can be used to iterate over this buffers bits
@@ -444,6 +443,77 @@ impl Buffer {
     pub fn claim(&self, pool: &dyn MemoryPool) {
         self.data.claim(pool)
     }
+
+    /// Apply a bitwise unary operation to this buffer, returning a new buffer.
+    ///
+    /// The operation is applied to `len` bits starting at `offset` bits.
+    ///
+    /// # Arguments
+    ///
+    /// * `offset` - The bit offset to start the operation.
+    /// * `len` - The number of bits to process.
+    /// * `op` - The unary operation to apply.
+    pub fn bitwise_unary<F>(&self, offset: usize, len: usize, op: F) -> Buffer
+    where
+        F: Fn(u64) -> u64,
+    {
+        // reserve capacity and set length so we can get a typed view of u64 chunks
+        let mut result =
+            MutableBuffer::new(crate::util::bit_util::ceil(len, 8)).with_bitset(len / 64 * 8, false);
+
+        let left_chunks = self.bit_chunks(offset, len);
+
+        let result_chunks = result.typed_data_mut::<u64>().iter_mut();
+
+        result_chunks
+            .zip(left_chunks.iter())
+            .for_each(|(res, left)| {
+                *res = op(left);
+            });
+
+        let remainder_bytes = crate::util::bit_util::ceil(left_chunks.remainder_len(), 8);
+        let rem = op(left_chunks.remainder_bits());
+        // we are counting its starting from the least significant bit, to to_le_bytes should be correct
+        let rem = &rem.to_le_bytes()[0..remainder_bytes];
+        result.extend_from_slice(rem);
+
+        result.into()
+    }
+
+    /// Apply a bitwise binary operation between this buffer and another, returning a new buffer.
+    ///
+    /// The operation is applied to `len` bits starting at `self_offset` in self and `other_offset` in other.
+    ///
+    /// # Arguments
+    ///
+    /// * `other` - The other buffer.
+    /// * `self_offset` - The bit offset in self.
+    /// * `other_offset` - The bit offset in other.
+    /// * `len` - The number of bits to process.
+    /// * `op` - The binary operation to apply.
+    pub fn bitwise_binary<F>(&self, other: &Buffer, self_offset: usize, other_offset: usize, len: usize, op: F) -> Buffer
+    where
+        F: Fn(u64, u64) -> u64,
+    {
+        let left_chunks = self.bit_chunks(self_offset, len);
+        let right_chunks = other.bit_chunks(other_offset, len);
+
+        let chunks = left_chunks
+            .iter()
+            .zip(right_chunks.iter())
+            .map(|(left, right)| op(left, right));
+        // Soundness: `BitChunks` is a `BitChunks` iterator which
+        // correctly reports its upper bound
+        let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(chunks) };
+
+        let remainder_bytes = crate::util::bit_util::ceil(left_chunks.remainder_len(), 8);
+        let rem = op(left_chunks.remainder_bits(), right_chunks.remainder_bits());
+        // we are counting its starting from the least significant bit, to to_le_bytes should be correct
+        let rem = &rem.to_le_bytes()[0..remainder_bytes];
+        buffer.extend_from_slice(rem);
+
+        buffer.into()
+    }
 }
 
 /// Note that here we deliberately do not implement
@@ -598,6 +668,7 @@ impl<T: ArrowNativeType> FromIterator<T> for Buffer {
 #[cfg(test)]
 mod tests {
     use crate::i256;
+    use rand::{Rng, SeedableRng};
     use std::panic::{RefUnwindSafe, UnwindSafe};
     use std::thread;
 
@@ -1065,4 +1136,252 @@ mod tests {
         drop(capture);
         assert_eq!(buffer2.strong_count(), 1);
     }
+
+    #[test]
+    fn test_buffer_bitwise_binary_offsets() {
+        use crate::buffer::ops::{buffer_bin_and, buffer_bin_or};
+
+        let left = Buffer::from(vec![0b10101010u8, 0b01010101u8]);
+        let right = Buffer::from(vec![0b11110000u8, 0b00001111u8]);
+
+        // Test with offset 0, len full (16 bits)
+        let new_result = left.bitwise_binary(&right, 0, 0, 16, |a, b| a & b);
+        let old_result = buffer_bin_and(&left, 0, &right, 0, 16);
+        assert_eq!(new_result, old_result);
+
+        // Test with offset 1, len 7 (crosses byte boundary)
+        let new_result = left.bitwise_binary(&right, 1, 1, 7, |a, b| a & b);
+        let old_result = buffer_bin_and(&left, 1, &right, 1, 7);
+        assert_eq!(new_result, old_result);
+
+        // Test tail bits, len not multiple of 8 (10 bits)
+        let new_result = left.bitwise_binary(&right, 0, 0, 10, |a, b| a & b);
+        let old_result = buffer_bin_and(&left, 0, &right, 0, 10);
+        assert_eq!(new_result, old_result);
+
+        // Test or operation
+        let new_result = left.bitwise_binary(&right, 0, 0, 16, |a, b| a | b);
+        let old_result = buffer_bin_or(&left, 0, &right, 0, 16);
+        assert_eq!(new_result, old_result);
+    }
+
+    #[test]
+    fn test_buffer_bitwise_unary_offsets() {
+        use crate::buffer::ops::buffer_unary_not;
+
+        let buffer = Buffer::from(vec![0b10101010u8, 0b01010101u8]);
+
+        // Test with offset 0, len full
+        let new_result = buffer.bitwise_unary(0, 16, |a| !a);
+        let old_result = buffer_unary_not(&buffer, 0, 16);
+        assert_eq!(new_result, old_result);
+
+        // Test with offset 2, len 10 (crosses byte, tail bits)
+        let new_result = buffer.bitwise_unary(2, 10, |a| !a);
+        let old_result = buffer_unary_not(&buffer, 2, 10);
+        assert_eq!(new_result, old_result);
+    }
+
+    #[test]
+    fn test_buffer_bitwise_binary_random_equivalence() {
+        use crate::buffer::ops::{buffer_bin_and, buffer_bin_or, buffer_bin_xor};
+
+        // Use a fixed seed for reproducible tests
+        let mut rng = rand::rngs::StdRng::from_seed([42u8; 32]);
+
+        let buffer_sizes = [8, 16, 32, 64];
+        let offsets = [0, 1, 3, 7, 8, 13];
+        let lengths = [1, 2, 7, 8, 9, 15, 16];
+
+        for &size in &buffer_sizes {
+            for _ in 0..5 { // Generate 5 random pairs per size
+                let left_vec: Vec<u8> = (0..size).map(|_| rng.random::<u8>()).collect();
+                let right_vec: Vec<u8> = (0..size).map(|_| rng.random::<u8>()).collect();
+                let left = Buffer::from(left_vec);
+                let right = Buffer::from(right_vec);
+                let buffer_bits = left.len() * 8;
+
+                for &offset in &offsets {
+                    if offset >= buffer_bits { continue; }
+                    for &len in &lengths {
+                        if offset + len > buffer_bits { continue; }
+
+                        // Test AND
+                        let new_and = left.bitwise_binary(&right, offset, offset, len, |a, b| a & b);
+                        let old_and = buffer_bin_and(&left, offset, &right, offset, len);
+                        assert_eq!(new_and, old_and, "AND failed for offset={}, len={}", offset, len);
+
+                        // Test OR
+                        let new_or = left.bitwise_binary(&right, offset, offset, len, |a, b| a | b);
+                        let old_or = buffer_bin_or(&left, offset, &right, offset, len);
+                        assert_eq!(new_or, old_or, "OR failed for offset={}, len={}", offset, len);
+
+                        // Test XOR
+                        let new_xor = left.bitwise_binary(&right, offset, offset, len, |a, b| a ^ b);
+                        let old_xor = buffer_bin_xor(&left, offset, &right, offset, len);
+                        assert_eq!(new_xor, old_xor, "XOR failed for offset={}, len={}", offset, len);
+                    }
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_buffer_bitwise_unary_random_equivalence() {
+        use crate::buffer::ops::buffer_unary_not;
+
+        // Use a fixed seed for reproducible tests
+        let mut rng = rand::rngs::StdRng::from_seed([43u8; 32]);
+
+        let buffer_sizes = [8, 16, 32, 64];
+        let offsets = [0, 1, 3, 7, 8, 13];
+        let lengths = [1, 2, 7, 8, 9, 15, 16];
+
+        for &size in &buffer_sizes {
+            for _ in 0..5 { // Generate 5 random buffers per size
+                let vec: Vec<u8> = (0..size).map(|_| rng.random::<u8>()).collect();
+                let buffer = Buffer::from(vec);
+                let buffer_bits = buffer.len() * 8;
+
+                for &offset in &offsets {
+                    if offset >= buffer_bits { continue; }
+                    for &len in &lengths {
+                        if offset + len > buffer_bits { continue; }
+
+                        let new_not = buffer.bitwise_unary(offset, len, |a| !a);
+                        let old_not = buffer_unary_not(&buffer, offset, len);
+                        assert_eq!(new_not, old_not, "NOT failed for offset={}, len={}", offset, len);
+                    }
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_buffer_bitwise_boundaries() {
+        // Use a fixed seed for reproducible tests
+        let mut rng = rand::rngs::StdRng::from_seed([45u8; 32]);
+
+        // Create a large buffer: 1024 bytes
+        let data: Vec<u8> = (0..1024).map(|_| rng.random::<u8>()).collect();
+        let buffer = Buffer::from(data);
+        let total_bits = buffer.len() * 8;
+
+        // Boundary configurations for unary
+        let boundary_configs = vec![
+            (0, 0), // zero length
+            (0, total_bits), // full length
+            (1, total_bits - 1), // offset 1, to end
+            (7, total_bits - 7), // offset 7, crosses byte
+            (8, total_bits - 8), // offset at byte boundary
+            (total_bits - 1, 1), // last bit
+            (total_bits - 8, 8), // last byte
+            (total_bits - 9, 9), // last byte plus one
+        ];
+
+        for (offset, len) in boundary_configs {
+            if offset + len > total_bits {
+                continue; // skip invalid
+            }
+
+            // Test bitwise_unary
+            let result = buffer.bitwise_unary(offset, len, |a| !a);
+            let expected_len = (len + 7) / 8;
+            assert_eq!(result.len(), expected_len, "Wrong length for offset={}, len={}", offset, len);
+
+            // Idempotence: NOT twice should be identity, but since result is packed, compare lengths
+            let result_twice = Buffer::from(result.clone()).bitwise_unary(0, len, |a| !a);
+            assert_eq!(result_twice.len(), expected_len, "NOT twice length mismatch for offset={}, len={}", offset, len);
+        }
+    }
+
+    #[test]
+    fn test_buffer_bitwise_binary_boundaries() {
+        use crate::buffer::ops::{buffer_bin_and, buffer_bin_or};
+
+        // Use a fixed seed for reproducible tests
+        let mut rng = rand::rngs::StdRng::from_seed([46u8; 32]);
+
+        // Create two large buffers: 1024 bytes each
+        let data_left: Vec<u8> = (0..1024).map(|_| rng.random::<u8>()).collect();
+        let data_right: Vec<u8> = (0..1024).map(|_| rng.random::<u8>()).collect();
+        let left = Buffer::from(data_left);
+        let right = Buffer::from(data_right);
+        let total_bits = left.len() * 8;
+
+        // Boundary configurations for binary
+        let boundary_configs = vec![
+            (0, 0), // zero length
+            (0, total_bits), // full length
+            (1, total_bits - 1), // offset 1, to end
+            (7, total_bits - 7), // offset 7, crosses byte
+            (8, total_bits - 8), // offset at byte boundary
+            (total_bits - 1, 1), // last bit
+            (total_bits - 8, 8), // last byte
+            (total_bits - 9, 9), // last byte plus one
+        ];
+
+        for (offset, len) in boundary_configs {
+            if offset + len > total_bits {
+                continue; // skip invalid
+            }
+
+            // Test bitwise_binary AND
+            let result_and = left.bitwise_binary(&right, offset, offset, len, |a, b| a & b);
+            let expected_len = (len + 7) / 8;
+            assert_eq!(result_and.len(), expected_len, "AND wrong length for offset={}, len={}", offset, len);
+
+            // Compare with legacy for a few cases
+            if len <= 64 { // to keep test fast
+                let old_and = buffer_bin_and(&left, offset, &right, offset, len);
+                assert_eq!(result_and, old_and, "AND mismatch with legacy for offset={}, len={}", offset, len);
+            }
+
+            // Test bitwise_binary OR
+            let result_or = left.bitwise_binary(&right, offset, offset, len, |a, b| a | b);
+            assert_eq!(result_or.len(), expected_len, "OR wrong length for offset={}, len={}", offset, len);
+
+            // Compare with legacy for a few cases
+            if len <= 64 {
+                let old_or = buffer_bin_or(&left, offset, &right, offset, len);
+                assert_eq!(result_or, old_or, "OR mismatch with legacy for offset={}, len={}", offset, len);
+            }
+        }
+    }
+
+
+
+    #[test]
+    fn test_buffer_bitwise_byte_boundary_regressions() {
+        use crate::buffer::ops::{buffer_bin_and, buffer_bin_or, buffer_unary_not};
+
+        // Construct small buffers
+        let left = Buffer::from(vec![0b11110000u8, 0b00001111u8]);   // 240, 15
+        let right = Buffer::from(vec![0b10101010u8, 0b01010101u8]);  // 170, 85
+
+        // (offset, len, description)
+        let cases: &[(usize, usize, &str)] = &[
+            (0, 8, "(offset=0, len=8) exact byte"),
+            (1, 7, "(offset=1, len=7) (offset+len)%8 == 0"),
+            (3, 5, "(offset=3, len=5) partial byte"),
+            (4, 8, "(offset=4, len=8) cross-byte"),
+        ];
+
+        for &(offset, len, desc) in cases {
+            // New AND vs legacy AND
+            let result_and = left.bitwise_binary(&right, offset, offset, len, |a, b| a & b);
+            let legacy_and = buffer_bin_and(&left, offset, &right, offset, len);
+            assert_eq!(result_and.as_slice(), &legacy_and[..], "AND {}", desc);
+
+            // New OR vs legacy OR
+            let result_or = left.bitwise_binary(&right, offset, offset, len, |a, b| a | b);
+            let legacy_or = buffer_bin_or(&left, offset, &right, offset, len);
+            assert_eq!(result_or.as_slice(), &legacy_or[..], "OR {}", desc);
+
+            // New NOT vs legacy NOT (unary, using only `left`)
+            let result_not = left.bitwise_unary(offset, len, |a| !a);
+            let legacy_not = buffer_unary_not(&left, offset, len);
+            assert_eq!(result_not.as_slice(), &legacy_not[..], "NOT {}", desc);
+        }
+    }
 }
diff --git a/arrow-buffer/src/buffer/mutable.rs b/arrow-buffer/src/buffer/mutable.rs
index 83c4d878346c..eae7fba82d8b 100644
--- a/arrow-buffer/src/buffer/mutable.rs
+++ b/arrow-buffer/src/buffer/mutable.rs
@@ -23,7 +23,7 @@ use crate::alloc::{ALIGNMENT, Deallocation};
 use crate::{
     bytes::Bytes,
     native::{ArrowNativeType, ToByteSlice},
-    util::bit_util,
+    util::bit_util::{self, apply_bitwise_binary_op, apply_bitwise_unary_op},
 };
 
 #[cfg(feature = "pool")]
@@ -596,6 +596,40 @@ impl MutableBuffer {
     pub fn claim(&self, pool: &dyn MemoryPool) {
         *self.reservation.lock().unwrap() = Some(pool.reserve(self.capacity()));
     }
+
+    /// Apply a bitwise unary operation in place to this buffer.
+    ///
+    /// The operation is applied to `len` bits starting at `offset` bits.
+    ///
+    /// # Arguments
+    ///
+    /// * `offset` - The bit offset to start the operation.
+    /// * `len` - The number of bits to process.
+    /// * `op` - The unary operation to apply.
+    pub fn bitwise_unary_inplace<F>(&mut self, offset: usize, len: usize, op: F)
+    where
+        F: Fn(u64) -> u64,
+    {
+        apply_bitwise_unary_op(self.as_slice_mut(), offset, len, op)
+    }
+
+    /// Apply a bitwise binary operation in place between this buffer and another.
+    ///
+    /// The operation is applied to `len` bits starting at `self_offset` in self and `other_offset` in other.
+    ///
+    /// # Arguments
+    ///
+    /// * `other` - The other buffer.
+    /// * `self_offset` - The bit offset in self.
+    /// * `other_offset` - The bit offset in other.
+    /// * `len` - The number of bits to process.
+    /// * `op` - The binary operation to apply.
+    pub fn bitwise_binary_inplace<F>(&mut self, other: &Buffer, self_offset: usize, other_offset: usize, len: usize, op: F)
+    where
+        F: Fn(u64, u64) -> u64,
+    {
+        apply_bitwise_binary_op(self.as_slice_mut(), self_offset, other.as_slice(), other_offset, len, op)
+    }
 }
 
 /// Creates a non-null pointer with alignment of [`ALIGNMENT`]
@@ -931,6 +965,7 @@ impl<T: ArrowNativeType> std::iter::FromIterator<T> for MutableBuffer {
 
 #[cfg(test)]
 mod tests {
+    use rand::{Rng, SeedableRng};
     use super::*;
 
     #[test]
@@ -1382,4 +1417,179 @@ mod tests {
         let data_1000: Vec<i32> = (0..1000).collect();
         test_repeat_count(repeat_count, &data_1000);
     }
+
+    #[test]
+    fn test_mutable_buffer_bitwise_inplace() {
+        let initial = vec![0b10101010u8, 0b01010101u8];
+        let other = Buffer::from(vec![0b11110000u8, 0b00001111u8]);
+
+        // Test in-place and vs allocating
+        let mut mutable = MutableBuffer::from(initial.clone());
+        mutable.bitwise_binary_inplace(&other, 0, 0, 16, |a, b| a & b);
+        let inplace_result = Buffer::from(mutable);
+
+        let allocating_result = Buffer::from(initial.clone()).bitwise_binary(&other, 0, 0, 16, |a, b| a & b);
+
+        assert_eq!(inplace_result, allocating_result);
+
+        // Test with offsets
+        let mut mutable = MutableBuffer::from(initial.clone());
+        mutable.bitwise_binary_inplace(&other, 1, 1, 7, |a, b| a | b);
+        let inplace_result = Buffer::from(mutable);
+
+        let allocating_result = Buffer::from(initial.clone()).bitwise_binary(&other, 1, 1, 7, |a, b| a | b);
+
+        // Extract the operated bits: bits 1 to 7 from inplace_result (all in byte 0)
+        let operated_value = ((inplace_result.as_slice()[0] >> 1) & 0x7F) as u8;
+        assert_eq!(operated_value, allocating_result.as_slice()[0]);
+
+        // Test unary in-place
+        let mut mutable = MutableBuffer::from(initial.clone());
+        mutable.bitwise_unary_inplace(0, 16, |a| !a);
+        let inplace_result = Buffer::from(mutable);
+
+        let allocating_result = Buffer::from(initial).bitwise_unary(0, 16, |a| !a);
+
+        assert_eq!(inplace_result, allocating_result);
+    }
+
+    #[test]
+    fn test_mutable_buffer_bitwise_inplace_random_equivalence() {
+        // Use a fixed seed for reproducible tests
+        let mut rng = rand::rngs::StdRng::from_seed([44u8; 32]);
+
+        let buffer_sizes = [8, 16, 32, 64];
+        let offsets = [0, 1, 3, 7, 8, 13];
+        let lengths = [1, 2, 7, 8, 9, 15, 16];
+
+        for &size in &buffer_sizes {
+            for _ in 0..5 { // Generate 5 random pairs per size
+                let initial_vec: Vec<u8> = (0..size).map(|_| rng.random::<u8>()).collect();
+                let other_vec: Vec<u8> = (0..size).map(|_| rng.random::<u8>()).collect();
+                let other = Buffer::from(other_vec);
+                let buffer_bits = initial_vec.len() * 8;
+
+                for &offset in &offsets {
+                    if offset >= buffer_bits { continue; }
+                    for &len in &lengths {
+                        if offset + len > buffer_bits { continue; }
+
+                        // Test binary AND in-place
+                        let mut mutable = MutableBuffer::from(initial_vec.clone());
+                        mutable.bitwise_binary_inplace(&other, offset, offset, len, |a, b| a & b);
+                        let inplace_result = Buffer::from(mutable);
+
+                        let allocating_result = Buffer::from(initial_vec.clone()).bitwise_binary(&other, offset, offset, len, |a, b| a & b);
+
+                        // Compare bit by bit
+                        for i in 0..len {
+                            let inplace_bit = crate::bit_util::get_bit(inplace_result.as_slice(), offset + i);
+                            let expected_bit = crate::bit_util::get_bit(allocating_result.as_slice(), i);
+                            assert_eq!(inplace_bit, expected_bit, "AND bit {} mismatch for offset={}, len={}", i, offset, len);
+                        }
+
+                        // Test binary OR in-place
+                        let mut mutable = MutableBuffer::from(initial_vec.clone());
+                        mutable.bitwise_binary_inplace(&other, offset, offset, len, |a, b| a | b);
+                        let inplace_result = Buffer::from(mutable);
+
+                        let allocating_result = Buffer::from(initial_vec.clone()).bitwise_binary(&other, offset, offset, len, |a, b| a | b);
+
+                        // Compare bit by bit
+                        for i in 0..len {
+                            let inplace_bit = crate::bit_util::get_bit(inplace_result.as_slice(), offset + i);
+                            let expected_bit = crate::bit_util::get_bit(allocating_result.as_slice(), i);
+                            assert_eq!(inplace_bit, expected_bit, "OR bit {} mismatch for offset={}, len={}", i, offset, len);
+                        }
+
+                        // Test unary NOT in-place
+                        let mut mutable = MutableBuffer::from(initial_vec.clone());
+                        mutable.bitwise_unary_inplace(offset, len, |a| !a);
+                        let inplace_result = Buffer::from(mutable);
+
+                        let allocating_result = Buffer::from(initial_vec.clone()).bitwise_unary(offset, len, |a| !a);
+
+                        // Compare bit by bit
+                        for i in 0..len {
+                            let inplace_bit = crate::bit_util::get_bit(inplace_result.as_slice(), offset + i);
+                            let expected_bit = crate::bit_util::get_bit(allocating_result.as_slice(), i);
+                            assert_eq!(inplace_bit, expected_bit, "NOT bit {} mismatch for offset={}, len={}", i, offset, len);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_mutable_buffer_bitwise_inplace_boundaries() {
+        // Use a fixed seed for reproducible tests
+        let mut rng = rand::rngs::StdRng::from_seed([47u8; 32]);
+
+        // Create large buffers: 1024 bytes
+        let data: Vec<u8> = (0..1024).map(|_| rng.random::<u8>()).collect();
+        let other_data: Vec<u8> = (0..1024).map(|_| rng.random::<u8>()).collect();
+        let other = Buffer::from(other_data);
+        let total_bits = data.len() * 8;
+
+        // Boundary configurations
+        let boundary_configs = vec![
+            (0, 0), // zero length
+            (0, total_bits), // full length
+            (1, total_bits - 1), // offset 1, to end
+            (7, total_bits - 7), // offset 7, crosses byte
+            (8, total_bits - 8), // offset at byte boundary
+            (total_bits - 1, 1), // last bit
+            (total_bits - 8, 8), // last byte
+            (total_bits - 9, 9), // last byte plus one
+        ];
+
+        for (offset, len) in boundary_configs {
+            if offset + len > total_bits {
+                continue; // skip invalid
+            }
+
+            // Test unary in-place
+            let mut mutable = MutableBuffer::from(data.clone());
+            mutable.bitwise_unary_inplace(offset, len, |a| !a);
+            let inplace_result = Buffer::from(mutable);
+
+            let allocating_result = Buffer::from(data.clone()).bitwise_unary(offset, len, |a| !a);
+
+            // Compare bit by bit
+            for i in 0..len {
+                let inplace_bit = crate::bit_util::get_bit(inplace_result.as_slice(), offset + i);
+                let expected_bit = crate::bit_util::get_bit(allocating_result.as_slice(), i);
+                assert_eq!(inplace_bit, expected_bit, "Unary bit {} mismatch for offset={}, len={}", i, offset, len);
+            }
+
+            // Test binary in-place AND
+            let mut mutable = MutableBuffer::from(data.clone());
+            mutable.bitwise_binary_inplace(&other, offset, offset, len, |a, b| a & b);
+            let inplace_result = Buffer::from(mutable);
+
+            let allocating_result = Buffer::from(data.clone()).bitwise_binary(&other, offset, offset, len, |a, b| a & b);
+
+            // Compare bit by bit
+            for i in 0..len {
+                let inplace_bit = crate::bit_util::get_bit(inplace_result.as_slice(), offset + i);
+                let expected_bit = crate::bit_util::get_bit(allocating_result.as_slice(), i);
+                assert_eq!(inplace_bit, expected_bit, "Binary AND bit {} mismatch for offset={}, len={}", i, offset, len);
+            }
+
+            // Test binary in-place OR
+            let mut mutable = MutableBuffer::from(data.clone());
+            mutable.bitwise_binary_inplace(&other, offset, offset, len, |a, b| a | b);
+            let inplace_result = Buffer::from(mutable);
+
+            let allocating_result = Buffer::from(data.clone()).bitwise_binary(&other, offset, offset, len, |a, b| a | b);
+
+            // Compare bit by bit
+            for i in 0..len {
+                let inplace_bit = crate::bit_util::get_bit(inplace_result.as_slice(), offset + i);
+                let expected_bit = crate::bit_util::get_bit(allocating_result.as_slice(), i);
+                assert_eq!(inplace_bit, expected_bit, "Binary OR bit {} mismatch for offset={}, len={}", i, offset, len);
+            }
+        }
+    }
 }
diff --git a/arrow-buffer/src/buffer/ops.rs b/arrow-buffer/src/buffer/ops.rs
index c69e5c6deb10..f0c591ab8dd8 100644
--- a/arrow-buffer/src/buffer/ops.rs
+++ b/arrow-buffer/src/buffer/ops.rs
@@ -60,73 +60,39 @@ where
 
 /// Apply a bitwise operation `op` to two inputs and return the result as a Buffer.
 /// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits.
+#[deprecated(note = "use Buffer::bitwise_binary instead")]
 pub fn bitwise_bin_op_helper<F>(
     left: &Buffer,
     left_offset_in_bits: usize,
     right: &Buffer,
     right_offset_in_bits: usize,
     len_in_bits: usize,
-    mut op: F,
+    op: F,
 ) -> Buffer
 where
-    F: FnMut(u64, u64) -> u64,
+    F: Fn(u64, u64) -> u64 + Copy,
 {
-    let left_chunks = left.bit_chunks(left_offset_in_bits, len_in_bits);
-    let right_chunks = right.bit_chunks(right_offset_in_bits, len_in_bits);
-
-    let chunks = left_chunks
-        .iter()
-        .zip(right_chunks.iter())
-        .map(|(left, right)| op(left, right));
-    // Soundness: `BitChunks` is a `BitChunks` iterator which
-    // correctly reports its upper bound
-    let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(chunks) };
-
-    let remainder_bytes = ceil(left_chunks.remainder_len(), 8);
-    let rem = op(left_chunks.remainder_bits(), right_chunks.remainder_bits());
-    // we are counting its starting from the least significant bit, to to_le_bytes should be correct
-    let rem = &rem.to_le_bytes()[0..remainder_bytes];
-    buffer.extend_from_slice(rem);
-
-    buffer.into()
+    left.bitwise_binary(right, left_offset_in_bits, right_offset_in_bits, len_in_bits, op)
 }
 
 /// Apply a bitwise operation `op` to one input and return the result as a Buffer.
 /// The input is treated as a bitmap, meaning that offset and length are specified in number of bits.
+#[deprecated(note = "use Buffer::bitwise_unary instead")]
 pub fn bitwise_unary_op_helper<F>(
     left: &Buffer,
     offset_in_bits: usize,
     len_in_bits: usize,
-    mut op: F,
+    op: F,
 ) -> Buffer
 where
-    F: FnMut(u64) -> u64,
+    F: Fn(u64) -> u64 + Copy,
 {
-    // reserve capacity and set length so we can get a typed view of u64 chunks
-    let mut result =
-        MutableBuffer::new(ceil(len_in_bits, 8)).with_bitset(len_in_bits / 64 * 8, false);
-
-    let left_chunks = left.bit_chunks(offset_in_bits, len_in_bits);
-
-    let result_chunks = result.typed_data_mut::<u64>().iter_mut();
-
-    result_chunks
-        .zip(left_chunks.iter())
-        .for_each(|(res, left)| {
-            *res = op(left);
-        });
-
-    let remainder_bytes = ceil(left_chunks.remainder_len(), 8);
-    let rem = op(left_chunks.remainder_bits());
-    // we are counting its starting from the least significant bit, to to_le_bytes should be correct
-    let rem = &rem.to_le_bytes()[0..remainder_bytes];
-    result.extend_from_slice(rem);
-
-    result.into()
+    left.bitwise_unary(offset_in_bits, len_in_bits, op)
 }
 
 /// Apply a bitwise and to two inputs and return the result as a Buffer.
 /// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits.
+#[deprecated(note = "use Buffer::bitwise_binary with |a, b| a & b or BooleanArray::binary instead")]
 pub fn buffer_bin_and(
     left: &Buffer,
     left_offset_in_bits: usize,
@@ -134,18 +100,12 @@ pub fn buffer_bin_and(
     right_offset_in_bits: usize,
     len_in_bits: usize,
 ) -> Buffer {
-    bitwise_bin_op_helper(
-        left,
-        left_offset_in_bits,
-        right,
-        right_offset_in_bits,
-        len_in_bits,
-        |a, b| a & b,
-    )
+    left.bitwise_binary(right, left_offset_in_bits, right_offset_in_bits, len_in_bits, |a, b| a & b)
 }
 
 /// Apply a bitwise or to two inputs and return the result as a Buffer.
 /// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits.
+#[deprecated(note = "use Buffer::bitwise_binary with |a, b| a | b or BooleanArray::binary instead")]
 pub fn buffer_bin_or(
     left: &Buffer,
     left_offset_in_bits: usize,
@@ -153,18 +113,12 @@ pub fn buffer_bin_or(
     right_offset_in_bits: usize,
     len_in_bits: usize,
 ) -> Buffer {
-    bitwise_bin_op_helper(
-        left,
-        left_offset_in_bits,
-        right,
-        right_offset_in_bits,
-        len_in_bits,
-        |a, b| a | b,
-    )
+    left.bitwise_binary(right, left_offset_in_bits, right_offset_in_bits, len_in_bits, |a, b| a | b)
 }
 
 /// Apply a bitwise xor to two inputs and return the result as a Buffer.
 /// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits.
+#[deprecated(note = "use Buffer::bitwise_binary with |a, b| a ^ b or BooleanArray::binary instead")]
 pub fn buffer_bin_xor(
     left: &Buffer,
     left_offset_in_bits: usize,
@@ -172,18 +126,12 @@ pub fn buffer_bin_xor(
     right_offset_in_bits: usize,
     len_in_bits: usize,
 ) -> Buffer {
-    bitwise_bin_op_helper(
-        left,
-        left_offset_in_bits,
-        right,
-        right_offset_in_bits,
-        len_in_bits,
-        |a, b| a ^ b,
-    )
+    left.bitwise_binary(right, left_offset_in_bits, right_offset_in_bits, len_in_bits, |a, b| a ^ b)
 }
 
 /// Apply a bitwise and_not to two inputs and return the result as a Buffer.
 /// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits.
+#[deprecated(note = "use Buffer::bitwise_binary with |a, b| a & !b or BooleanArray::binary instead")]
 pub fn buffer_bin_and_not(
     left: &Buffer,
     left_offset_in_bits: usize,
@@ -191,18 +139,12 @@ pub fn buffer_bin_and_not(
     right_offset_in_bits: usize,
     len_in_bits: usize,
 ) -> Buffer {
-    bitwise_bin_op_helper(
-        left,
-        left_offset_in_bits,
-        right,
-        right_offset_in_bits,
-        len_in_bits,
-        |a, b| a & !b,
-    )
+    left.bitwise_binary(right, left_offset_in_bits, right_offset_in_bits, len_in_bits, |a, b| a & !b)
 }
 
 /// Apply a bitwise not to one input and return the result as a Buffer.
 /// The input is treated as a bitmap, meaning that offset and length are specified in number of bits.
+#[deprecated(note = "use Buffer::bitwise_unary with |a| !a or BooleanArray::unary instead")]
 pub fn buffer_unary_not(left: &Buffer, offset_in_bits: usize, len_in_bits: usize) -> Buffer {
-    bitwise_unary_op_helper(left, offset_in_bits, len_in_bits, |a| !a)
+    left.bitwise_unary(offset_in_bits, len_in_bits, |a| !a)
 }
diff --git a/arrow-select/src/nullif.rs b/arrow-select/src/nullif.rs
index 8e3cc7d56c71..cc7603f1ca32 100644
--- a/arrow-select/src/nullif.rs
+++ b/arrow-select/src/nullif.rs
@@ -17,11 +17,50 @@
 
 //! Implements the `nullif` function for Arrow arrays.
 
+/*
+ * NULLIF Implementation Contract
+ *
+ * For any ArrayData:
+ * len = data.len()            // logical elements
+ * offset = data.offset()      // logical starting index into buffers
+ *
+ * Validity bitmap (if present) is a Buffer B.
+ * Invariant:
+ *   Logical index i in [0, len) is valid iff get_bit(B, offset + i) == true.
+ *
+ * For the result of nullif:
+ * We will build a fresh ArrayData with offset = 0.
+ * For that result:
+ *   Logical index i is valid iff get_bit(result_validity, i) == true.
+ *   Values buffer is laid out so element 0 is first result value, etc.
+ *
+ * For nullif semantics:
+ * Let V(i) = left is valid at i
+ *     C(i) = condition "nullify at i" is true (depends on left, right, type)
+ * Then:
+ *   result_valid(i) = V(i) & !C(i)
+ *   result_value(i) = left_value(i)    // when result_valid(i) == true
+ *
+ * This contract is the law. All nullif implementations must follow it.
+ * See docs/arraydata_bitmap_layout_contract.md for full details.
+ */
+
 use arrow_array::{Array, ArrayRef, BooleanArray, make_array};
-use arrow_buffer::buffer::{bitwise_bin_op_helper, bitwise_unary_op_helper};
-use arrow_buffer::{BooleanBuffer, NullBuffer};
+use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer};
+use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{ArrowError, DataType};
 
+fn get_element_size(data_type: &DataType) -> usize {
+    match data_type {
+        DataType::Boolean => 1,
+        DataType::Int8 | DataType::UInt8 => 1,
+        DataType::Int16 | DataType::UInt16 => 2,
+        DataType::Int32 | DataType::UInt32 | DataType::Float32 => 4,
+        DataType::Int64 | DataType::UInt64 | DataType::Float64 => 8,
+        _ => panic!("unsupported"),
+    }
+}
+
 /// Returns a new array with the same values and the validity bit to false where
 /// the corresponding element of`right` is true.
 ///
@@ -55,65 +94,164 @@ pub fn nullif(left: &dyn Array, right: &BooleanArray) -> Result<ArrayRef, ArrowE
         return Ok(make_array(left_data));
     }
 
-    // left=0 (null)   right=null       output bitmap=null
-    // left=0          right=1          output bitmap=null
-    // left=1 (set)    right=null       output bitmap=set   (passthrough)
-    // left=1          right=1 & comp=true    output bitmap=null
-    // left=1          right=1 & comp=false   output bitmap=set
-    //
-    // Thus: result = left null bitmap & (!right_values | !right_bitmap)
-    //              OR left null bitmap & !(right_values & right_bitmap)
-
-    // Compute right_values & right_bitmap
-    let right = match right.nulls() {
+    let cond_mask = match right.nulls() {
         Some(nulls) => right.values() & nulls.inner(),
         None => right.values().clone(),
     };
-
-    // Compute left null bitmap & !right
-
-    let (combined, null_count) = match left_data.nulls() {
-        Some(left) => {
-            let mut valid_count = 0;
-            let b = bitwise_bin_op_helper(
-                left.buffer(),
-                left.offset(),
-                right.inner(),
-                right.offset(),
-                len,
-                |l, r| {
-                    let t = l & !r;
-                    valid_count += t.count_ones() as usize;
-                    t
-                },
-            );
-            (b, len - valid_count)
-        }
-        None => {
-            let mut null_count = 0;
-            let buffer = bitwise_unary_op_helper(right.inner(), right.offset(), len, |b| {
-                let t = !b;
-                null_count += t.count_zeros() as usize;
-                t
-            });
-            (buffer, null_count)
+    let cond_offset = cond_mask.offset();
+
+    let validity = if let Some(left_nulls) = left_data.nulls() {
+        let left_valid = left_nulls.inner().inner();
+        let left_bit_offset = left_nulls.inner().offset();
+        compute_nullif_validity(&left_valid, left_bit_offset, &cond_mask.inner(), cond_offset, len)
+    } else {
+        // left is all valid
+        let left_valid_len = (len + 7) / 8;
+        let mut left_valid_data = vec![255u8; left_valid_len];
+        if len % 8 != 0 {
+            let mask = (1u8 << (len % 8)) - 1;
+            left_valid_data[left_valid_len - 1] = mask;
         }
+        let left_valid = Buffer::from(left_valid_data);
+        let left_bit_offset = 0;
+        compute_nullif_validity(&left_valid, left_bit_offset, &cond_mask.inner(), cond_offset, len)
     };
 
-    let combined = BooleanBuffer::new(combined, 0, len);
-    // Safety:
-    // Counted nulls whilst computing
-    let nulls = unsafe { NullBuffer::new_unchecked(combined, null_count) };
-    let data = left_data.into_builder().nulls(Some(nulls));
+    let (null_buffer, _) = compute_null_buffer(&validity, len);
+    let nulls = NullBuffer::new(null_buffer);
+
+    let data_without_nulls = copy_array_data_with_offset_zero(&left_data)?;
+    let data = ArrayDataBuilder::new(left_data.data_type().clone())
+        .len(len)
+        .nulls(Some(nulls))
+        .offset(0)
+        .buffers(data_without_nulls.buffers().to_vec())
+        .child_data(data_without_nulls.child_data().to_vec())
+        .build()
+        .unwrap();
+
+    Ok(make_array(data))
+}
+
+/// Computes the null buffer and null count from the validity buffer.
+///
+/// The null buffer has bits set where validity is 1 (not null).
+/// The null count is computed as len - number of set bits in validity.
+fn compute_null_buffer(validity: &Buffer, len: usize) -> (BooleanBuffer, usize) {
+    let null_buffer = BooleanBuffer::new(validity.clone(), 0, len);
+    let null_count = len - validity.count_set_bits_offset(0, len);
+    (null_buffer, null_count)
+}
+
+/// Computes the NULLIF validity bitmap from left validity and condition mask.
+///
+/// For each logical index `i` in `0..len`:
+/// - `left_bit = bit(left_valid, left_offset + i)`
+/// - `cond_bit = bit(cond_mask, cond_offset + i)`
+/// - `result_bit = left_bit & !cond_bit`
+///
+/// This implements the core bitmap logic for NULLIF, isolating it from array-level concerns.
+fn compute_nullif_validity(
+    left_valid: &Buffer,
+    left_offset: usize,
+    cond_mask: &Buffer,
+    cond_offset: usize,
+    len: usize,
+) -> Buffer {
+    left_valid.bitwise_binary(cond_mask, left_offset, cond_offset, len, |l, c| l & !c)
+}
+
+/// Creates a new ArrayData with offset=0 by slicing the buffers from the given ArrayData.
+/// This ensures the result ArrayData has offset=0 as per the nullif contract.
+fn copy_array_data_with_offset_zero(left_data: &ArrayData) -> Result<ArrayData, ArrowError> {
+    let len = left_data.len();
+    let offset = left_data.offset();
+    let data_type = left_data.data_type();
+    let mut buffers = Vec::new();
+    let mut child_data = Vec::new();
+
+    match data_type {
+        DataType::Boolean => {
+            let values = left_data.buffers()[0].bitwise_unary(offset, len, |b| b);
+            buffers.push(values);
+        }
+        DataType::Int8 | DataType::UInt8 | DataType::Int16 | DataType::UInt16 |
+        DataType::Int32 | DataType::UInt32 | DataType::Float32 |
+        DataType::Int64 | DataType::UInt64 | DataType::Float64 => {
+            let element_size = get_element_size(data_type);
+            let start = offset * element_size;
+            let end = start + len * element_size;
+            let values = Buffer::from(&left_data.buffers()[0].as_slice()[start..end]);
+            buffers.push(values);
+        }
+        DataType::Utf8 => {
+            let offsets_buf = &left_data.buffers()[0];
+            let data_buf = &left_data.buffers()[1];
+            let offsets_start = offset;
+            let offsets_end = offset + len + 1;
+            let offsets_slice = &offsets_buf.as_slice()[offsets_start * 4..offsets_end * 4];
+            let base_offset = i32::from_le_bytes(offsets_slice[0..4].try_into().unwrap()) as usize;
+            let data_end = i32::from_le_bytes(offsets_slice[offsets_slice.len() - 4..].try_into().unwrap()) as usize;
+            let data_slice = &data_buf.as_slice()[base_offset..data_end];
+            let mut new_offsets = Vec::with_capacity(len + 1);
+            for i in 0..=len {
+                let old_offset = i32::from_le_bytes(offsets_slice[i * 4..(i + 1) * 4].try_into().unwrap()) as usize;
+                new_offsets.push((old_offset - base_offset) as i32);
+            }
+            let new_offsets_buf = Buffer::from(new_offsets.into_iter().flat_map(|x| x.to_le_bytes()).collect::<Vec<u8>>());
+            buffers.push(new_offsets_buf);
+            buffers.push(Buffer::from(data_slice));
+        }
+        DataType::LargeUtf8 => {
+            let offsets_buf = &left_data.buffers()[0];
+            let data_buf = &left_data.buffers()[1];
+            let offsets_start = offset;
+            let offsets_end = offset + len + 1;
+            let offsets_slice = &offsets_buf.as_slice()[offsets_start * 8..offsets_end * 8];
+            let base_offset = i64::from_le_bytes(offsets_slice[0..8].try_into().unwrap()) as usize;
+            let data_end = i64::from_le_bytes(offsets_slice[offsets_slice.len() - 8..].try_into().unwrap()) as usize;
+            let data_slice = &data_buf.as_slice()[base_offset..data_end];
+            let mut new_offsets = Vec::with_capacity(len + 1);
+            for i in 0..=len {
+                let old_offset = i64::from_le_bytes(offsets_slice[i * 8..(i + 1) * 8].try_into().unwrap()) as usize;
+                new_offsets.push((old_offset - base_offset) as i64);
+            }
+            let new_offsets_buf = Buffer::from(new_offsets.into_iter().flat_map(|x| x.to_le_bytes()).collect::<Vec<u8>>());
+            buffers.push(new_offsets_buf);
+            buffers.push(Buffer::from(data_slice));
+        }
+        DataType::Struct(_) => {
+            // No buffers for struct
+            child_data = left_data.child_data().iter()
+                .map(|child| copy_array_data_with_offset_zero(child))
+                .collect::<Result<Vec<_>, _>>()?;
+        }
+        _ => {
+            // For other types, copy buffers as is and keep offset (for now, to avoid breaking)
+            // TODO: implement slicing for other types like List, Binary, etc.
+            buffers = left_data.buffers().to_vec();
+            child_data = left_data.child_data().to_vec();
+            return Ok(ArrayDataBuilder::new(data_type.clone())
+                .len(len)
+                .offset(offset)
+                .buffers(buffers)
+                .child_data(child_data)
+                .build()?);
+        }
+    }
 
-    // SAFETY:
-    // Only altered null mask
-    Ok(make_array(unsafe { data.build_unchecked() }))
+    Ok(ArrayDataBuilder::new(data_type.clone())
+        .len(len)
+        .offset(0)
+        .buffers(buffers)
+        .child_data(child_data)
+        .build()?)
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
+    use arrow_buffer::bit_util::get_bit;
     use arrow_array::builder::{BooleanBuilder, Int32Builder, StructBuilder};
     use arrow_array::cast::AsArray;
     use arrow_array::types::Int32Type;
@@ -526,4 +664,230 @@ mod tests {
             }
         }
     }
+
+    #[test]
+    fn test_nullif_with_offsets_and_nulls() {
+        // Build arrays using builder to ensure correct null count
+        let array = Int32Array::from(vec![Some(15), None, Some(8), Some(1), Some(9)]);
+
+        let condition = BooleanArray::from(vec![Some(false), None, Some(true), Some(false), None]);
+
+        // Test on full arrays
+        let result_full = nullif(&array, &condition).unwrap();
+        let expected_full = Int32Array::from(vec![
+            Some(15), // false -> keep
+            None,     // null condition -> keep (was null)
+            None,     // true -> null
+            Some(1),  // false -> keep
+            Some(9),  // null condition -> keep
+        ]);
+        assert_eq!(result_full.as_primitive::<Int32Type>(), &expected_full);
+
+        // Test on sliced arrays
+        let array_sliced = array.slice(1, 3); // None, Some(8), Some(1)
+        let condition_sliced = condition.slice(1, 3); // None, Some(true), Some(false)
+        let result_sliced = nullif(&array_sliced, &condition_sliced).unwrap();
+
+        let expected_sliced = Int32Array::from(vec![
+            None,    // null condition -> keep (was null)
+            None,    // true -> null
+            Some(1), // false -> keep
+        ]);
+        assert_eq!(result_sliced.as_primitive::<Int32Type>(), &expected_sliced);
+
+        // Test with different condition
+        let array2 = Int32Array::from(vec![Some(10), None, Some(20), Some(30), None]);
+
+        let condition2 = BooleanArray::from(vec![Some(false), Some(true), Some(false), Some(true), Some(false)]);
+
+        let result = nullif(&array2, &condition2).unwrap();
+        let expected = Int32Array::from(vec![
+            Some(10), // false -> keep
+            None,     // true -> null (was null)
+            Some(20), // false -> keep
+            None,     // true -> null
+            None,     // false -> keep (was null)
+        ]);
+        assert_eq!(result.as_primitive::<Int32Type>(), &expected);
+    }
+
+    #[test]
+    fn test_nullif_null_count_and_offsets_regressions() {
+        // Construct a small Int32Array with nulls
+        let values = Int32Array::from(vec![Some(1), None, Some(2), Some(1), None, Some(3)]);
+        // Construct a "condition" array (equals values for nullif)
+        let equals = BooleanArray::from(vec![Some(true), Some(false), None, Some(true), Some(false), None]);
+
+        // Helper to build expected Vec<Option<i32>>
+        fn build_expected(values: &[Option<i32>], equals: &[Option<bool>]) -> Vec<Option<i32>> {
+            values.iter().zip(equals.iter()).map(|(&v, &e)| {
+                if v.is_none() {
+                    None
+                } else if e == Some(true) {
+                    None
+                } else {
+                    v
+                }
+            }).collect()
+        }
+
+        // Test full arrays
+        let result_full = nullif(&values, &equals).unwrap();
+        assert_eq!(result_full.len(), values.len());
+        let actual_nulls: Vec<Option<i32>> = result_full.as_primitive::<Int32Type>().iter().collect();
+        let expected_full = build_expected(&[Some(1), None, Some(2), Some(1), None, Some(3)], &[Some(true), Some(false), None, Some(true), Some(false), None]);
+        assert_eq!(actual_nulls, expected_full);
+        assert_eq!(result_full.null_count(), actual_nulls.iter().filter(|x| x.is_none()).count());
+
+        // Test sliced arrays: values.slice(1, 4) and equals.slice(1, 4)
+        // values[1..5]: None, Some(2), Some(1), None
+        // equals[1..5]: Some(false), None, Some(true), Some(false)
+        let values_slice1 = values.slice(1, 4);
+        let equals_slice1 = equals.slice(1, 4);
+        let result_slice1 = nullif(&values_slice1, &equals_slice1).unwrap();
+        assert_eq!(result_slice1.len(), 4);
+        let actual_slice1: Vec<Option<i32>> = result_slice1.as_primitive::<Int32Type>().iter().collect();
+        let expected_slice1 = build_expected(&[None, Some(2), Some(1), None], &[Some(false), None, Some(true), Some(false)]);
+        assert_eq!(actual_slice1, expected_slice1);
+        assert_eq!(result_slice1.null_count(), actual_slice1.iter().filter(|x| x.is_none()).count());
+
+        // Test another slice: values.slice(2, 3) and equals.slice(2, 3)
+        // values[2..5]: Some(2), Some(1), None
+        // equals[2..5]: None, Some(true), Some(false)
+        let values_slice2 = values.slice(2, 3);
+        let equals_slice2 = equals.slice(2, 3);
+        let result_slice2 = nullif(&values_slice2, &equals_slice2).unwrap();
+        assert_eq!(result_slice2.len(), 3);
+        let actual_slice2: Vec<Option<i32>> = result_slice2.as_primitive::<Int32Type>().iter().collect();
+        let expected_slice2 = build_expected(&[Some(2), Some(1), None], &[None, Some(true), Some(false)]);
+        assert_eq!(actual_slice2, expected_slice2);
+        assert_eq!(result_slice2.null_count(), actual_slice2.iter().filter(|x| x.is_none()).count());
+    }
+
+    #[test]
+    fn test_nullif_boolean_offsets_regression() {
+        // Use BooleanArray to reproduce BooleanBuffer slicing issues
+        let values = BooleanArray::from(vec![Some(true), None, Some(false), Some(true), None, Some(false), Some(true), Some(false)]);
+        let equals = BooleanArray::from(vec![Some(false), Some(true), None, Some(true), Some(false), None, Some(true), Some(false)]);
+
+        // Helper to build expected Vec<Option<bool>>
+        fn build_expected_bool(values: &[Option<bool>], equals: &[Option<bool>]) -> Vec<Option<bool>> {
+            values.iter().zip(equals.iter()).map(|(&v, &e)| {
+                if v.is_none() {
+                    None
+                } else if e == Some(true) {
+                    None
+                } else {
+                    v
+                }
+            }).collect()
+        }
+
+        // Test slices that cross byte boundaries (e.g., offset=1,len=5; offset=3,len=5; offset=7,len=1)
+        // Slice 1: offset=1, len=5 (values[1..6]: None, Some(false), Some(true), None, Some(false))
+        // equals[1..6]: Some(true), None, Some(true), Some(false), None
+        let values_slice1 = values.slice(1, 5);
+        let equals_slice1 = equals.slice(1, 5);
+        let result_slice1 = nullif(&values_slice1, &equals_slice1).unwrap();
+        assert_eq!(result_slice1.len(), 5);
+        let actual_slice1: Vec<Option<bool>> = result_slice1.as_boolean().iter().collect();
+        let expected_slice1 = build_expected_bool(&[None, Some(false), Some(true), None, Some(false)], &[Some(true), None, Some(true), Some(false), None]);
+        assert_eq!(actual_slice1, expected_slice1);
+        assert_eq!(result_slice1.null_count(), actual_slice1.iter().filter(|x| x.is_none()).count());
+
+        // Slice 2: offset=3, len=5 (values[3..8]: Some(true), None, Some(false), Some(true), Some(false))
+        // equals[3..8]: Some(true), Some(false), None, Some(true), Some(false)
+        let values_slice2 = values.slice(3, 5);
+        let equals_slice2 = equals.slice(3, 5);
+        let result_slice2 = nullif(&values_slice2, &equals_slice2).unwrap();
+        assert_eq!(result_slice2.len(), 5);
+        let actual_slice2: Vec<Option<bool>> = result_slice2.as_boolean().iter().collect();
+        let expected_slice2 = build_expected_bool(&[Some(true), None, Some(false), Some(true), Some(false)], &[Some(true), Some(false), None, Some(true), Some(false)]);
+        assert_eq!(actual_slice2, expected_slice2);
+        assert_eq!(result_slice2.null_count(), actual_slice2.iter().filter(|x| x.is_none()).count());
+
+        // Slice 3: offset=7, len=1 (values[7..8]: Some(false))
+        // equals[7..8]: Some(false)
+        let values_slice3 = values.slice(7, 1);
+        let equals_slice3 = equals.slice(7, 1);
+        let result_slice3 = nullif(&values_slice3, &equals_slice3).unwrap();
+        assert_eq!(result_slice3.len(), 1);
+        let actual_slice3: Vec<Option<bool>> = result_slice3.as_boolean().iter().collect();
+        let expected_slice3 = build_expected_bool(&[Some(false)], &[Some(false)]);
+        assert_eq!(actual_slice3, expected_slice3);
+        assert_eq!(result_slice3.null_count(), actual_slice3.iter().filter(|x| x.is_none()).count());
+    }
+
+    #[test]
+    fn test_nullif_all_null_and_empty() {
+        // Test all-null input
+        let all_null = Int32Array::from(vec![None, None, None]);
+        let condition = BooleanArray::from(vec![Some(true), Some(false), None]);
+        let result = nullif(&all_null, &condition).unwrap();
+        assert_eq!(result.len(), 3);
+        assert_eq!(result.null_count(), 3);
+        let actual: Vec<Option<i32>> = result.as_primitive::<Int32Type>().iter().collect();
+        assert_eq!(actual, vec![None, None, None]);
+
+        // Test no-null input
+        let no_null = Int32Array::from(vec![Some(1), Some(2), Some(3)]);
+        let condition = BooleanArray::from(vec![Some(false), Some(true), Some(false)]);
+        let result = nullif(&no_null, &condition).unwrap();
+        assert_eq!(result.len(), 3);
+        assert_eq!(result.null_count(), 1);
+        let actual: Vec<Option<i32>> = result.as_primitive::<Int32Type>().iter().collect();
+        assert_eq!(actual, vec![Some(1), None, Some(3)]);
+
+        // Test empty arrays
+        let empty_values = Int32Array::from(Vec::<Option<i32>>::new());
+        let empty_condition = BooleanArray::from(Vec::<Option<bool>>::new());
+        let result = nullif(&empty_values, &empty_condition).unwrap();
+        assert_eq!(result.len(), 0);
+        assert_eq!(result.null_count(), 0);
+    }
+
+    #[test]
+    fn test_compute_nullif_validity_basic_and_offsets() {
+        // left:  0b11110000, 0b00001111
+        // cond:  0b10101010, 0b01010101
+        let left = Buffer::from(vec![0b11110000u8, 0b00001111u8]);
+        let cond = Buffer::from(vec![0b10101010u8, 0b01010101u8]);
+
+        // Cases: (left_offset, cond_offset, len, expected_bits as Vec<bool>)
+        let cases = vec![
+            // No offset, 8 bits
+            (0, 0, 8, vec![
+                // left bits: 0 0 0 0 1 1 1 1
+                // cond bits: 0 1 0 1 0 1 0 1
+                // result: 0&!0=0, 0&!1=0, 0&!0=0, 0&!1=0, 1&!0=1, 1&!1=0, 1&!0=1, 1&!1=0
+                // => 0,0,0,0,1,0,1,0
+                false, false, false, false, true, false, true, false
+            ]),
+            // Offsets that make (offset+len)%8==0
+            (1, 1, 7, vec![
+                // left_offset=1, left bits 1-7: 0,0,0,1,1,1,1
+                // cond_offset=1, cond bits 1-7: 1,0,1,0,1,0,1
+                // result: 0&!1=0, 0&!0=0, 0&!1=0, 1&!0=1, 1&!1=0, 1&!0=1, 1&!1=0
+                // => 0,0,0,1,0,1,0
+                false, false, false, true, false, true, false
+            ]),
+            // A cross-byte case
+            (3, 3, 5, vec![
+                // left_offset=3, left bits 3-7: 0,1,1,1,1
+                // cond_offset=3, cond bits 3-7: 1,0,1,0,1
+                // result: 0&!1=0, 1&!0=1, 1&!1=0, 1&!0=1, 1&!1=0
+                // => 0,1,0,1,0
+                false, true, false, true, false
+            ]),
+        ];
+
+        for (loff, coff, len, expected) in cases {
+            let mask = compute_nullif_validity(&left, loff, &cond, coff, len);
+            // Check each bit in the resulting Buffer against expected
+            for i in 0..len {
+                let bit = get_bit(mask.as_slice(), i);
+                assert_eq!(bit, expected[i], "mismatch at bit {} for offsets ({}, {})", i, loff, coff);
+            }
+        }
+    }
 }
diff --git a/arrow/benches/buffer_bit_ops.rs b/arrow/benches/buffer_bit_ops.rs
index c569224b0f9b..2d0cca0e601d 100644
--- a/arrow/benches/buffer_bit_ops.rs
+++ b/arrow/benches/buffer_bit_ops.rs
@@ -22,7 +22,7 @@ use criterion::{Criterion, Throughput};
 
 extern crate arrow;
 
-use arrow::buffer::{Buffer, MutableBuffer, buffer_bin_and, buffer_bin_or, buffer_unary_not};
+use arrow::buffer::{Buffer, MutableBuffer};
 use std::hint;
 
 ///  Helper function to create arrays
@@ -37,15 +37,15 @@ fn create_buffer(size: usize) -> Buffer {
 }
 
 fn bench_buffer_and(left: &Buffer, right: &Buffer) {
-    hint::black_box(buffer_bin_and(left, 0, right, 0, left.len() * 8));
+    hint::black_box(left.bitwise_binary(right, 0, 0, left.len() * 8, |a, b| a & b));
 }
 
 fn bench_buffer_or(left: &Buffer, right: &Buffer) {
-    hint::black_box(buffer_bin_or(left, 0, right, 0, left.len() * 8));
+    hint::black_box(left.bitwise_binary(right, 0, 0, left.len() * 8, |a, b| a | b));
 }
 
 fn bench_buffer_not(buffer: &Buffer) {
-    hint::black_box(buffer_unary_not(buffer, 0, buffer.len() * 8));
+    hint::black_box(buffer.bitwise_unary(0, buffer.len() * 8, |a| !a));
 }
 
 fn bench_buffer_and_with_offsets(
@@ -55,7 +55,7 @@ fn bench_buffer_and_with_offsets(
     right_offset: usize,
     len: usize,
 ) {
-    hint::black_box(buffer_bin_and(left, left_offset, right, right_offset, len));
+    hint::black_box(left.bitwise_binary(right, left_offset, right_offset, len, |a, b| a & b));
 }
 
 fn bench_buffer_or_with_offsets(
@@ -65,18 +65,33 @@ fn bench_buffer_or_with_offsets(
     right_offset: usize,
     len: usize,
 ) {
-    hint::black_box(buffer_bin_or(left, left_offset, right, right_offset, len));
+    hint::black_box(left.bitwise_binary(right, left_offset, right_offset, len, |a, b| a | b));
 }
 
 fn bench_buffer_not_with_offsets(buffer: &Buffer, offset: usize, len: usize) {
-    hint::black_box(buffer_unary_not(buffer, offset, len));
+    hint::black_box(buffer.bitwise_unary(offset, len, |a| !a));
+}
+
+fn bench_mutable_buffer_and(left: &mut MutableBuffer, right: &Buffer) {
+    hint::black_box(left.bitwise_binary_inplace(right, 0, 0, left.len() * 8, |a, b| a & b));
+}
+
+fn bench_mutable_buffer_or(left: &mut MutableBuffer, right: &Buffer) {
+    hint::black_box(left.bitwise_binary_inplace(right, 0, 0, left.len() * 8, |a, b| a | b));
+}
+
+fn bench_mutable_buffer_not(buffer: &mut MutableBuffer) {
+    hint::black_box(buffer.bitwise_unary_inplace(0, buffer.len() * 8, |a| !a));
 }
 
 fn bit_ops_benchmark(c: &mut Criterion) {
     let left = create_buffer(512 * 10);
     let right = create_buffer(512 * 10);
+    let mut mutable_left = MutableBuffer::from(left.as_slice().to_vec());
+    let mutable_right = Buffer::from(right.as_slice().to_vec());
 
-    c.benchmark_group("buffer_binary_ops")
+    // Allocating benchmarks
+    c.benchmark_group("buffer_bitwise_alloc_binary_ops")
         .throughput(Throughput::Bytes(3 * left.len() as u64))
         .bench_function("and", |b| b.iter(|| bench_buffer_and(&left, &right)))
         .bench_function("or", |b| b.iter(|| bench_buffer_or(&left, &right)))
@@ -87,12 +102,22 @@ fn bit_ops_benchmark(c: &mut Criterion) {
             b.iter(|| bench_buffer_or_with_offsets(&left, 1, &right, 2, left.len() * 8 - 5))
         });
 
-    c.benchmark_group("buffer_unary_ops")
+    c.benchmark_group("buffer_bitwise_alloc_unary_ops")
         .throughput(Throughput::Bytes(2 * left.len() as u64))
         .bench_function("not", |b| b.iter(|| bench_buffer_not(&left)))
         .bench_function("not_with_offset", |b| {
             b.iter(|| bench_buffer_not_with_offsets(&left, 1, left.len() * 8 - 5))
         });
+
+    // In-place benchmarks
+    c.benchmark_group("buffer_bitwise_inplace_binary_ops")
+        .throughput(Throughput::Bytes(2 * mutable_left.len() as u64))
+        .bench_function("and", |b| b.iter(|| bench_mutable_buffer_and(&mut mutable_left, &mutable_right)))
+        .bench_function("or", |b| b.iter(|| bench_mutable_buffer_or(&mut mutable_left, &mutable_right)));
+
+    c.benchmark_group("buffer_bitwise_inplace_unary_ops")
+        .throughput(Throughput::Bytes(mutable_left.len() as u64))
+        .bench_function("not", |b| b.iter(|| bench_mutable_buffer_not(&mut mutable_left)));
 }
 
 criterion_group!(benches, bit_ops_benchmark);
diff --git a/docs/arraydata_bitmap_layout_contract.md b/docs/arraydata_bitmap_layout_contract.md
new file mode 100644
index 000000000000..73d401878414
--- /dev/null
+++ b/docs/arraydata_bitmap_layout_contract.md
@@ -0,0 +1,76 @@
+# ArrayData + Bitmap Layout Contract
+
+This document specifies the invariants for `ArrayData`, offsets, and bitmap buffers (validity and condition bitmaps) in Arrow-rs. These rules are derived from the actual implementation and must be treated as ground truth for bitwise and null-handling operations like `nullif`.
+
+## 1. Basics
+
+- **Bit Numbering**: Bits are numbered starting from 0 as the least significant bit (LSB) of byte 0, then bit 1 as the next LSB, and so on, across bytes in little-endian order.
+- **Validity Bitmap Invariant**:
+  > For an `ArrayData` with `len = L`, `offset = O` (in elements), and validity bitmap buffer `B`:
+  > Logical index `i` in `[0, L)` is valid iff `get_bit(B, O + i) == true`.
+- **Null Count**: If `null_count` is set, it MUST equal the count of `false` bits in the validity bitmap over the logical range `[0, L)`.
+
+## 2. Slicing
+
+- **Slice Operation**: `array.slice(offset', len')` produces a new `ArrayData` with:
+  - `len = len'`,
+  - `offset = O + offset'`,
+  - Shared underlying buffers (no copying).
+- **Sliced Validity Invariant**:
+  > For the sliced `ArrayData` with `len = L'`, `offset = O'`, and the same validity bitmap buffer `B`:
+  > Logical index `i` in `[0, L')` is valid iff `get_bit(B, O' + i) == true`.
+
+## 3. Constructing New ArrayData with a Fresh Validity Buffer
+
+- **Case 1: Offset = 0 and New Validity Bitmap**:
+  - Bit `i` in the validity bitmap `B'` corresponds directly to logical index `i` for `i` in `[0, L)`.
+  - `null_count` MUST equal the number of `false` bits in `B'` over `[0, L)`, or be left unset for Arrow to compute.
+- **Case 2: Reusing Existing Null Buffer with Offset**:
+  - When reusing a `NullBuffer` from an existing array, the `offset` in the new `ArrayData` adjusts the logical-to-physical bit mapping as per the basic invariant.
+  - Ensure the reused buffer's bits align with the new `len` and `offset`; `null_count` must reflect the new logical range.
+
+## 4. `nullif`-Specific Contract
+
+- **Condition Bitmap**: A condition bitmap `C` indicates elements to nullify: logical index `i` should be nulled if `C[i] == true`.
+- **Validity Computation Rule**:
+  > Given left validity `V` and condition `C` (both in logical index space):
+  > `result_valid(i) = V(i) & !C(i)`.
+- **Buffer Mapping**:
+  - For left array with `offset_left`, bit index for `V(i)` is `offset_left + i`.
+  - For condition array with `offset_cond`, bit index for `C(i)` is `offset_cond + i`.
+  - The result validity bitmap should be constructed with `offset = 0` and bits computed as above.
+
+## 5. Do/Don't Guidelines
+
+- **DO**: Centralize bitmap math in helpers that take `(buffer, bit_offset, len)` and return a new bitmap with `offset = 0`.
+- **DO**: Ensure `null_count` matches the validity bitmap or leave it unset for automatic computation.
+- **DON'T**: Manually slice buffers and add offsets (avoid double-compensation); use the slicing semantics instead.
+
+## 6. Nullif Implementation Contract
+
+For any ArrayData:
+
+len = data.len()            // logical elements
+offset = data.offset()      // logical starting index into buffers
+
+Validity bitmap (if present) is a Buffer B.
+
+Invariant:
+  Logical index i in [0, len) is valid iff get_bit(B, offset + i) == true.
+
+For the result of nullif:
+
+We will build a fresh ArrayData with offset = 0.
+
+For that result:
+  Logical index i is valid iff get_bit(result_validity, i) == true.
+  Values buffer is laid out so element 0 is first result value, etc.
+
+For nullif semantics:
+
+Let V(i) = left is valid at i
+    C(i) = condition "nullify at i" is true (depends on left, right, type)
+
+Then:
+  result_valid(i) = V(i) & !C(i)
+  result_value(i) = left_value(i)    // when result_valid(i) == true
\ No newline at end of file