diff --git a/performance/set_difference.test b/performance/set_difference.test new file mode 100644 index 000000000..fa1521d8e --- /dev/null +++ b/performance/set_difference.test @@ -0,0 +1,45 @@ +PREAMBLE = \ + """ + #include + #include + #include + """ + +INITIALIZE = \ + """ + thrust::host_vector<$InputType> h_a = unittest::random_integers<$InputType>($InputSize); + thrust::host_vector<$InputType> h_b = unittest::random_integers<$InputType>($InputSize); + thrust::sort(h_a.begin(), h_a.end()); + thrust::sort(h_b.begin(), h_b.end()); + + thrust::host_vector<$InputType> h_result(h_a.size()); + thrust::host_vector<$InputType>::iterator new_end = + thrust::set_difference(h_a.begin(), h_a.end(), h_b.begin(), h_b.end(), h_result.begin()); + h_result.resize(new_end - h_result.begin()); + + thrust::device_vector<$InputType> d_a = h_a, d_b = h_b; + + thrust::device_vector<$InputType> d_result(h_result.size()); + thrust::set_difference(d_a.begin(), d_a.end(), d_b.begin(), d_b.end(), d_result.begin()); + + ASSERT_EQUAL(h_result, d_result); + """ + +TIME = \ + """ + thrust::set_difference(d_a.begin(), d_a.end(), d_b.begin(), d_b.end(), d_result.begin()); + """ + +FINALIZE = \ + """ + RECORD_TIME(); + RECORD_BANDWIDTH((2 * double($InputSize) + d_result.size()) * sizeof($InputType)); + RECORD_SORTING_RATE(2 * double($InputSize)) + """ + + +InputTypes = ['char', 'short', 'int', 'long', 'float', 'double'] +InputSizes = [2**N for N in range(10, 25)] + +TestVariables = [('InputType', InputTypes), ('InputSize', InputSizes)] + diff --git a/testing/set_difference.cu b/testing/set_difference.cu new file mode 100644 index 000000000..2b5b565ce --- /dev/null +++ b/testing/set_difference.cu @@ -0,0 +1,188 @@ +#include +#include +#include +#include + +template +void TestSetDifferenceSimple(void) +{ + typedef typename Vector::iterator Iterator; + + Vector a(4), b(5); + + a[0] = 0; a[1] = 2; a[2] = 4; a[3] = 5; + b[0] = 0; b[1] = 3; b[2] = 3; b[3] = 4; b[4] = 6; + + Vector ref(2); + ref[0] = 2; ref[1] = 5; + + Vector result(2); + + Iterator end = thrust::set_difference(a.begin(), a.end(), + b.begin(), b.end(), + result.begin()); + + ASSERT_EQUAL_QUIET(result.end(), end); + ASSERT_EQUAL(ref, result); +} +DECLARE_VECTOR_UNITTEST(TestSetDifferenceSimple); + + +template +void TestSetDifference(const size_t n) +{ + thrust::host_vector temp = unittest::random_integers(2 * n); + thrust::host_vector h_a(temp.begin(), temp.begin() + n); + thrust::host_vector h_b(temp.begin() + n, temp.end()); + + thrust::sort(h_a.begin(), h_a.end()); + thrust::sort(h_b.begin(), h_b.end()); + + thrust::device_vector d_a = h_a; + thrust::device_vector d_b = h_b; + + thrust::host_vector h_result(n); + thrust::device_vector d_result(n); + + typename thrust::host_vector::iterator h_end; + typename thrust::device_vector::iterator d_end; + + h_end = thrust::set_difference(h_a.begin(), h_a.end(), + h_b.begin(), h_b.end(), + h_result.begin()); + h_result.resize(h_end - h_result.begin()); + + d_end = thrust::set_difference(d_a.begin(), d_a.end(), + d_b.begin(), d_b.end(), + d_result.begin()); + + d_result.resize(d_end - d_result.begin()); + + ASSERT_EQUAL(h_result, d_result); +} +DECLARE_VARIABLE_UNITTEST(TestSetDifference); + + +template +void TestSetDifferenceEquivalentRanges(const size_t n) +{ + thrust::host_vector temp = unittest::random_integers(n); + thrust::host_vector h_a = temp; thrust::sort(h_a.begin(), h_a.end()); + thrust::host_vector h_b = h_a; + + thrust::device_vector d_a = h_a; + thrust::device_vector d_b = h_b; + + thrust::host_vector h_result(n); + thrust::device_vector d_result(n); + + typename thrust::host_vector::iterator h_end; + typename thrust::device_vector::iterator d_end; + + h_end = thrust::set_difference(h_a.begin(), h_a.end(), + h_b.begin(), h_b.end(), + h_result.begin()); + h_result.resize(h_end - h_result.begin()); + + d_end = thrust::set_difference(d_a.begin(), d_a.end(), + d_b.begin(), d_b.end(), + d_result.begin()); + + d_result.resize(d_end - d_result.begin()); + + ASSERT_EQUAL(h_result, d_result); +} +DECLARE_VARIABLE_UNITTEST(TestSetDifferenceEquivalentRanges); + + +template +void TestSetDifferenceMultiset(const size_t n) +{ + thrust::host_vector temp = unittest::random_integers(2 * n); + + // restrict elements to [min,13) + for(typename thrust::host_vector::iterator i = temp.begin(); + i != temp.end(); + ++i) + { + int temp = static_cast(*i); + temp %= 13; + *i = temp; + } + + thrust::host_vector h_a(temp.begin(), temp.begin() + n); + thrust::host_vector h_b(temp.begin() + n, temp.end()); + + thrust::sort(h_a.begin(), h_a.end()); + thrust::sort(h_b.begin(), h_b.end()); + + thrust::device_vector d_a = h_a; + thrust::device_vector d_b = h_b; + + thrust::host_vector h_result(n); + thrust::device_vector d_result(n); + + typename thrust::host_vector::iterator h_end; + typename thrust::device_vector::iterator d_end; + + h_end = thrust::set_difference(h_a.begin(), h_a.end(), + h_b.begin(), h_b.end(), + h_result.begin()); + h_result.resize(h_end - h_result.begin()); + + d_end = thrust::set_difference(d_a.begin(), d_a.end(), + d_b.begin(), d_b.end(), + d_result.begin()); + + d_result.resize(d_end - d_result.begin()); + + ASSERT_EQUAL(h_result, d_result); +} +DECLARE_VARIABLE_UNITTEST(TestSetDifferenceMultiset); + + +template + void TestSetDifferenceKeyValue(size_t n) +{ + typedef key_value T; + + thrust::host_vector h_keys_a = unittest::random_integers(n); + thrust::host_vector h_values_a = unittest::random_integers(n); + + thrust::host_vector h_keys_b = unittest::random_integers(n); + thrust::host_vector h_values_b = unittest::random_integers(n); + + thrust::host_vector h_a(n), h_b(n); + for(size_t i = 0; i < n; ++i) + { + h_a[i] = T(h_keys_a[i], h_values_a[i]); + h_b[i] = T(h_keys_b[i], h_values_b[i]); + } + + thrust::stable_sort(h_a.begin(), h_a.end()); + thrust::stable_sort(h_b.begin(), h_b.end()); + + thrust::device_vector d_a = h_a; + thrust::device_vector d_b = h_b; + + thrust::host_vector h_result(n); + thrust::device_vector d_result(n); + + typename thrust::host_vector::iterator h_end; + typename thrust::device_vector::iterator d_end; + + h_end = thrust::set_difference(h_a.begin(), h_a.end(), + h_b.begin(), h_b.end(), + h_result.begin()); + h_result.resize(h_end - h_result.begin()); + + d_end = thrust::set_difference(d_a.begin(), d_a.end(), + d_b.begin(), d_b.end(), + d_result.begin()); + + d_result.resize(d_end - d_result.begin()); + + ASSERT_EQUAL_QUIET(h_result, d_result); +} +DECLARE_VARIABLE_UNITTEST(TestSetDifferenceKeyValue); + diff --git a/testing/set_difference_descending.cu b/testing/set_difference_descending.cu new file mode 100644 index 000000000..e9c886c18 --- /dev/null +++ b/testing/set_difference_descending.cu @@ -0,0 +1,68 @@ +#include +#include +#include +#include + +template +void TestSetDifferenceAscendingSimple(void) +{ + typedef typename Vector::value_type T; + typedef typename Vector::iterator Iterator; + + Vector a(4), b(5); + + a[0] = 5; a[1] = 4; a[2] = 2; a[3] = 0; + b[0] = 6; b[1] = 4; b[2] = 3; b[3] = 3; b[4] = 0; + + Vector ref(2); + ref[0] = 5; ref[1] = 2; + + Vector result(2); + + Iterator end = thrust::set_difference(a.begin(), a.end(), + b.begin(), b.end(), + result.begin(), + thrust::greater()); + + ASSERT_EQUAL_QUIET(result.end(), end); + ASSERT_EQUAL(ref, result); +} +DECLARE_VECTOR_UNITTEST(TestSetDifferenceAscendingSimple); + + +template +void TestSetDifferenceAscending(const size_t n) +{ + thrust::host_vector temp = unittest::random_integers(2 * n); + thrust::host_vector h_a(temp.begin(), temp.begin() + n); + thrust::host_vector h_b(temp.begin() + n, temp.end()); + + thrust::sort(h_a.begin(), h_a.end(), thrust::greater()); + thrust::sort(h_b.begin(), h_b.end(), thrust::greater()); + + thrust::device_vector d_a = h_a; + thrust::device_vector d_b = h_b; + + thrust::host_vector h_result(n); + thrust::device_vector d_result(n); + + typename thrust::host_vector::iterator h_end; + typename thrust::device_vector::iterator d_end; + + h_end = thrust::set_difference(h_a.begin(), h_a.end(), + h_b.begin(), h_b.end(), + h_result.begin(), + thrust::greater()); + h_result.resize(h_end - h_result.begin()); + + d_end = thrust::set_difference(d_a.begin(), d_a.end(), + d_b.begin(), d_b.end(), + d_result.begin(), + thrust::greater()); + + d_result.resize(d_end - d_result.begin()); + + ASSERT_EQUAL(h_result, d_result); +} +DECLARE_VARIABLE_UNITTEST(TestSetDifferenceAscending); + diff --git a/thrust/detail/device/cuda/block/set_difference.h b/thrust/detail/device/cuda/block/set_difference.h new file mode 100644 index 000000000..3b36b077b --- /dev/null +++ b/thrust/detail/device/cuda/block/set_difference.h @@ -0,0 +1,52 @@ +/* + * Copyright 2008-2010 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +namespace thrust +{ +namespace detail +{ +namespace device +{ +namespace cuda +{ +namespace block +{ + +template +__device__ __forceinline__ + RandomAccessIterator4 set_difference(RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + RandomAccessIterator2 last2, + RandomAccessIterator3 temporary, + RandomAccessIterator4 result, + StrictWeakOrdering comp); + +} // end block +} // end cuda +} // end device +} // end detail +} // end thrust + +#include + + diff --git a/thrust/detail/device/cuda/block/set_difference.inl b/thrust/detail/device/cuda/block/set_difference.inl new file mode 100644 index 000000000..4e9fdfa22 --- /dev/null +++ b/thrust/detail/device/cuda/block/set_difference.inl @@ -0,0 +1,114 @@ +/* + * Copyright 2008-2010 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ +namespace device +{ +namespace cuda +{ +namespace block +{ + + +template +__device__ __forceinline__ + RandomAccessIterator4 set_difference(RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + RandomAccessIterator2 last2, + RandomAccessIterator3 temporary, + RandomAccessIterator4 result, + StrictWeakOrdering comp) +{ + typedef typename thrust::iterator_difference::type difference1; + typedef typename thrust::iterator_difference::type difference2; + + difference1 n1 = last1 - first1; + + if(n1 == 0) return result; + + // search for all non-matches in the second range for each element in the first + bool needs_output = false; + if(threadIdx.x < n1) + { + RandomAccessIterator1 x = first1; + x += threadIdx.x; + + // count the number of previous occurrances of x the first range + difference1 rank = x - thrust::detail::device::generic::scalar::lower_bound(first1,x,dereference(x),comp); + + // count the number of equivalent elements of x in the second range + thrust::pair matches = + thrust::detail::device::generic::scalar::equal_range(first2,last2,dereference(x),comp); + + difference2 num_matches = matches.second - matches.first; + + // the element needs output if its rank is gequal than the number of matches + needs_output = rank >= num_matches; + } // end if + + // mark whether my element needs output in the scratch array + RandomAccessIterator3 temp = temporary; + temp += threadIdx.x; + dereference(temp) = needs_output; + + block::inplace_inclusive_scan_n(temporary, n1, thrust::plus()); + + // copy_if + if(needs_output) + { + // find the index to write our element + unsigned int output_index = 0; + if(threadIdx.x > 0) + { + RandomAccessIterator3 src = temporary; + src += threadIdx.x - 1; + output_index = dereference(src); + } // end if + + RandomAccessIterator1 x = first1; + x += threadIdx.x; + + RandomAccessIterator4 dst = result; + dst += output_index; + dereference(dst) = dereference(x); + } // end if + + return result + temporary[n1-1]; +} // end set_difference + +} // end block +} // end cuda +} // end device +} // end detail +} // end thrust + + diff --git a/thrust/detail/device/cuda/block/set_intersection.inl b/thrust/detail/device/cuda/block/set_intersection.inl index 801a0939f..29dda2414 100644 --- a/thrust/detail/device/cuda/block/set_intersection.inl +++ b/thrust/detail/device/cuda/block/set_intersection.inl @@ -56,7 +56,7 @@ __device__ __forceinline__ if(n1 == 0) return result; // search for all matches in the second range for each element in the first - bool found = false; + bool needs_output = false; if(threadIdx.x < n1) { RandomAccessIterator1 x = first1; @@ -71,19 +71,19 @@ __device__ __forceinline__ difference2 num_matches = matches.second - matches.first; - // the element is "found" if its rank is less than the number of matches - found = rank < num_matches; + // the element needs output if its rank is less than the number of matches + needs_output = rank < num_matches; } // end if // mark whether my element was found or not in the scratch array RandomAccessIterator3 temp = temporary; temp += threadIdx.x; - dereference(temp) = found; + dereference(temp) = needs_output; block::inplace_inclusive_scan_n(temporary, n1, thrust::plus()); // copy_if - if(found) + if(needs_output) { // find the index to write our element unsigned int output_index = 0; diff --git a/thrust/detail/device/cuda/detail/rank_iterator.h b/thrust/detail/device/cuda/detail/rank_iterator.h index 20de06b8b..2102b2a9f 100644 --- a/thrust/detail/device/cuda/detail/rank_iterator.h +++ b/thrust/detail/device/cuda/detail/rank_iterator.h @@ -17,6 +17,8 @@ #pragma once #include +#include +#include namespace thrust { diff --git a/thrust/detail/device/cuda/set_difference.inl b/thrust/detail/device/cuda/set_difference.inl new file mode 100644 index 000000000..da4a9625c --- /dev/null +++ b/thrust/detail/device/cuda/set_difference.inl @@ -0,0 +1,103 @@ +/* + * Copyright 2008-2010 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC + +#include +#include +#include +#include +#include + +namespace thrust +{ +namespace detail +{ +namespace device +{ +namespace cuda +{ + +namespace set_difference_detail +{ + +struct block_convergent_set_difference_functor +{ + __host__ __device__ __forceinline__ + static unsigned int get_temporary_array_size(unsigned int block_size) + { + return block_size * sizeof(int); + } + + // operator() simply calls the block-wise function + template + __device__ __forceinline__ + RandomAccessIterator3 operator()(RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + RandomAccessIterator2 last2, + void *temporary, + RandomAccessIterator3 result, + StrictWeakOrdering comp) + { + return block::set_difference(first1,last1,first2,last2,reinterpret_cast(temporary),result,comp); + } // end operator()() +}; // end block_convergent_set_difference_functor + +} // end namespace set_difference_detail + + +template +RandomAccessIterator3 set_difference(RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + RandomAccessIterator2 last2, + RandomAccessIterator3 result, + Compare comp) +{ + typedef typename thrust::iterator_difference::type difference1; + typedef typename thrust::iterator_difference::type difference2; + + const difference1 num_elements1 = last1 - first1; + + // check for trivial problem + if(num_elements1 == 0) + return result; + + return detail::set_operation(first1, last1, + first2, last2, + result, + comp, + thrust::make_pair(0u, num_elements1), + detail::split_for_set_operation(), + set_difference_detail::block_convergent_set_difference_functor()); +} // end set_difference + +} // end namespace cuda +} // end namespace device +} // end namespace detail +} // end namespace thrust + +#endif // THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC + diff --git a/thrust/detail/device/cuda/set_operations.h b/thrust/detail/device/cuda/set_operations.h index 99c30e992..211022814 100644 --- a/thrust/detail/device/cuda/set_operations.h +++ b/thrust/detail/device/cuda/set_operations.h @@ -28,6 +28,18 @@ namespace cuda { +template +RandomAccessIterator3 set_difference(RandomAccessIterator1 first1, + RandomAccessIterator1 last1, + RandomAccessIterator2 first2, + RandomAccessIterator2 last2, + RandomAccessIterator3 result, + Compare comp); + + template #include #include diff --git a/thrust/detail/device/dispatch/set_operations.h b/thrust/detail/device/dispatch/set_operations.h index b467a41e4..59d7c9d28 100644 --- a/thrust/detail/device/dispatch/set_operations.h +++ b/thrust/detail/device/dispatch/set_operations.h @@ -32,6 +32,41 @@ namespace device namespace dispatch { + +template + OutputIterator set_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp, + thrust::detail::omp_device_space_tag) +{ + // omp backend + return thrust::detail::device::omp::set_difference(first1,last1,first2,last2,result,comp); +} // end set_difference() + + +template + OutputIterator set_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp, + thrust::detail::cuda_device_space_tag) +{ + // refinement for the CUDA backend + return thrust::detail::device::cuda::set_difference(first1,last1,first2,last2,result,comp); +} // end set_difference() + + template + OutputIterator set_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + return std::set_difference(first1,last1,first2,last2,result,comp); +} // end set_difference() + template + OutputIterator set_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + // dispatch on space + return thrust::detail::device::dispatch::set_difference(first1,last1,first2,last2,result,comp, + typename thrust::detail::minimum_space< + typename thrust::iterator_space::type, + typename thrust::iterator_space::type, + typename thrust::iterator_space::type + >::type()); +} // end set_difference() + template + OutputIterator set_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp, + thrust::host_space_tag) +{ + return thrust::detail::host::set_difference(first1,last1,first2,last2,result,comp); +} // end set_difference() + + +template + OutputIterator set_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp, + thrust::device_space_tag) +{ + return thrust::detail::device::set_difference(first1,last1,first2,last2,result,comp); +} // end set_difference() + + template + OutputIterator set_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + return std::set_difference(first1,last1,first2,last2,result,comp); +} // end set_difference() + template + OutputIterator set_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + StrictWeakOrdering comp) +{ + return thrust::detail::dispatch::set_difference(first1, last1, + first2, last2, + result, comp, + typename thrust::detail::minimum_space< + typename thrust::iterator_space::type, + typename thrust::iterator_space::type, + typename thrust::iterator_space::type + >::type()); +} // end set_difference() + +template + OutputIterator set_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result) +{ + typedef typename thrust::iterator_value::type value_type; + return thrust::set_difference(first1, last1, first2, last2, result, thrust::less()); +} // end set_difference() + + template::type(), - typename thrust::iterator_space::type(), - typename thrust::iterator_space::type()); + typename thrust::detail::minimum_space< + typename thrust::iterator_space::type, + typename thrust::iterator_space::type, + typename thrust::iterator_space::type + >::type()); } // end set_intersection() template