NTrace
GPU ray tracing framework
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
Sort.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009-2011, NVIDIA Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of NVIDIA Corporation nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "base/Sort.hpp"
30 
31 using namespace FW;
32 
33 //------------------------------------------------------------------------
34 
35 #define QSORT_STACK_SIZE 32
36 #define QSORT_MIN_SIZE 16
37 #define MULTICORE_MIN_SIZE (1 << 13)
38 
39 //------------------------------------------------------------------------
40 
41 namespace FW
42 {
43 
44 struct TaskSpec
45 {
48  void* data;
51 };
52 
53 static inline void insertionSort (int start, int size, void* data, SortCompareFunc compareFunc, SortSwapFunc swapFunc);
54 static inline int median3 (int low, int high, void* data, SortCompareFunc compareFunc);
55 static int partition (int low, int high, void* data, SortCompareFunc compareFunc, SortSwapFunc swapFunc);
56 static void qsort (int low, int high, void* data, SortCompareFunc compareFunc, SortSwapFunc swapFunc);
57 static void qsortMulticore (MulticoreLauncher::Task& task);
58 
59 }
60 
61 //------------------------------------------------------------------------
62 
63 void FW::insertionSort(int start, int size, void* data, SortCompareFunc compareFunc, SortSwapFunc swapFunc)
64 {
65  FW_ASSERT(compareFunc && swapFunc);
66  FW_ASSERT(size >= 0);
67 
68  for (int i = 1; i < size; i++)
69  {
70  int j = start + i - 1;
71  while (j >= start && compareFunc(data, j + 1, j))
72  {
73  swapFunc(data, j, j + 1);
74  j--;
75  }
76  }
77 }
78 
79 //------------------------------------------------------------------------
80 
81 int FW::median3(int low, int high, void* data, SortCompareFunc compareFunc)
82 {
83  FW_ASSERT(compareFunc);
84  FW_ASSERT(low >= 0 && high >= 2);
85 
86  int l = low;
87  int c = (low + high) >> 1;
88  int h = high - 2;
89 
90  if (compareFunc(data, h, l)) swap(l, h);
91  if (compareFunc(data, c, l)) c = l;
92  return (compareFunc(data, h, c)) ? h : c;
93 }
94 
95 //------------------------------------------------------------------------
96 
97 int FW::partition(int low, int high, void* data, SortCompareFunc compareFunc, SortSwapFunc swapFunc)
98 {
99  // Select pivot using median-3, and hide it in the highest entry.
100 
101  swapFunc(data, median3(low, high, data, compareFunc), high - 1);
102 
103  // Partition data.
104 
105  int i = low - 1;
106  int j = high - 1;
107  for (;;)
108  {
109  do
110  i++;
111  while (compareFunc(data, i, high - 1));
112  do
113  j--;
114  while (compareFunc(data, high - 1, j));
115 
116  FW_ASSERT(i >= low && j >= low && i < high && j < high);
117  if (i >= j)
118  break;
119 
120  swapFunc(data, i, j);
121  }
122 
123  // Restore pivot.
124 
125  swapFunc(data, i, high - 1);
126  return i;
127 }
128 
129 //------------------------------------------------------------------------
130 
131 void FW::qsort(int low, int high, void* data, SortCompareFunc compareFunc, SortSwapFunc swapFunc)
132 {
133  FW_ASSERT(compareFunc && swapFunc);
134  FW_ASSERT(low <= high);
135 
136  int stack[QSORT_STACK_SIZE];
137  int sp = 0;
138  stack[sp++] = high;
139 
140  while (sp)
141  {
142  high = stack[--sp];
143  FW_ASSERT(low <= high);
144 
145  // Small enough or stack full => use insertion sort.
146 
147  if (high - low < QSORT_MIN_SIZE || sp + 2 > QSORT_STACK_SIZE)
148  {
149  insertionSort(low, high - low, data, compareFunc, swapFunc);
150  low = high + 1;
151  continue;
152  }
153 
154  // Partition and sort sub-partitions.
155 
156  int i = partition(low, high, data, compareFunc, swapFunc);
157  FW_ASSERT(sp + 2 <= QSORT_STACK_SIZE);
158  if (high - i > 2)
159  stack[sp++] = high;
160  if (i - low > 1)
161  stack[sp++] = i;
162  else
163  low = i + 1;
164  }
165 }
166 
167 //------------------------------------------------------------------------
168 
169 void FW::qsortMulticore(MulticoreLauncher::Task& task)
170 {
171  // Small enough => use sequential qsort.
172 
173  TaskSpec* spec = (TaskSpec*)task.data;
174  if (spec->high - spec->low < MULTICORE_MIN_SIZE)
175  qsort(spec->low, spec->high, spec->data, spec->compareFunc, spec->swapFunc);
176 
177  // Otherwise => partition and schedule sub-partitions.
178 
179  else
180  {
181  int i = partition(spec->low, spec->high, spec->data, spec->compareFunc, spec->swapFunc);
182  if (i - spec->low >= 2)
183  {
184  TaskSpec* childSpec = new TaskSpec(*spec);
185  childSpec->high = i;
186  task.launcher->push(qsortMulticore, childSpec);
187  }
188  if (spec->high - i > 2)
189  {
190  TaskSpec* childSpec = new TaskSpec(*spec);
191  childSpec->low = i + 1;
192  task.launcher->push(qsortMulticore, childSpec);
193  }
194  }
195 
196  // Free task spec.
197 
198  delete spec;
199 }
200 
201 //------------------------------------------------------------------------
202 
203 void FW::sort(void* data, int start, int end, SortCompareFunc compareFunc, SortSwapFunc swapFunc, bool multicore)
204 {
205  FW_ASSERT(start <= end);
206  FW_ASSERT(compareFunc && swapFunc);
207 
208  // Nothing to do => skip.
209 
210  if (end - start < 2)
211  return;
212 
213  // Single-core.
214 
215  if (!multicore || end - start < MULTICORE_MIN_SIZE)
216  {
217  qsort(start, end, data, compareFunc, swapFunc);
218 }
219 
220  // Multicore.
221 
222  else
223 {
224  TaskSpec* spec = new TaskSpec;
225  spec->low = start;
226  spec->high = end;
227  spec->data = data;
228  spec->compareFunc = compareFunc;
229  spec->swapFunc = swapFunc;
230 
231  MulticoreLauncher launcher;
233  task.launcher = &launcher;
234  task.data = spec;
235  qsortMulticore(task);
236 }
237 }
238 
239 //------------------------------------------------------------------------
240 
241 int FW::compareU32(void* data, int idxA, int idxB)
242 {
243  U32 a = ((U32*)data)[idxA];
244  U32 b = ((U32*)data)[idxB];
245  return (a < b) ? -1 : (a > b) ? 1 : 0;
246 }
247 
248 void FW::swapU32(void* data, int idxA, int idxB)
249 {
250  swap(((U32*)data)[idxA], ((U32*)data)[idxB]);
251 }
252 
253 //------------------------------------------------------------------------
254 
255 int FW::compareU64(void* data, int idxA, int idxB)
256 {
257  U64 a = ((U64*)data)[idxA];
258  U64 b = ((U64*)data)[idxB];
259  return (a < b) ? -1 : (a > b) ? 1 : 0;
260 }
261 
262 void FW::swapU64(void* data, int idxA, int idxB)
263 {
264  swap(((U64*)data)[idxA], ((U64*)data)[idxB]);
265 }
266 
267 //------------------------------------------------------------------------
268 
269 int FW::compareS32(void* data, int idxA, int idxB)
270 {
271  S32 a = ((S32*)data)[idxA];
272  S32 b = ((S32*)data)[idxB];
273  return (a < b) ? -1 : (a > b) ? 1 : 0;
274 }
275 
276 void FW::swapS32(void* data, int idxA, int idxB)
277 {
278  swap(((S32*)data)[idxA], ((S32*)data)[idxB]);
279 }
280 
281 //------------------------------------------------------------------------
282 
283 int FW::compareS64(void* data, int idxA, int idxB)
284 {
285  S64 a = ((S64*)data)[idxA];
286  S64 b = ((S64*)data)[idxB];
287  return (a < b) ? -1 : (a > b) ? 1 : 0;
288 }
289 
290 void FW::swapS64(void* data, int idxA, int idxB)
291 {
292  swap(((S64*)data)[idxA], ((S64*)data)[idxB]);
293 }
294 
295 //------------------------------------------------------------------------
296 
297 int FW::compareF32(void* data, int idxA, int idxB)
298 {
299  F32 a = ((F32*)data)[idxA];
300  F32 b = ((F32*)data)[idxB];
301  return (a < b) ? -1 : (a > b) ? 1 : 0;
302 }
303 
304 void FW::swapF32(void* data, int idxA, int idxB)
305 {
306  swap(((F32*)data)[idxA], ((F32*)data)[idxB]);
307 }
308 
309 //------------------------------------------------------------------------
310 
311 int FW::compareF64(void* data, int idxA, int idxB)
312 {
313  F64 a = ((F64*)data)[idxA];
314  F64 b = ((F64*)data)[idxB];
315  return (a < b) ? -1 : (a > b) ? 1 : 0;
316 }
317 
318 void FW::swapF64(void* data, int idxA, int idxB)
319 {
320  swap(((F64*)data)[idxA], ((F64*)data)[idxB]);
321 }
322 
323 //------------------------------------------------------------------------
int compareF64(void *data, int idxA, int idxB)
Definition: Sort.cpp:311
int compareU32(void *data, int idxA, int idxB)
Definition: Sort.cpp:241
void swapS32(void *data, int idxA, int idxB)
Definition: Sort.cpp:276
void sort(void *data, int start, int end, SortCompareFunc compareFunc, SortSwapFunc swapFunc, bool multicore=false)
Definition: Sort.cpp:203
void * data
Definition: Sort.cpp:48
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid * data
Definition: DLLImports.inl:319
MulticoreLauncher & push(TaskFunc func, void *data, int firstIdx=0, int numTasks=1)
void swapF64(void *data, int idxA, int idxB)
Definition: Sort.cpp:318
SortSwapFunc swapFunc
Definition: Sort.cpp:50
S32 low
Definition: Sort.cpp:46
unsigned __int64 U64
Definition: Defs.hpp:97
double F64
Definition: Defs.hpp:90
#define MULTICORE_MIN_SIZE
Definition: Sort.cpp:37
int compareS64(void *data, int idxA, int idxB)
Definition: Sort.cpp:283
float F32
Definition: Defs.hpp:89
int compareS32(void *data, int idxA, int idxB)
Definition: Sort.cpp:269
SortCompareFunc compareFunc
Definition: Sort.cpp:49
void swapU32(void *data, int idxA, int idxB)
Definition: Sort.cpp:248
#define FW_ASSERT(X)
Definition: Defs.hpp:67
int compareF32(void *data, int idxA, int idxB)
Definition: Sort.cpp:297
void swapU64(void *data, int idxA, int idxB)
Definition: Sort.cpp:262
signed int S32
Definition: Defs.hpp:88
int compareU64(void *data, int idxA, int idxB)
Definition: Sort.cpp:255
signed __int64 S64
Definition: Defs.hpp:98
S32 high
Definition: Sort.cpp:47
unsigned int U32
Definition: Defs.hpp:85
void(* SortSwapFunc)(void *data, int idxA, int idxB)
Definition: Sort.hpp:47
#define QSORT_STACK_SIZE
Definition: Sort.cpp:35
FW_CUDA_FUNC void swap(T &a, T &b)
Definition: Defs.hpp:183
void swapS64(void *data, int idxA, int idxB)
Definition: Sort.cpp:290
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr size
Definition: DLLImports.inl:319
bool(* SortCompareFunc)(void *data, int idxA, int idxB)
Definition: Sort.hpp:46
void swapF32(void *data, int idxA, int idxB)
Definition: Sort.cpp:304