diff options
author | Adhemerval Zanella <adhemerval.zanella@linaro.org> | 2018-09-03 15:30:15 -0300 |
---|---|---|
committer | Adhemerval Zanella <adhemerval.zanella@linaro.org> | 2018-09-04 15:45:12 -0300 |
commit | 367e56edc8b0a2d2eefbe4875d81caa82fe82654 (patch) | |
tree | 2e6330f087244712ae5a4cabd172fb607db47e54 | |
parent | d3960af570f708e3f3a6c8a7c2d292f55d4d1da4 (diff) |
stdlib: Implement introsort with qsortazanella/qsort-refactor
This patch adds a introsort implementation on qsort to avoid worse-case
performance of quicksort to O(nlog n). The fallback used is a simple
heapsort based on Linux implementation. As a side note the introsort
implementation is similar the one used on libstdc++ for std::sort.
The performance shown some regression due heapsort usage, as expected:
Results for member size 4
MostlySorted
nmemb | base | patched | diff
32 | 1809 | 2034 | 12.44
4096 | 796888 | 1051955 | 32.01
32768 | 7616495 | 10820159 | 42.06
524288 | 142093487 | 250602727 | 76.36
Repeated
nmemb | base | patched | diff
32 | 1875 | 2046 | 9.12
4096 | 912206 | 1123392 | 23.15
32768 | 8690714 | 10223479 | 17.64
524288 | 171229868 | 287344355 | 67.81
Sorted
nmemb | base | patched | diff
32 | 1367 | 1354 | -0.95
4096 | 341229 | 1153712 | 238.10
32768 | 3358312 | 10983978 | 227.07
524288 | 66206257 | 232996299 | 251.92
Unsorted
nmemb | base | patched | diff
32 | 2182 | 2209 | 1.24
4096 | 990920 | 1101897 | 11.20
32768 | 10119221 | 11319451 | 11.86
524288 | 188297819 | 295723362 | 57.05
Results for member size 8
MostlySorted
nmemb | base | patched | diff
32 | 1976 | 2061 | 4.30
4096 | 720747 | 1048324 | 45.45
32768 | 7161384 | 10927694 | 52.59
524288 | 133265602 | 264259752 | 98.30
Repeated
nmemb | base | patched | diff
32 | 2123 | 2265 | 6.69
4096 | 913059 | 1124658 | 23.17
32768 | 8785972 | 11353472 | 29.22
524288 | 171667139 | 303675135 | 76.90
Sorted
nmemb | base | patched | diff
32 | 1193 | 1128 | -5.45
4096 | 301818 | 1101787 | 265.05
32768 | 2961904 | 10964479 | 270.18
524288 | 58873679 | 230789472 | 292.01
Unsorted
nmemb | base | patched | diff
32 | 2099 | 2407 | 14.67
4096 | 1024575 | 1154641 | 12.69
32768 | 10017934 | 11635442 | 16.15
524288 | 188778771 | 310044501 | 64.24
Results for member size 32
MostlySorted
nmemb | base | patched | diff
32 | 3864 | 4423 | 14.47
4096 | 1102305 | 1898226 | 72.21
32768 | 9915730 | 19176780 | 93.40
524288 | 193398884 | 478171367 | 147.25
Repeated
nmemb | base | patched | diff
32 | 4636 | 3730 | -19.54
4096 | 1359433 | 1847693 | 35.92
32768 | 13028460 | 19120804 | 46.76
524288 | 251463610 | 571423413 | 127.24
Sorted
nmemb | base | patched | diff
32 | 1223 | 1165 | -4.74
4096 | 310945 | 1922987 | 518.43
32768 | 2975730 | 19160622 | 543.90
524288 | 63995389 | 423600523 | 561.92
Unsorted
nmemb | base | patched | diff
32 | 4612 | 5024 | 8.93
4096 | 1436912 | 1989848 | 38.48
32768 | 14043593 | 20652436 | 47.06
524288 | 269586508 | 641284939 | 137.88
Checked on x86_64-linux-gnu.
* stdlib/qsort.c (lg): New function.
(R_HEAP): New define.
* stdlib/qsort_common.c (R_CMP_ARG_USE): New define.
(R_HEAP): New function: fallback heapsort implementation.
(R_FUNC): Use fallback heapsort if quicksort depth exceeds
2 times log2 of total element number.
-rw-r--r-- | stdlib/qsort.c | 10 | ||||
-rw-r--r-- | stdlib/qsort_common.c | 63 |
2 files changed, 70 insertions, 3 deletions
diff --git a/stdlib/qsort.c b/stdlib/qsort.c index 10b805930a..3f78b66bc8 100644 --- a/stdlib/qsort.c +++ b/stdlib/qsort.c @@ -100,14 +100,24 @@ typedef struct #define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi))) #define STACK_NOT_EMPTY (stack < top) +/* Helper function to calculate the maximum depth before quicksort switches + to heapsort. X should be higher than 0. */ +static inline size_t +lg (size_t x) +{ + return sizeof (size_t) * CHAR_BIT - 1 - __builtin_clzl(x); +} + #define R_VERSION #define R_FUNC __qsort_r +#define R_HEAP heapsort_r #include <stdlib/qsort_common.c> libc_hidden_def (__qsort_r) weak_alias (__qsort_r, qsort_r) #define R_FUNC qsort +#define R_HEAP heapsort #include <stdlib/qsort_common.c> libc_hidden_def (qsort) diff --git a/stdlib/qsort_common.c b/stdlib/qsort_common.c index 666b1958ab..96274f2150 100644 --- a/stdlib/qsort_common.c +++ b/stdlib/qsort_common.c @@ -23,15 +23,65 @@ #ifdef R_VERSION # define R_CMP_TYPE __compar_d_fn_t # define R_CMP_ARG , void *arg +# define R_CMP_ARG_USE , arg # define R_CMP(p1, p2) cmp (p1, p2, arg) #else # define R_CMP_TYPE __compar_fn_t # define R_CMP_ARG +# define R_CMP_ARG_USE # define R_CMP(p1, p2) cmp (p1, p2) #endif -/* Order size using quicksort. This implementation incorporates - four optimizations discussed in Sedgewick: +/* A fast, small, non-recursive O(nlog n) heapsort, adapted from Linux + lib/sort.c. Used on introsort implementation as a fallback routine with + worst-case performance of O(nlog n) and worst-case space complexity of + O(1). */ + +static void +R_HEAP (void *pbase, size_t total_elems, size_t size, swap_t swap, + R_CMP_TYPE cmp R_CMP_ARG) +{ + size_t i = (total_elems / 2 - 1) * size; + size_t n = total_elems * size; + size_t r, c; + + while (1) + { + for (r = i; r * 2 + size < n; r = c) + { + c = r * 2 + size; + if (c < n - size && R_CMP (pbase + c, pbase + c + size) < 0) + c += size; + if (R_CMP (pbase + r, pbase + c) >= 0) + break; + swap (pbase + r, pbase + c, size); + } + if (i == 0) + break; + i -= size; + } + + i = n - size; + while (1) + { + swap (pbase, pbase + i, size); + for (r = 0; r * 2 + size < i; r = c) + { + c = r * 2 + size; + if (c < i - size && R_CMP (pbase + c, pbase + c + size) < 0) + c += size; + if (R_CMP (pbase + r, pbase + c) >= 0) + break; + swap (pbase + r, pbase + c, size); + } + i -= size; + if (i == 0) + break; + } +} + +/* Introsort to avoid worst case scenario of quicksort. For quicksort + the implementation incorporates four optimizations discussed in Sedgewick: 1. Non-recursive, using an explicit stack of pointer that store the next array partition to sort. To save time, this maximum amount @@ -57,7 +107,7 @@ void R_FUNC (void *pbase, size_t total_elems, size_t size, R_CMP_TYPE cmp R_CMP_ARG) { - if (total_elems == 0) + if (total_elems <= 1) /* Avoid lossage with unsigned arithmetic below. */ return; @@ -67,6 +117,8 @@ R_FUNC (void *pbase, size_t total_elems, size_t size, R_CMP_TYPE cmp R_CMP_ARG) swap_t swap = select_swap_func (pbase, size); + size_t depth = 2 * lg (total_elems); + if (total_elems > MAX_THRESH) { char *lo = base_ptr; @@ -78,6 +130,9 @@ R_FUNC (void *pbase, size_t total_elems, size_t size, R_CMP_TYPE cmp R_CMP_ARG) while (STACK_NOT_EMPTY) { + if (depth-- == 0) + return R_HEAP (pbase, total_elems, size, swap, cmp R_CMP_ARG_USE); + char *left_ptr; char *right_ptr; @@ -220,6 +275,8 @@ R_FUNC (void *pbase, size_t total_elems, size_t size, R_CMP_TYPE cmp R_CMP_ARG) #undef R_NAME #undef R_CMP_TYPE #undef R_CMP_ARG +#undef R_CMP_ARG_USE #undef R_CMP #undef R_FUNC #undef R_VERSION +#undef R_HEAP |