aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdhemerval Zanella <adhemerval.zanella@linaro.org>2018-09-03 15:30:15 -0300
committerAdhemerval Zanella <adhemerval.zanella@linaro.org>2018-09-04 15:45:12 -0300
commit367e56edc8b0a2d2eefbe4875d81caa82fe82654 (patch)
tree2e6330f087244712ae5a4cabd172fb607db47e54
parentd3960af570f708e3f3a6c8a7c2d292f55d4d1da4 (diff)
stdlib: Implement introsort with qsortazanella/qsort-refactor
This patch adds a introsort implementation on qsort to avoid worse-case performance of quicksort to O(nlog n). The fallback used is a simple heapsort based on Linux implementation. As a side note the introsort implementation is similar the one used on libstdc++ for std::sort. The performance shown some regression due heapsort usage, as expected: Results for member size 4 MostlySorted nmemb | base | patched | diff 32 | 1809 | 2034 | 12.44 4096 | 796888 | 1051955 | 32.01 32768 | 7616495 | 10820159 | 42.06 524288 | 142093487 | 250602727 | 76.36 Repeated nmemb | base | patched | diff 32 | 1875 | 2046 | 9.12 4096 | 912206 | 1123392 | 23.15 32768 | 8690714 | 10223479 | 17.64 524288 | 171229868 | 287344355 | 67.81 Sorted nmemb | base | patched | diff 32 | 1367 | 1354 | -0.95 4096 | 341229 | 1153712 | 238.10 32768 | 3358312 | 10983978 | 227.07 524288 | 66206257 | 232996299 | 251.92 Unsorted nmemb | base | patched | diff 32 | 2182 | 2209 | 1.24 4096 | 990920 | 1101897 | 11.20 32768 | 10119221 | 11319451 | 11.86 524288 | 188297819 | 295723362 | 57.05 Results for member size 8 MostlySorted nmemb | base | patched | diff 32 | 1976 | 2061 | 4.30 4096 | 720747 | 1048324 | 45.45 32768 | 7161384 | 10927694 | 52.59 524288 | 133265602 | 264259752 | 98.30 Repeated nmemb | base | patched | diff 32 | 2123 | 2265 | 6.69 4096 | 913059 | 1124658 | 23.17 32768 | 8785972 | 11353472 | 29.22 524288 | 171667139 | 303675135 | 76.90 Sorted nmemb | base | patched | diff 32 | 1193 | 1128 | -5.45 4096 | 301818 | 1101787 | 265.05 32768 | 2961904 | 10964479 | 270.18 524288 | 58873679 | 230789472 | 292.01 Unsorted nmemb | base | patched | diff 32 | 2099 | 2407 | 14.67 4096 | 1024575 | 1154641 | 12.69 32768 | 10017934 | 11635442 | 16.15 524288 | 188778771 | 310044501 | 64.24 Results for member size 32 MostlySorted nmemb | base | patched | diff 32 | 3864 | 4423 | 14.47 4096 | 1102305 | 1898226 | 72.21 32768 | 9915730 | 19176780 | 93.40 524288 | 193398884 | 478171367 | 147.25 Repeated nmemb | base | patched | diff 32 | 4636 | 3730 | -19.54 4096 | 1359433 | 1847693 | 35.92 32768 | 13028460 | 19120804 | 46.76 524288 | 251463610 | 571423413 | 127.24 Sorted nmemb | base | patched | diff 32 | 1223 | 1165 | -4.74 4096 | 310945 | 1922987 | 518.43 32768 | 2975730 | 19160622 | 543.90 524288 | 63995389 | 423600523 | 561.92 Unsorted nmemb | base | patched | diff 32 | 4612 | 5024 | 8.93 4096 | 1436912 | 1989848 | 38.48 32768 | 14043593 | 20652436 | 47.06 524288 | 269586508 | 641284939 | 137.88 Checked on x86_64-linux-gnu. * stdlib/qsort.c (lg): New function. (R_HEAP): New define. * stdlib/qsort_common.c (R_CMP_ARG_USE): New define. (R_HEAP): New function: fallback heapsort implementation. (R_FUNC): Use fallback heapsort if quicksort depth exceeds 2 times log2 of total element number.
-rw-r--r--stdlib/qsort.c10
-rw-r--r--stdlib/qsort_common.c63
2 files changed, 70 insertions, 3 deletions
diff --git a/stdlib/qsort.c b/stdlib/qsort.c
index 10b805930a..3f78b66bc8 100644
--- a/stdlib/qsort.c
+++ b/stdlib/qsort.c
@@ -100,14 +100,24 @@ typedef struct
#define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi)))
#define STACK_NOT_EMPTY (stack < top)
+/* Helper function to calculate the maximum depth before quicksort switches
+ to heapsort. X should be higher than 0. */
+static inline size_t
+lg (size_t x)
+{
+ return sizeof (size_t) * CHAR_BIT - 1 - __builtin_clzl(x);
+}
+
#define R_VERSION
#define R_FUNC __qsort_r
+#define R_HEAP heapsort_r
#include <stdlib/qsort_common.c>
libc_hidden_def (__qsort_r)
weak_alias (__qsort_r, qsort_r)
#define R_FUNC qsort
+#define R_HEAP heapsort
#include <stdlib/qsort_common.c>
libc_hidden_def (qsort)
diff --git a/stdlib/qsort_common.c b/stdlib/qsort_common.c
index 666b1958ab..96274f2150 100644
--- a/stdlib/qsort_common.c
+++ b/stdlib/qsort_common.c
@@ -23,15 +23,65 @@
#ifdef R_VERSION
# define R_CMP_TYPE __compar_d_fn_t
# define R_CMP_ARG , void *arg
+# define R_CMP_ARG_USE , arg
# define R_CMP(p1, p2) cmp (p1, p2, arg)
#else
# define R_CMP_TYPE __compar_fn_t
# define R_CMP_ARG
+# define R_CMP_ARG_USE
# define R_CMP(p1, p2) cmp (p1, p2)
#endif
-/* Order size using quicksort. This implementation incorporates
- four optimizations discussed in Sedgewick:
+/* A fast, small, non-recursive O(nlog n) heapsort, adapted from Linux
+ lib/sort.c. Used on introsort implementation as a fallback routine with
+ worst-case performance of O(nlog n) and worst-case space complexity of
+ O(1). */
+
+static void
+R_HEAP (void *pbase, size_t total_elems, size_t size, swap_t swap,
+ R_CMP_TYPE cmp R_CMP_ARG)
+{
+ size_t i = (total_elems / 2 - 1) * size;
+ size_t n = total_elems * size;
+ size_t r, c;
+
+ while (1)
+ {
+ for (r = i; r * 2 + size < n; r = c)
+ {
+ c = r * 2 + size;
+ if (c < n - size && R_CMP (pbase + c, pbase + c + size) < 0)
+ c += size;
+ if (R_CMP (pbase + r, pbase + c) >= 0)
+ break;
+ swap (pbase + r, pbase + c, size);
+ }
+ if (i == 0)
+ break;
+ i -= size;
+ }
+
+ i = n - size;
+ while (1)
+ {
+ swap (pbase, pbase + i, size);
+ for (r = 0; r * 2 + size < i; r = c)
+ {
+ c = r * 2 + size;
+ if (c < i - size && R_CMP (pbase + c, pbase + c + size) < 0)
+ c += size;
+ if (R_CMP (pbase + r, pbase + c) >= 0)
+ break;
+ swap (pbase + r, pbase + c, size);
+ }
+ i -= size;
+ if (i == 0)
+ break;
+ }
+}
+
+/* Introsort to avoid worst case scenario of quicksort. For quicksort
+ the implementation incorporates four optimizations discussed in Sedgewick:
1. Non-recursive, using an explicit stack of pointer that store the
next array partition to sort. To save time, this maximum amount
@@ -57,7 +107,7 @@
void
R_FUNC (void *pbase, size_t total_elems, size_t size, R_CMP_TYPE cmp R_CMP_ARG)
{
- if (total_elems == 0)
+ if (total_elems <= 1)
/* Avoid lossage with unsigned arithmetic below. */
return;
@@ -67,6 +117,8 @@ R_FUNC (void *pbase, size_t total_elems, size_t size, R_CMP_TYPE cmp R_CMP_ARG)
swap_t swap = select_swap_func (pbase, size);
+ size_t depth = 2 * lg (total_elems);
+
if (total_elems > MAX_THRESH)
{
char *lo = base_ptr;
@@ -78,6 +130,9 @@ R_FUNC (void *pbase, size_t total_elems, size_t size, R_CMP_TYPE cmp R_CMP_ARG)
while (STACK_NOT_EMPTY)
{
+ if (depth-- == 0)
+ return R_HEAP (pbase, total_elems, size, swap, cmp R_CMP_ARG_USE);
+
char *left_ptr;
char *right_ptr;
@@ -220,6 +275,8 @@ R_FUNC (void *pbase, size_t total_elems, size_t size, R_CMP_TYPE cmp R_CMP_ARG)
#undef R_NAME
#undef R_CMP_TYPE
#undef R_CMP_ARG
+#undef R_CMP_ARG_USE
#undef R_CMP
#undef R_FUNC
#undef R_VERSION
+#undef R_HEAP