author     Petri Savolainen <petri.savolainen@linaro.org>    2018-03-09 16:49:48 +0200
committer  Maxim Uvarov <maxim.uvarov@linaro.org>            2018-03-22 05:57:49 +0300
commit     bcd246e3a9948a631eb8eb77886ac1b369b13dba (patch)
tree       5b521cef08b4dc5fe224428683c3c44a2379eb92
parent     c57da8ddc80e0e314957c6d11db288ef11fc1805 (diff)
linux-gen: sched: configurable priority spread
Use the configuration file to enable users to change priority queue
spreading.
Signed-off-by: Petri Savolainen <petri.savolainen@linaro.org>
Reviewed-by: Balasubramanian Manoharan <bala.manoharan@linaro.org>
Reviewed-by: Dmitry Eremin-Solenikov <dmitry.ereminsolenikov@linaro.org>
Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org>
-rw-r--r--  config/odp-linux-generic.conf                 9
-rw-r--r--  platform/linux-generic/odp_schedule_basic.c  167
2 files changed, 115 insertions(+), 61 deletions(-)
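The new sched_basic.prio_spread option can be tuned without rebuilding ODP.
As a usage illustration (not part of this patch): a minimal override file
sketch, assuming the linux-generic runtime also reads the configuration file
named by the ODP_CONFIG_FILE environment variable and keeps built-in defaults
for options the file omits. The file name and the value below are
hypothetical.

    # my-odp.conf (hypothetical): run the application with
    # ODP_CONFIG_FILE=./my-odp.conf in the environment
    sched_basic: {
            # Typically set to the number of threads using the scheduler;
            # this patch accepts values in [MIN_SPREAD, MAX_SPREAD], i.e. 1..4
            prio_spread = 2
    }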
diff --git a/config/odp-linux-generic.conf b/config/odp-linux-generic.conf
index 916058866..0034c64ba 100644
--- a/config/odp-linux-generic.conf
+++ b/config/odp-linux-generic.conf
@@ -38,3 +38,12 @@ queue_basic: {
         # Default queue size. Value must be a power of two.
         default_queue_size = 4096
 }
+
+sched_basic: {
+        # Priority level spread. Each priority level is spread into multiple
+        # scheduler internal queues. A higher spread value typically improves
+        # parallelism and thus is better for high thread counts, but causes
+        # uneven service level for low thread counts. Typically, optimal
+        # value is the number of threads using the scheduler.
+        prio_spread = 4
+}
diff --git a/platform/linux-generic/odp_schedule_basic.c b/platform/linux-generic/odp_schedule_basic.c
index cd20b39dd..e6d28c6df 100644
--- a/platform/linux-generic/odp_schedule_basic.c
+++ b/platform/linux-generic/odp_schedule_basic.c
@@ -27,6 +27,7 @@
 #include <odp_timer_internal.h>
 #include <odp_queue_internal.h>
 #include <odp_buffer_inlines.h>
+#include <odp_libconfig_internal.h>
 
 /* Number of priority levels */
 #define NUM_PRIO 8
@@ -41,15 +42,18 @@ ODP_STATIC_ASSERT((ODP_SCHED_PRIO_NORMAL > 0) &&
 /* Number of scheduling groups */
 #define NUM_SCHED_GRPS 32
 
-/* Priority queues per priority */
-#define QUEUES_PER_PRIO 4
+/* Maximum priority queue spread */
+#define MAX_SPREAD 4
+
+/* Minimum priority queue spread */
+#define MIN_SPREAD 1
 
 /* A thread polls a non preferred sched queue every this many polls
  * of the prefer queue. */
 #define PREFER_RATIO 64
 
 /* Size of poll weight table */
-#define WEIGHT_TBL_SIZE ((QUEUES_PER_PRIO - 1) * PREFER_RATIO)
+#define WEIGHT_TBL_SIZE ((MAX_SPREAD - 1) * PREFER_RATIO)
 
 /* Maximum number of packet IO interfaces */
 #define NUM_PKTIO ODP_CONFIG_PKTIO_ENTRIES
@@ -60,14 +64,10 @@ ODP_STATIC_ASSERT((ODP_SCHED_PRIO_NORMAL > 0) &&
 /* Not a valid index */
 #define NULL_INDEX ((uint32_t)-1)
 
-/* Priority queue ring size. In worst case, all event queues are scheduled
- * queues and have the same priority. The ring size must be larger than or
- * equal to ODP_CONFIG_QUEUES / QUEUES_PER_PRIO, so that it can hold all
- * queues in the worst case. */
-#define PRIO_QUEUE_RING_SIZE (ODP_CONFIG_QUEUES / QUEUES_PER_PRIO)
-
-/* Mask for wrapping around priority queue index */
-#define RING_MASK (PRIO_QUEUE_RING_SIZE - 1)
+/* Maximum priority queue ring size. A ring must be large enough to store all
+ * queues in the worst case (all queues are scheduled, have the same priority
+ * and no spreading). */
+#define MAX_RING_SIZE ODP_CONFIG_QUEUES
 
 /* Priority queue empty, not a valid queue index. */
 #define PRIO_QUEUE_EMPTY NULL_INDEX
@@ -76,14 +76,14 @@ ODP_STATIC_ASSERT((ODP_SCHED_PRIO_NORMAL > 0) &&
 ODP_STATIC_ASSERT(CHECK_IS_POWER2(ODP_CONFIG_QUEUES),
                   "Number_of_queues_is_not_power_of_two");
 
-/* Ring size must be power of two, so that MAX_QUEUE_IDX_MASK can be used. */
-ODP_STATIC_ASSERT(CHECK_IS_POWER2(PRIO_QUEUE_RING_SIZE),
+/* Ring size must be power of two, so that mask can be used. */
+ODP_STATIC_ASSERT(CHECK_IS_POWER2(MAX_RING_SIZE),
                   "Ring_size_is_not_power_of_two");
 
 /* Mask of queues per priority */
 typedef uint8_t pri_mask_t;
 
-ODP_STATIC_ASSERT((8 * sizeof(pri_mask_t)) >= QUEUES_PER_PRIO,
+ODP_STATIC_ASSERT((8 * sizeof(pri_mask_t)) >= MAX_SPREAD,
                   "pri_mask_t_is_too_small");
 
 /* Start of named groups in group mask arrays */
@@ -147,7 +147,7 @@ typedef struct ODP_ALIGNED_CACHE {
         ring_t ring;
 
         /* Ring data: queue indexes */
-        uint32_t queue_index[PRIO_QUEUE_RING_SIZE];
+        uint32_t queue_index[MAX_RING_SIZE];
 
 } prio_queue_t;
 
@@ -168,14 +168,20 @@ typedef struct {
         pri_mask_t pri_mask[NUM_PRIO];
         odp_spinlock_t mask_lock;
 
-        prio_queue_t prio_q[NUM_SCHED_GRPS][NUM_PRIO][QUEUES_PER_PRIO];
+        prio_queue_t prio_q[NUM_SCHED_GRPS][NUM_PRIO][MAX_SPREAD];
 
         odp_shm_t shm;
-        uint32_t pri_count[NUM_PRIO][QUEUES_PER_PRIO];
+
+        struct {
+                uint8_t num_spread;
+        } config;
+
+        uint32_t pri_count[NUM_PRIO][MAX_SPREAD];
 
         odp_thrmask_t mask_all;
         odp_spinlock_t grp_lock;
         odp_atomic_u32_t grp_epoch;
+        uint32_t ring_mask;
 
         struct {
                 char name[ODP_SCHED_GROUP_NAME_LEN];
@@ -186,7 +192,7 @@ typedef struct {
         struct {
                 uint8_t grp;
                 uint8_t prio;
-                uint8_t queue_per_prio;
+                uint8_t spread;
                 uint8_t sync;
                 uint8_t order_lock_count;
                 uint8_t poll_pktin;
@@ -206,8 +212,7 @@ typedef struct {
 /* Check that queue[] variables are large enough */
 ODP_STATIC_ASSERT(NUM_SCHED_GRPS <= 256, "Group_does_not_fit_8_bits");
 ODP_STATIC_ASSERT(NUM_PRIO <= 256, "Prio_does_not_fit_8_bits");
-ODP_STATIC_ASSERT(QUEUES_PER_PRIO <= 256,
-                  "Queues_per_prio_does_not_fit_8_bits");
+ODP_STATIC_ASSERT(MAX_SPREAD <= 256, "Spread_does_not_fit_8_bits");
 ODP_STATIC_ASSERT(CONFIG_QUEUE_MAX_ORD_LOCKS <= 256,
                   "Ordered_lock_count_does_not_fit_8_bits");
 ODP_STATIC_ASSERT(NUM_PKTIO <= 256, "Pktio_index_does_not_fit_8_bits");
@@ -221,11 +226,41 @@ static __thread sched_local_t sched_local;
 /* Function prototypes */
 static inline void schedule_release_context(void);
 
+static int read_config_file(sched_global_t *sched)
+{
+        const char *str;
+        int val = 0;
+
+        ODP_PRINT("Scheduler config:\n");
+
+        str = "sched_basic.prio_spread";
+        if (!_odp_libconfig_lookup_int(str, &val)) {
+                ODP_ERR("Config option '%s' not found.\n", str);
+                return -1;
+        }
+
+        if (val > MAX_SPREAD || val < MIN_SPREAD) {
+                ODP_ERR("Bad value %s = %u\n", str, val);
+                return -1;
+        }
+
+        sched->config.num_spread = val;
+        ODP_PRINT("  %s: %i\n\n", str, val);
+
+        return 0;
+}
+
+static inline uint8_t prio_spread_index(uint32_t index)
+{
+        return index % sched->config.num_spread;
+}
+
 static void sched_local_init(void)
 {
         int i;
-        uint8_t id;
-        uint8_t offset = 0;
+        uint8_t spread;
+        uint8_t num_spread = sched->config.num_spread;
+        uint8_t offset = 1;
 
         memset(&sched_local, 0, sizeof(sched_local_t));
 
@@ -234,17 +269,17 @@ static void sched_local_init(void)
         sched_local.stash_qi = PRIO_QUEUE_EMPTY;
         sched_local.ordered.src_queue = NULL_INDEX;
 
-        id = sched_local.thr & (QUEUES_PER_PRIO - 1);
+        spread = prio_spread_index(sched_local.thr);
 
         for (i = 0; i < WEIGHT_TBL_SIZE; i++) {
-                sched_local.weight_tbl[i] = id;
+                sched_local.weight_tbl[i] = spread;
 
-                if (i % PREFER_RATIO == 0) {
+                if (num_spread > 1 && (i % PREFER_RATIO) == 0) {
+                        sched_local.weight_tbl[i] = prio_spread_index(spread +
+                                                                      offset);
                         offset++;
-                        sched_local.weight_tbl[i] = (id + offset) &
-                                                    (QUEUES_PER_PRIO - 1);
-                        if (offset == QUEUES_PER_PRIO - 1)
-                                offset = 0;
+                        if (offset == num_spread)
+                                offset = 1;
                 }
         }
 }
@@ -269,19 +304,24 @@ static int schedule_init_global(void)
 
         memset(sched, 0, sizeof(sched_global_t));
 
+        if (read_config_file(sched)) {
+                odp_shm_free(shm);
+                return -1;
+        }
+
         sched->shm = shm;
         odp_spinlock_init(&sched->mask_lock);
 
         for (grp = 0; grp < NUM_SCHED_GRPS; grp++) {
                 for (i = 0; i < NUM_PRIO; i++) {
-                        for (j = 0; j < QUEUES_PER_PRIO; j++) {
+                        for (j = 0; j < MAX_SPREAD; j++) {
                                 prio_queue_t *prio_q;
                                 int k;
 
                                 prio_q = &sched->prio_q[grp][i][j];
                                 ring_init(&prio_q->ring);
 
-                                for (k = 0; k < PRIO_QUEUE_RING_SIZE; k++) {
+                                for (k = 0; k < MAX_RING_SIZE; k++) {
                                         prio_q->queue_index[k] =
                                                 PRIO_QUEUE_EMPTY;
                                 }
@@ -322,14 +362,15 @@ static int schedule_term_global(void)
         int ret = 0;
         int rc = 0;
         int i, j, grp;
+        uint32_t ring_mask = sched->ring_mask;
 
         for (grp = 0; grp < NUM_SCHED_GRPS; grp++) {
                 for (i = 0; i < NUM_PRIO; i++) {
-                        for (j = 0; j < QUEUES_PER_PRIO; j++) {
+                        for (j = 0; j < MAX_SPREAD; j++) {
                                 ring_t *ring = &sched->prio_q[grp][i][j].ring;
                                 uint32_t qi;
 
-                                while ((qi = ring_deq(ring, RING_MASK)) !=
+                                while ((qi = ring_deq(ring, ring_mask)) !=
                                        RING_EMPTY) {
                                         odp_event_t events[1];
                                         int num;
@@ -413,11 +454,6 @@ static uint32_t schedule_max_ordered_locks(void)
         return CONFIG_QUEUE_MAX_ORD_LOCKS;
 }
 
-static inline int queue_per_prio(uint32_t queue_index)
-{
-        return ((QUEUES_PER_PRIO - 1) & queue_index);
-}
-
 static void pri_set(int id, int prio)
 {
         odp_spinlock_lock(&sched->mask_lock);
@@ -441,33 +477,39 @@ static void pri_clr(int id, int prio)
 
 static void pri_set_queue(uint32_t queue_index, int prio)
 {
-        int id = queue_per_prio(queue_index);
+        uint8_t id = prio_spread_index(queue_index);
 
         return pri_set(id, prio);
 }
 
 static void pri_clr_queue(uint32_t queue_index, int prio)
 {
-        int id = queue_per_prio(queue_index);
+        uint8_t id = prio_spread_index(queue_index);
 
         pri_clr(id, prio);
 }
 
 static int schedule_init_queue(uint32_t queue_index,
                                const odp_schedule_param_t *sched_param)
 {
+        uint32_t ring_size;
         int i;
         int prio = sched_param->prio;
 
         pri_set_queue(queue_index, prio);
         sched->queue[queue_index].grp = sched_param->group;
         sched->queue[queue_index].prio = prio;
-        sched->queue[queue_index].queue_per_prio = queue_per_prio(queue_index);
+        sched->queue[queue_index].spread = prio_spread_index(queue_index);
         sched->queue[queue_index].sync = sched_param->sync;
         sched->queue[queue_index].order_lock_count = sched_param->lock_count;
         sched->queue[queue_index].poll_pktin = 0;
         sched->queue[queue_index].pktio_index = 0;
         sched->queue[queue_index].pktin_index = 0;
 
+        ring_size = MAX_RING_SIZE / sched->config.num_spread;
+        ring_size = ROUNDUP_POWER2_U32(ring_size);
+        ODP_ASSERT(ring_size <= MAX_RING_SIZE);
+        sched->ring_mask = ring_size - 1;
+
         odp_atomic_init_u64(&sched->order[queue_index].ctx, 0);
         odp_atomic_init_u64(&sched->order[queue_index].next_ctx, 0);
 
@@ -492,9 +534,9 @@ static void schedule_destroy_queue(uint32_t queue_index)
         int prio = sched->queue[queue_index].prio;
 
         pri_clr_queue(queue_index, prio);
-        sched->queue[queue_index].grp = 0;
-        sched->queue[queue_index].prio = 0;
-        sched->queue[queue_index].queue_per_prio = 0;
+        sched->queue[queue_index].grp    = 0;
+        sched->queue[queue_index].prio   = 0;
+        sched->queue[queue_index].spread = 0;
 
         if (queue_is_ordered(queue_index) &&
             odp_atomic_load_u64(&sched->order[queue_index].ctx) !=
@@ -504,12 +546,12 @@ static void schedule_destroy_queue(uint32_t queue_index)
 
 static int schedule_sched_queue(uint32_t queue_index)
 {
-        int grp = sched->queue[queue_index].grp;
-        int prio = sched->queue[queue_index].prio;
-        int queue_per_prio = sched->queue[queue_index].queue_per_prio;
-        ring_t *ring = &sched->prio_q[grp][prio][queue_per_prio].ring;
+        int grp    = sched->queue[queue_index].grp;
+        int prio   = sched->queue[queue_index].prio;
+        int spread = sched->queue[queue_index].spread;
+        ring_t *ring = &sched->prio_q[grp][prio][spread].ring;
 
-        ring_enq(ring, RING_MASK, queue_index);
+        ring_enq(ring, sched->ring_mask, queue_index);
         return 0;
 }
 
@@ -540,13 +582,14 @@ static void schedule_release_atomic(void)
         uint32_t qi = sched_local.stash_qi;
 
         if (qi != PRIO_QUEUE_EMPTY && sched_local.stash_num == 0) {
-                int grp = sched->queue[qi].grp;
-                int prio = sched->queue[qi].prio;
-                int queue_per_prio = sched->queue[qi].queue_per_prio;
-                ring_t *ring = &sched->prio_q[grp][prio][queue_per_prio].ring;
+                int grp    = sched->queue[qi].grp;
+                int prio   = sched->queue[qi].prio;
+                int spread = sched->queue[qi].spread;
+                ring_t *ring = &sched->prio_q[grp][prio][spread].ring;
 
                 /* Release current atomic queue */
-                ring_enq(ring, RING_MASK, qi);
+                ring_enq(ring, sched->ring_mask, qi);
+
                 sched_local.stash_qi = PRIO_QUEUE_EMPTY;
         }
 }
@@ -773,8 +816,10 @@ static inline int do_schedule_grp(odp_queue_t *out_queue, odp_event_t out_ev[],
         int prio, i;
         int ret;
         int id;
-        unsigned int max_deq = MAX_DEQ;
         uint32_t qi;
+        unsigned int max_deq = MAX_DEQ;
+        int num_spread = sched->config.num_spread;
+        uint32_t ring_mask = sched->ring_mask;
 
         /* Schedule events */
         for (prio = 0; prio < NUM_PRIO; prio++) {
@@ -785,14 +830,14 @@ static inline int do_schedule_grp(odp_queue_t *out_queue, odp_event_t out_ev[],
                 /* Select the first ring based on weights */
                 id = first;
 
-                for (i = 0; i < QUEUES_PER_PRIO;) {
+                for (i = 0; i < num_spread;) {
                         int num;
                         int ordered;
                         odp_queue_t handle;
                         ring_t *ring;
                         int pktin;
 
-                        if (id >= QUEUES_PER_PRIO)
+                        if (id >= num_spread)
                                 id = 0;
 
                         /* No queues created for this priority queue */
@@ -805,7 +850,7 @@ static inline int do_schedule_grp(odp_queue_t *out_queue, odp_event_t out_ev[],
 
                         /* Get queue index from the priority queue */
                         ring = &sched->prio_q[grp][prio][id].ring;
-                        qi = ring_deq(ring, RING_MASK);
+                        qi = ring_deq(ring, ring_mask);
 
                         /* Priority queue empty */
                         if (qi == RING_EMPTY) {
@@ -854,7 +899,7 @@ static inline int do_schedule_grp(odp_queue_t *out_queue, odp_event_t out_ev[],
                                         continue;
 
                                 if (num_pkt == 0 || !stash) {
-                                        ring_enq(ring, RING_MASK, qi);
+                                        ring_enq(ring, ring_mask, qi);
                                         break;
                                 }
 
@@ -880,14 +925,14 @@ static inline int do_schedule_grp(odp_queue_t *out_queue, odp_event_t out_ev[],
                                 sched_local.ordered.src_queue = qi;
 
                                 /* Continue scheduling ordered queues */
-                                ring_enq(ring, RING_MASK, qi);
+                                ring_enq(ring, ring_mask, qi);
 
                         } else if (queue_is_atomic(qi)) {
                                 /* Hold queue during atomic access */
                                 sched_local.stash_qi = qi;
                         } else {
                                 /* Continue scheduling the queue */
-                                ring_enq(ring, RING_MASK, qi);
+                                ring_enq(ring, ring_mask, qi);
                         }
 
                         handle = queue_from_index(qi);
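To make the mechanics above concrete, here is a standalone sketch (not part
of the patch) of the three new calculations: the modulo mapping from a queue
or thread index to a spread queue, the poll weight table that mostly prefers
a thread's own spread, and the per-spread ring sizing. Constants mirror the
patch; ODP_CONFIG_QUEUES = 1024 is an assumed platform default, and
roundup_power2_u32() stands in for ODP's internal ROUNDUP_POWER2_U32() macro.

    #include <stdio.h>
    #include <stdint.h>

    #define ODP_CONFIG_QUEUES 1024 /* assumed platform default */
    #define MAX_SPREAD        4
    #define PREFER_RATIO      64
    #define WEIGHT_TBL_SIZE   ((MAX_SPREAD - 1) * PREFER_RATIO)
    #define MAX_RING_SIZE     ODP_CONFIG_QUEUES

    /* Value of sched_basic.prio_spread (1..MAX_SPREAD) */
    static uint8_t num_spread = 4;

    /* Same modulo mapping as prio_spread_index() in the patch */
    static uint8_t prio_spread_index(uint32_t index)
    {
            return index % num_spread;
    }

    /* Stand-in for ODP's internal ROUNDUP_POWER2_U32(); v must be > 0 */
    static uint32_t roundup_power2_u32(uint32_t v)
    {
            v--;
            v |= v >> 1;
            v |= v >> 2;
            v |= v >> 4;
            v |= v >> 8;
            v |= v >> 16;
            return v + 1;
    }

    int main(void)
    {
            uint8_t weight_tbl[WEIGHT_TBL_SIZE];
            uint8_t spread = prio_spread_index(0); /* e.g. thread 0 */
            uint8_t offset = 1;
            uint32_t ring_size;
            int i;

            /* Mirrors sched_local_init(): poll mostly the thread's own
             * spread queue, but visit the other spreads in round-robin
             * order once every PREFER_RATIO polls. */
            for (i = 0; i < WEIGHT_TBL_SIZE; i++) {
                    weight_tbl[i] = spread;

                    if (num_spread > 1 && (i % PREFER_RATIO) == 0) {
                            weight_tbl[i] = prio_spread_index(spread + offset);
                            offset++;
                            if (offset == num_spread)
                                    offset = 1;
                    }
            }

            for (i = 0; i < WEIGHT_TBL_SIZE; i += PREFER_RATIO)
                    printf("weight_tbl[%d] = %u\n", i, weight_tbl[i]);

            /* Mirrors schedule_init_queue(): queues are divided between
             * num_spread rings, so each ring needs only a fraction of the
             * maximum size, rounded up to a power of two for masking. */
            ring_size = roundup_power2_u32(MAX_RING_SIZE / num_spread);
            printf("ring_size %u, ring_mask 0x%x\n", ring_size, ring_size - 1);

            for (i = 0; i < 8; i++)
                    printf("queue index %d -> spread %u\n", i,
                           prio_spread_index(i));

            return 0;
    }

With prio_spread = 4 and the assumed 1024 queues, each ring is sized
1024 / 4 = 256 (already a power of two), so ring_mask becomes 0xff; with
prio_spread = 3 the division yields 341, which rounds up to 512. This is why
the patch can shrink each ring below MAX_RING_SIZE while the ODP_ASSERT in
schedule_init_queue() still guarantees the worst case fits.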