author    Petri Savolainen <petri.savolainen@linaro.org>  2018-03-09 16:49:48 +0200
committer Maxim Uvarov <maxim.uvarov@linaro.org>  2018-03-22 05:57:49 +0300
commit    bcd246e3a9948a631eb8eb77886ac1b369b13dba (patch)
tree      5b521cef08b4dc5fe224428683c3c44a2379eb92
parent    c57da8ddc80e0e314957c6d11db288ef11fc1805 (diff)
linux-gen: sched: configurable priority spread
Use the configuration file to enable the user to change priority queue
spreading.

Signed-off-by: Petri Savolainen <petri.savolainen@linaro.org>
Reviewed-by: Balasubramanian Manoharan <bala.manoharan@linaro.org>
Reviewed-by: Dmitry Eremin-Solenikov <dmitry.ereminsolenikov@linaro.org>
Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org>
-rw-r--r--  config/odp-linux-generic.conf               |   9
-rw-r--r--  platform/linux-generic/odp_schedule_basic.c | 167
2 files changed, 115 insertions(+), 61 deletions(-)
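
The new option can be exercised without rebuilding ODP: the linux-generic
platform reads its runtime configuration from the file named by the
ODP_CONFIG_FILE environment variable, which is the mechanism this patch hooks
into through odp_libconfig. A minimal sketch of an override, assuming a
hypothetical local file my-odp.conf created by copying
config/odp-linux-generic.conf and editing the new section:

sched_basic: {
	# Match the spread to the expected number of worker threads
	prio_spread = 2
}

$ ODP_CONFIG_FILE=./my-odp.conf ./odp_application

Depending on the ODP version, the override file may also need to keep the
header fields (implementation and config file version strings) from the
shipped default file.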
diff --git a/config/odp-linux-generic.conf b/config/odp-linux-generic.conf
index 916058866..0034c64ba 100644
--- a/config/odp-linux-generic.conf
+++ b/config/odp-linux-generic.conf
@@ -38,3 +38,12 @@ queue_basic: {
# Default queue size. Value must be a power of two.
default_queue_size = 4096
}
+
+sched_basic: {
+ # Priority level spread. Each priority level is spread into multiple
+ # scheduler internal queues. A higher spread value typically improves
+ # parallelism and thus is better for high thread counts, but it causes
+ # an uneven service level at low thread counts. Typically, the optimal
+ # value is the number of threads using the scheduler.
+ prio_spread = 4
+}
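
Before the scheduler changes below, a standalone sketch (not part of the
patch) of what prio_spread does at runtime: each thread prefers one of the
num_spread internal queues per priority level, selected as index % num_spread
like prio_spread_index() in the patch, and visits every other spread once per
PREFER_RATIO polls through a precomputed per-thread weight table. Constants
and names mirror the patch; the demo values (thread index, spread count) are
assumptions.

#include <stdio.h>
#include <stdint.h>

#define MAX_SPREAD	4
#define PREFER_RATIO	64
#define WEIGHT_TBL_SIZE	((MAX_SPREAD - 1) * PREFER_RATIO)

/* Same mapping as prio_spread_index() in the patch */
static uint8_t spread_index(uint32_t index, uint8_t num_spread)
{
	return index % num_spread;
}

int main(void)
{
	uint8_t weight_tbl[WEIGHT_TBL_SIZE];
	uint8_t num_spread = 4;	/* prio_spread from the config file */
	uint32_t thr = 3;	/* example thread index */
	uint8_t spread = spread_index(thr, num_spread);
	uint8_t offset = 1;
	int counts[MAX_SPREAD] = {0};
	int i;

	/* Mirrors sched_local_init(): prefer the thread's own spread,
	 * visit each other spread once per PREFER_RATIO polls */
	for (i = 0; i < WEIGHT_TBL_SIZE; i++) {
		weight_tbl[i] = spread;

		if (num_spread > 1 && (i % PREFER_RATIO) == 0) {
			weight_tbl[i] = spread_index(spread + offset,
						     num_spread);
			offset++;
			if (offset == num_spread)
				offset = 1;
		}
	}

	for (i = 0; i < WEIGHT_TBL_SIZE; i++)
		counts[weight_tbl[i]]++;

	for (i = 0; i < num_spread; i++)
		printf("spread %i: polled %i of %i rounds\n",
		       i, counts[i], WEIGHT_TBL_SIZE);

	return 0;
}

With num_spread = 4 the thread polls its own spread for 189 of the 192 table
slots and each other spread exactly once, which is the source of both the
parallelism gain and the uneven service level mentioned in the config comment
above.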
diff --git a/platform/linux-generic/odp_schedule_basic.c b/platform/linux-generic/odp_schedule_basic.c
index cd20b39dd..e6d28c6df 100644
--- a/platform/linux-generic/odp_schedule_basic.c
+++ b/platform/linux-generic/odp_schedule_basic.c
@@ -27,6 +27,7 @@
#include <odp_timer_internal.h>
#include <odp_queue_internal.h>
#include <odp_buffer_inlines.h>
+#include <odp_libconfig_internal.h>
/* Number of priority levels */
#define NUM_PRIO 8
@@ -41,15 +42,18 @@ ODP_STATIC_ASSERT((ODP_SCHED_PRIO_NORMAL > 0) &&
/* Number of scheduling groups */
#define NUM_SCHED_GRPS 32
-/* Priority queues per priority */
-#define QUEUES_PER_PRIO 4
+/* Maximum priority queue spread */
+#define MAX_SPREAD 4
+
+/* Minimum priority queue spread */
+#define MIN_SPREAD 1
/* A thread polls a non-preferred sched queue once for every this many
 * polls of the preferred queue. */
#define PREFER_RATIO 64
/* Size of poll weight table */
-#define WEIGHT_TBL_SIZE ((QUEUES_PER_PRIO - 1) * PREFER_RATIO)
+#define WEIGHT_TBL_SIZE ((MAX_SPREAD - 1) * PREFER_RATIO)
/* Maximum number of packet IO interfaces */
#define NUM_PKTIO ODP_CONFIG_PKTIO_ENTRIES
@@ -60,14 +64,10 @@ ODP_STATIC_ASSERT((ODP_SCHED_PRIO_NORMAL > 0) &&
/* Not a valid index */
#define NULL_INDEX ((uint32_t)-1)
-/* Priority queue ring size. In worst case, all event queues are scheduled
- * queues and have the same priority. The ring size must be larger than or
- * equal to ODP_CONFIG_QUEUES / QUEUES_PER_PRIO, so that it can hold all
- * queues in the worst case. */
-#define PRIO_QUEUE_RING_SIZE (ODP_CONFIG_QUEUES / QUEUES_PER_PRIO)
-
-/* Mask for wrapping around priority queue index */
-#define RING_MASK (PRIO_QUEUE_RING_SIZE - 1)
+/* Maximum priority queue ring size. A ring must be large enough to store all
+ * queues in the worst case (all queues are scheduled, have the same priority
+ * and there is no spreading). */
+#define MAX_RING_SIZE ODP_CONFIG_QUEUES
/* Priority queue empty, not a valid queue index. */
#define PRIO_QUEUE_EMPTY NULL_INDEX
@@ -76,14 +76,14 @@ ODP_STATIC_ASSERT((ODP_SCHED_PRIO_NORMAL > 0) &&
ODP_STATIC_ASSERT(CHECK_IS_POWER2(ODP_CONFIG_QUEUES),
"Number_of_queues_is_not_power_of_two");
-/* Ring size must be power of two, so that MAX_QUEUE_IDX_MASK can be used. */
-ODP_STATIC_ASSERT(CHECK_IS_POWER2(PRIO_QUEUE_RING_SIZE),
+/* Ring size must be a power of two, so that a mask can be used. */
+ODP_STATIC_ASSERT(CHECK_IS_POWER2(MAX_RING_SIZE),
"Ring_size_is_not_power_of_two");
/* Mask of queues per priority */
typedef uint8_t pri_mask_t;
-ODP_STATIC_ASSERT((8 * sizeof(pri_mask_t)) >= QUEUES_PER_PRIO,
+ODP_STATIC_ASSERT((8 * sizeof(pri_mask_t)) >= MAX_SPREAD,
"pri_mask_t_is_too_small");
/* Start of named groups in group mask arrays */
@@ -147,7 +147,7 @@ typedef struct ODP_ALIGNED_CACHE {
ring_t ring;
/* Ring data: queue indexes */
- uint32_t queue_index[PRIO_QUEUE_RING_SIZE];
+ uint32_t queue_index[MAX_RING_SIZE];
} prio_queue_t;
@@ -168,14 +168,20 @@ typedef struct {
pri_mask_t pri_mask[NUM_PRIO];
odp_spinlock_t mask_lock;
- prio_queue_t prio_q[NUM_SCHED_GRPS][NUM_PRIO][QUEUES_PER_PRIO];
+ prio_queue_t prio_q[NUM_SCHED_GRPS][NUM_PRIO][MAX_SPREAD];
odp_shm_t shm;
- uint32_t pri_count[NUM_PRIO][QUEUES_PER_PRIO];
+
+ struct {
+ uint8_t num_spread;
+ } config;
+
+ uint32_t pri_count[NUM_PRIO][MAX_SPREAD];
odp_thrmask_t mask_all;
odp_spinlock_t grp_lock;
odp_atomic_u32_t grp_epoch;
+ uint32_t ring_mask;
struct {
char name[ODP_SCHED_GROUP_NAME_LEN];
@@ -186,7 +192,7 @@ typedef struct {
struct {
uint8_t grp;
uint8_t prio;
- uint8_t queue_per_prio;
+ uint8_t spread;
uint8_t sync;
uint8_t order_lock_count;
uint8_t poll_pktin;
@@ -206,8 +212,7 @@ typedef struct {
/* Check that queue[] variables are large enough */
ODP_STATIC_ASSERT(NUM_SCHED_GRPS <= 256, "Group_does_not_fit_8_bits");
ODP_STATIC_ASSERT(NUM_PRIO <= 256, "Prio_does_not_fit_8_bits");
-ODP_STATIC_ASSERT(QUEUES_PER_PRIO <= 256,
- "Queues_per_prio_does_not_fit_8_bits");
+ODP_STATIC_ASSERT(MAX_SPREAD <= 256, "Spread_does_not_fit_8_bits");
ODP_STATIC_ASSERT(CONFIG_QUEUE_MAX_ORD_LOCKS <= 256,
"Ordered_lock_count_does_not_fit_8_bits");
ODP_STATIC_ASSERT(NUM_PKTIO <= 256, "Pktio_index_does_not_fit_8_bits");
@@ -221,11 +226,41 @@ static __thread sched_local_t sched_local;
/* Function prototypes */
static inline void schedule_release_context(void);
+static int read_config_file(sched_global_t *sched)
+{
+ const char *str;
+ int val = 0;
+
+ ODP_PRINT("Scheduler config:\n");
+
+ str = "sched_basic.prio_spread";
+ if (!_odp_libconfig_lookup_int(str, &val)) {
+ ODP_ERR("Config option '%s' not found.\n", str);
+ return -1;
+ }
+
+ if (val > MAX_SPREAD || val < MIN_SPREAD) {
+ ODP_ERR("Bad value %s = %u\n", str, val);
+ return -1;
+ }
+
+ sched->config.num_spread = val;
+ ODP_PRINT(" %s: %i\n\n", str, val);
+
+ return 0;
+}
+
+static inline uint8_t prio_spread_index(uint32_t index)
+{
+ return index % sched->config.num_spread;
+}
+
static void sched_local_init(void)
{
int i;
- uint8_t id;
- uint8_t offset = 0;
+ uint8_t spread;
+ uint8_t num_spread = sched->config.num_spread;
+ uint8_t offset = 1;
memset(&sched_local, 0, sizeof(sched_local_t));
@@ -234,17 +269,17 @@ static void sched_local_init(void)
sched_local.stash_qi = PRIO_QUEUE_EMPTY;
sched_local.ordered.src_queue = NULL_INDEX;
- id = sched_local.thr & (QUEUES_PER_PRIO - 1);
+ spread = prio_spread_index(sched_local.thr);
for (i = 0; i < WEIGHT_TBL_SIZE; i++) {
- sched_local.weight_tbl[i] = id;
+ sched_local.weight_tbl[i] = spread;
- if (i % PREFER_RATIO == 0) {
+ if (num_spread > 1 && (i % PREFER_RATIO) == 0) {
+ sched_local.weight_tbl[i] = prio_spread_index(spread +
+ offset);
offset++;
- sched_local.weight_tbl[i] = (id + offset) &
- (QUEUES_PER_PRIO - 1);
- if (offset == QUEUES_PER_PRIO - 1)
- offset = 0;
+ if (offset == num_spread)
+ offset = 1;
}
}
}
@@ -269,19 +304,24 @@ static int schedule_init_global(void)
memset(sched, 0, sizeof(sched_global_t));
+ if (read_config_file(sched)) {
+ odp_shm_free(shm);
+ return -1;
+ }
+
sched->shm = shm;
odp_spinlock_init(&sched->mask_lock);
for (grp = 0; grp < NUM_SCHED_GRPS; grp++) {
for (i = 0; i < NUM_PRIO; i++) {
- for (j = 0; j < QUEUES_PER_PRIO; j++) {
+ for (j = 0; j < MAX_SPREAD; j++) {
prio_queue_t *prio_q;
int k;
prio_q = &sched->prio_q[grp][i][j];
ring_init(&prio_q->ring);
- for (k = 0; k < PRIO_QUEUE_RING_SIZE; k++) {
+ for (k = 0; k < MAX_RING_SIZE; k++) {
prio_q->queue_index[k] =
PRIO_QUEUE_EMPTY;
}
@@ -322,14 +362,15 @@ static int schedule_term_global(void)
int ret = 0;
int rc = 0;
int i, j, grp;
+ uint32_t ring_mask = sched->ring_mask;
for (grp = 0; grp < NUM_SCHED_GRPS; grp++) {
for (i = 0; i < NUM_PRIO; i++) {
- for (j = 0; j < QUEUES_PER_PRIO; j++) {
+ for (j = 0; j < MAX_SPREAD; j++) {
ring_t *ring = &sched->prio_q[grp][i][j].ring;
uint32_t qi;
- while ((qi = ring_deq(ring, RING_MASK)) !=
+ while ((qi = ring_deq(ring, ring_mask)) !=
RING_EMPTY) {
odp_event_t events[1];
int num;
@@ -413,11 +454,6 @@ static uint32_t schedule_max_ordered_locks(void)
return CONFIG_QUEUE_MAX_ORD_LOCKS;
}
-static inline int queue_per_prio(uint32_t queue_index)
-{
- return ((QUEUES_PER_PRIO - 1) & queue_index);
-}
-
static void pri_set(int id, int prio)
{
odp_spinlock_lock(&sched->mask_lock);
@@ -441,33 +477,39 @@ static void pri_clr(int id, int prio)
static void pri_set_queue(uint32_t queue_index, int prio)
{
- int id = queue_per_prio(queue_index);
+ uint8_t id = prio_spread_index(queue_index);
return pri_set(id, prio);
}
static void pri_clr_queue(uint32_t queue_index, int prio)
{
- int id = queue_per_prio(queue_index);
+ uint8_t id = prio_spread_index(queue_index);
pri_clr(id, prio);
}
static int schedule_init_queue(uint32_t queue_index,
const odp_schedule_param_t *sched_param)
{
+ uint32_t ring_size;
int i;
int prio = sched_param->prio;
pri_set_queue(queue_index, prio);
sched->queue[queue_index].grp = sched_param->group;
sched->queue[queue_index].prio = prio;
- sched->queue[queue_index].queue_per_prio = queue_per_prio(queue_index);
+ sched->queue[queue_index].spread = prio_spread_index(queue_index);
sched->queue[queue_index].sync = sched_param->sync;
sched->queue[queue_index].order_lock_count = sched_param->lock_count;
sched->queue[queue_index].poll_pktin = 0;
sched->queue[queue_index].pktio_index = 0;
sched->queue[queue_index].pktin_index = 0;
+ ring_size = MAX_RING_SIZE / sched->config.num_spread;
+ ring_size = ROUNDUP_POWER2_U32(ring_size);
+ ODP_ASSERT(ring_size <= MAX_RING_SIZE);
+ sched->ring_mask = ring_size - 1;
+
odp_atomic_init_u64(&sched->order[queue_index].ctx, 0);
odp_atomic_init_u64(&sched->order[queue_index].next_ctx, 0);
@@ -492,9 +534,9 @@ static void schedule_destroy_queue(uint32_t queue_index)
int prio = sched->queue[queue_index].prio;
pri_clr_queue(queue_index, prio);
- sched->queue[queue_index].grp = 0;
- sched->queue[queue_index].prio = 0;
- sched->queue[queue_index].queue_per_prio = 0;
+ sched->queue[queue_index].grp = 0;
+ sched->queue[queue_index].prio = 0;
+ sched->queue[queue_index].spread = 0;
if (queue_is_ordered(queue_index) &&
odp_atomic_load_u64(&sched->order[queue_index].ctx) !=
@@ -504,12 +546,12 @@ static void schedule_destroy_queue(uint32_t queue_index)
static int schedule_sched_queue(uint32_t queue_index)
{
- int grp = sched->queue[queue_index].grp;
- int prio = sched->queue[queue_index].prio;
- int queue_per_prio = sched->queue[queue_index].queue_per_prio;
- ring_t *ring = &sched->prio_q[grp][prio][queue_per_prio].ring;
+ int grp = sched->queue[queue_index].grp;
+ int prio = sched->queue[queue_index].prio;
+ int spread = sched->queue[queue_index].spread;
+ ring_t *ring = &sched->prio_q[grp][prio][spread].ring;
- ring_enq(ring, RING_MASK, queue_index);
+ ring_enq(ring, sched->ring_mask, queue_index);
return 0;
}
@@ -540,13 +582,14 @@ static void schedule_release_atomic(void)
uint32_t qi = sched_local.stash_qi;
if (qi != PRIO_QUEUE_EMPTY && sched_local.stash_num == 0) {
- int grp = sched->queue[qi].grp;
- int prio = sched->queue[qi].prio;
- int queue_per_prio = sched->queue[qi].queue_per_prio;
- ring_t *ring = &sched->prio_q[grp][prio][queue_per_prio].ring;
+ int grp = sched->queue[qi].grp;
+ int prio = sched->queue[qi].prio;
+ int spread = sched->queue[qi].spread;
+ ring_t *ring = &sched->prio_q[grp][prio][spread].ring;
/* Release current atomic queue */
- ring_enq(ring, RING_MASK, qi);
+ ring_enq(ring, sched->ring_mask, qi);
+
sched_local.stash_qi = PRIO_QUEUE_EMPTY;
}
}
@@ -773,8 +816,10 @@ static inline int do_schedule_grp(odp_queue_t *out_queue, odp_event_t out_ev[],
int prio, i;
int ret;
int id;
- unsigned int max_deq = MAX_DEQ;
uint32_t qi;
+ unsigned int max_deq = MAX_DEQ;
+ int num_spread = sched->config.num_spread;
+ uint32_t ring_mask = sched->ring_mask;
/* Schedule events */
for (prio = 0; prio < NUM_PRIO; prio++) {
@@ -785,14 +830,14 @@ static inline int do_schedule_grp(odp_queue_t *out_queue, odp_event_t out_ev[],
/* Select the first ring based on weights */
id = first;
- for (i = 0; i < QUEUES_PER_PRIO;) {
+ for (i = 0; i < num_spread;) {
int num;
int ordered;
odp_queue_t handle;
ring_t *ring;
int pktin;
- if (id >= QUEUES_PER_PRIO)
+ if (id >= num_spread)
id = 0;
/* No queues created for this priority queue */
@@ -805,7 +850,7 @@ static inline int do_schedule_grp(odp_queue_t *out_queue, odp_event_t out_ev[],
/* Get queue index from the priority queue */
ring = &sched->prio_q[grp][prio][id].ring;
- qi = ring_deq(ring, RING_MASK);
+ qi = ring_deq(ring, ring_mask);
/* Priority queue empty */
if (qi == RING_EMPTY) {
@@ -854,7 +899,7 @@ static inline int do_schedule_grp(odp_queue_t *out_queue, odp_event_t out_ev[],
continue;
if (num_pkt == 0 || !stash) {
- ring_enq(ring, RING_MASK, qi);
+ ring_enq(ring, ring_mask, qi);
break;
}
@@ -880,14 +925,14 @@ static inline int do_schedule_grp(odp_queue_t *out_queue, odp_event_t out_ev[],
sched_local.ordered.src_queue = qi;
/* Continue scheduling ordered queues */
- ring_enq(ring, RING_MASK, qi);
+ ring_enq(ring, ring_mask, qi);
} else if (queue_is_atomic(qi)) {
/* Hold queue during atomic access */
sched_local.stash_qi = qi;
} else {
/* Continue scheduling the queue */
- ring_enq(ring, RING_MASK, qi);
+ ring_enq(ring, ring_mask, qi);
}
handle = queue_from_index(qi);
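
A closing note on ring sizing, to connect schedule_init_queue() above with the
config value: each per-spread ring is sized as MAX_RING_SIZE / num_spread
rounded up to the next power of two, so mask-based indexing keeps working even
for spread values that do not divide ODP_CONFIG_QUEUES evenly. A minimal
sketch of the arithmetic; roundup_power2_u32() below is only a stand-in for
ODP's ROUNDUP_POWER2_U32() macro, and 1024 is an assumed ODP_CONFIG_QUEUES
value.

#include <stdio.h>
#include <stdint.h>

/* Stand-in for ODP's ROUNDUP_POWER2_U32(): smallest power of two >= x */
static uint32_t roundup_power2_u32(uint32_t x)
{
	uint32_t p = 1;

	while (p < x)
		p <<= 1;
	return p;
}

int main(void)
{
	uint32_t max_ring_size = 1024;	/* assumed ODP_CONFIG_QUEUES */
	uint8_t num_spread;

	/* Same computation as schedule_init_queue() in the patch */
	for (num_spread = 1; num_spread <= 4; num_spread++) {
		uint32_t ring_size =
			roundup_power2_u32(max_ring_size / num_spread);

		printf("prio_spread %u: ring_size %u, ring_mask 0x%x\n",
		       num_spread, ring_size, ring_size - 1);
	}

	return 0;
}

For prio_spread = 3 this yields 1024 / 3 = 341, rounded up to a 512-entry
ring (mask 0x1ff); power-of-two spread values divide the ring exactly.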