diff options
Diffstat (limited to 'test/performance')
39 files changed, 2231 insertions, 184 deletions
diff --git a/test/performance/.gitignore b/test/performance/.gitignore index 08a4d5609..46d9e9c2c 100644 --- a/test/performance/.gitignore +++ b/test/performance/.gitignore @@ -20,6 +20,7 @@ odp_mem_perf odp_packet_gen odp_pktio_ordered odp_pktio_perf +odp_pool_latency odp_pool_perf odp_queue_perf odp_random diff --git a/test/performance/Makefile.am b/test/performance/Makefile.am index 7b0adbe09..356e98a2d 100644 --- a/test/performance/Makefile.am +++ b/test/performance/Makefile.am @@ -12,6 +12,7 @@ EXECUTABLES = odp_atomic_perf \ odp_lock_perf \ odp_mem_perf \ odp_pktio_perf \ + odp_pool_latency \ odp_pool_perf \ odp_queue_perf \ odp_stash_perf \ @@ -81,6 +82,7 @@ odp_sched_latency_SOURCES = odp_sched_latency.c odp_sched_pktio_SOURCES = odp_sched_pktio.c odp_scheduling_SOURCES = odp_scheduling.c odp_pktio_perf_SOURCES = odp_pktio_perf.c +odp_pool_latency_SOURCES = odp_pool_latency.c odp_pool_perf_SOURCES = odp_pool_perf.c odp_queue_perf_SOURCES = odp_queue_perf.c odp_random_SOURCES = odp_random.c @@ -121,5 +123,3 @@ clean-local: rm -f $(builddir)/$$f; \ done \ fi - -.NOTPARALLEL: diff --git a/test/performance/bench_common.c b/test/performance/bench_common.c index f838954ab..640889503 100644 --- a/test/performance/bench_common.c +++ b/test/performance/bench_common.c @@ -2,6 +2,8 @@ * Copyright (c) 2023 Nokia */ +/** @cond _ODP_HIDE_FROM_DOXYGEN_ */ + #include <odp_api.h> #include <odp/helper/odph_api.h> diff --git a/test/performance/bench_common.h b/test/performance/bench_common.h index bd611878d..4b59c941f 100644 --- a/test/performance/bench_common.h +++ b/test/performance/bench_common.h @@ -2,6 +2,8 @@ * Copyright (c) 2023 Nokia */ +/** @cond _ODP_HIDE_FROM_DOXYGEN_ */ + #ifndef BENCH_COMMON_H #define BENCH_COMMON_H diff --git a/test/performance/dummy_crc.h b/test/performance/dummy_crc.h index 68928abee..01e6c2433 100644 --- a/test/performance/dummy_crc.h +++ b/test/performance/dummy_crc.h @@ -37,6 +37,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/** @cond _ODP_HIDE_FROM_DOXYGEN_ */ + /** * @file * diff --git a/test/performance/odp_atomic_perf.c b/test/performance/odp_atomic_perf.c index 28217f5d7..e665081a2 100644 --- a/test/performance/odp_atomic_perf.c +++ b/test/performance/odp_atomic_perf.c @@ -5,6 +5,14 @@ * SPDX-License-Identifier: BSD-3-Clause */ +/** + * @example odp_atomic_perf.c + * + * Performance test application for atomic operation APIs + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + #include <stdio.h> #include <string.h> #include <stdint.h> @@ -16,7 +24,7 @@ #include <odp/helper/odph_api.h> /* Default number of test rounds */ -#define NUM_ROUNDS 1000000u +#define NUM_ROUNDS 100000u /* Initial value for atomic variables. Supports up to 2 billion * rounds of 32-bit min and max tests. */ diff --git a/test/performance/odp_bench_buffer.c b/test/performance/odp_bench_buffer.c index 8b041eee0..ce14ec8b3 100644 --- a/test/performance/odp_bench_buffer.c +++ b/test/performance/odp_bench_buffer.c @@ -5,6 +5,14 @@ * SPDX-License-Identifier: BSD-3-Clause */ +/** + * @example odp_bench_buffer.c + * + * Microbenchmark application for buffer API functions + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + #include <odp_api.h> #include <odp/helper/odph_api.h> @@ -26,7 +34,7 @@ #define TEST_REPEAT_COUNT 1000 /** Default number of rounds per test case */ -#define TEST_ROUNDS 1000u +#define TEST_ROUNDS 100u /** Maximum burst size for *_multi operations */ #define TEST_MAX_BURST 64 @@ -81,9 +89,9 @@ typedef struct { /** Array for storing test pool handles */ odp_pool_t pool_tbl[TEST_REPEAT_COUNT]; /** Array for storing test event types */ - odp_event_type_t event_type_tbl[TEST_REPEAT_COUNT]; + odp_event_type_t event_type_tbl[TEST_REPEAT_COUNT * TEST_MAX_BURST]; /** Array for storing test event subtypes */ - odp_event_subtype_t event_subtype_tbl[TEST_REPEAT_COUNT]; + odp_event_subtype_t event_subtype_tbl[TEST_REPEAT_COUNT * TEST_MAX_BURST]; /** CPU mask as string */ char cpumask_str[ODP_CPUMASK_STR_SIZE]; } args_t; @@ -113,14 +121,14 @@ static void allocate_test_buffers(odp_buffer_t buf[], int num) } } -static void alloc_buffers_multi(void) +static void create_buffers(void) { - allocate_test_buffers(gbl_args->buf_tbl, TEST_REPEAT_COUNT * gbl_args->appl.burst_size); + allocate_test_buffers(gbl_args->buf_tbl, TEST_REPEAT_COUNT); } -static void create_buffers(void) +static void create_buffers_multi(void) { - allocate_test_buffers(gbl_args->buf_tbl, TEST_REPEAT_COUNT); + allocate_test_buffers(gbl_args->buf_tbl, TEST_REPEAT_COUNT * gbl_args->appl.burst_size); } static void create_events(void) @@ -176,6 +184,20 @@ static int buffer_from_event(void) return i; } +static int buffer_from_event_multi(void) +{ + odp_buffer_t *buf_tbl = gbl_args->buf_tbl; + odp_event_t *event_tbl = gbl_args->event_tbl; + int burst_size = gbl_args->appl.burst_size; + int i; + + for (i = 0; i < TEST_REPEAT_COUNT; i++) + odp_buffer_from_event_multi(&buf_tbl[i * burst_size], + &event_tbl[i * burst_size], burst_size); + + return i; +} + static int buffer_to_event(void) { odp_buffer_t *buf_tbl = gbl_args->buf_tbl; @@ -188,6 +210,20 @@ static int buffer_to_event(void) return i; } +static int buffer_to_event_multi(void) +{ + odp_buffer_t *buf_tbl = gbl_args->buf_tbl; + odp_event_t *event_tbl = gbl_args->event_tbl; + int burst_size = gbl_args->appl.burst_size; + int i; + + for (i = 0; i < TEST_REPEAT_COUNT; i++) + odp_buffer_to_event_multi(&buf_tbl[i * burst_size], + &event_tbl[i * burst_size], burst_size); + + return i; +} + static int buffer_addr(void) { odp_buffer_t *buf_tbl = gbl_args->buf_tbl; @@ -365,6 +401,36 @@ static int event_types(void) return i; } +static int event_types_multi(void) +{ + odp_event_t *event_tbl = gbl_args->event_tbl; + odp_event_type_t *event_type_tbl = gbl_args->event_type_tbl; + odp_event_subtype_t *event_subtype_tbl = gbl_args->event_subtype_tbl; + int burst_size = gbl_args->appl.burst_size; + int i; + + for (i = 0; i < TEST_REPEAT_COUNT; i++) + odp_event_types_multi(&event_tbl[i * burst_size], + &event_type_tbl[i * burst_size], + &event_subtype_tbl[i * burst_size], burst_size); + + return i; +} + +static int event_types_multi_no_sub(void) +{ + odp_event_t *event_tbl = gbl_args->event_tbl; + odp_event_type_t *event_type_tbl = gbl_args->event_type_tbl; + int burst_size = gbl_args->appl.burst_size; + int i; + + for (i = 0; i < TEST_REPEAT_COUNT; i++) + odp_event_types_multi(&event_tbl[i * burst_size], + &event_type_tbl[i * burst_size], NULL, burst_size); + + return i; +} + static int event_type_multi(void) { odp_event_t *event_tbl = gbl_args->event_tbl; @@ -379,6 +445,45 @@ static int event_type_multi(void) return ret; } +static int event_pool(void) +{ + odp_event_t *event_tbl = gbl_args->event_tbl; + odp_pool_t *pool_tbl = gbl_args->pool_tbl; + int i; + + for (i = 0; i < TEST_REPEAT_COUNT; i++) + pool_tbl[i] = odp_event_pool(event_tbl[i]); + + return i; +} + +static int event_user_area(void) +{ + odp_event_t *event_tbl = gbl_args->event_tbl; + void **ptr_tbl = gbl_args->ptr_tbl; + int i; + + for (i = 0; i < TEST_REPEAT_COUNT; i++) + ptr_tbl[i] = odp_event_user_area(event_tbl[i]); + + return i; +} + +static int event_user_area_and_flag(void) +{ + odp_event_t *event_tbl = gbl_args->event_tbl; + void **ptr_tbl = gbl_args->ptr_tbl; + int ret = 0; + int flag; + + for (int i = 0; i < TEST_REPEAT_COUNT; i++) { + ptr_tbl[i] = odp_event_user_area_and_flag(event_tbl[i], &flag); + ret += flag; + } + + return ret; +} + static int event_is_valid(void) { odp_event_t *event_tbl = gbl_args->event_tbl; @@ -572,7 +677,9 @@ static void print_info(void) */ bench_info_t test_suite[] = { BENCH_INFO(buffer_from_event, create_events, free_buffers, NULL), + BENCH_INFO(buffer_from_event_multi, create_events_multi, free_buffers_multi, NULL), BENCH_INFO(buffer_to_event, create_buffers, free_buffers, NULL), + BENCH_INFO(buffer_to_event_multi, create_buffers_multi, free_buffers_multi, NULL), BENCH_INFO(buffer_addr, create_buffers, free_buffers, NULL), BENCH_INFO(buffer_size, create_buffers, free_buffers, NULL), BENCH_INFO_COND(buffer_user_area, create_buffers, free_buffers, NULL, check_uarea), @@ -580,14 +687,20 @@ bench_info_t test_suite[] = { BENCH_INFO(buffer_alloc, NULL, free_buffers, NULL), BENCH_INFO(buffer_alloc_multi, NULL, free_buffers_multi, NULL), BENCH_INFO(buffer_free, create_buffers, NULL, NULL), - BENCH_INFO(buffer_free_multi, alloc_buffers_multi, NULL, NULL), + BENCH_INFO(buffer_free_multi, create_buffers_multi, NULL, NULL), BENCH_INFO(buffer_alloc_free, NULL, NULL, NULL), BENCH_INFO(buffer_alloc_free_multi, NULL, NULL, NULL), BENCH_INFO(buffer_is_valid, create_buffers, free_buffers, NULL), BENCH_INFO(event_type, create_events, free_buffers, NULL), - BENCH_INFO(event_subtype, create_buffers, free_buffers, NULL), - BENCH_INFO(event_types, create_buffers, free_buffers, NULL), + BENCH_INFO(event_subtype, create_events, free_buffers, NULL), + BENCH_INFO(event_types, create_events, free_buffers, NULL), + BENCH_INFO(event_types_multi, create_events_multi, free_buffers_multi, NULL), + BENCH_INFO(event_types_multi_no_sub, create_events_multi, free_buffers_multi, + "event_types_multi (no sub)"), BENCH_INFO(event_type_multi, create_events_multi, free_buffers_multi, NULL), + BENCH_INFO(event_pool, create_events, free_buffers, NULL), + BENCH_INFO_COND(event_user_area, create_events, free_buffers, NULL, check_uarea), + BENCH_INFO_COND(event_user_area_and_flag, create_events, free_buffers, NULL, check_uarea), BENCH_INFO(event_is_valid, create_events, free_buffers, NULL), BENCH_INFO(event_free, create_events, NULL, NULL), BENCH_INFO(event_free_multi, create_events_multi, NULL, NULL), diff --git a/test/performance/odp_bench_misc.c b/test/performance/odp_bench_misc.c index 64318938a..61afdc398 100644 --- a/test/performance/odp_bench_misc.c +++ b/test/performance/odp_bench_misc.c @@ -4,6 +4,14 @@ * SPDX-License-Identifier: BSD-3-Clause */ +/** + * @example odp_bench_misc.c + * + * Microbenchmark application for miscellaneous API functions + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + #ifndef _GNU_SOURCE #define _GNU_SOURCE /* Needed for sigaction */ #endif @@ -20,7 +28,7 @@ #include <unistd.h> /* Number of API function calls per test case */ -#define REPEAT_COUNT 1000 +#define REPEAT_COUNT 1024 /* Default number of rounds per test case */ #define ROUNDS 1000u @@ -733,6 +741,42 @@ static int mb_full(void) return i; } +static int prefetch(void) +{ + uint64_t *a1 = gbl_args->a1; + uint32_t index = 0; + int i; + + for (i = 0; i < REPEAT_COUNT; i++) { + odp_prefetch(&a1[index]); + + /* Prefetch every 64B */ + index += 8; + if (odp_unlikely(index >= REPEAT_COUNT)) + index = 0; + } + + return i; +} + +static int prefetch_store(void) +{ + uint64_t *a1 = gbl_args->a1; + uint32_t index = 0; + int i; + + for (i = 0; i < REPEAT_COUNT; i++) { + odp_prefetch_store(&a1[index]); + + /* Prefetch every 64B */ + index += 8; + if (odp_unlikely(index >= REPEAT_COUNT)) + index = 0; + } + + return i; +} + bench_info_t test_suite[] = { BENCH_INFO(time_local, NULL, 0, NULL), BENCH_INFO(time_local_strict, NULL, 0, NULL), @@ -785,6 +829,8 @@ bench_info_t test_suite[] = { BENCH_INFO(mb_release, NULL, 0, NULL), BENCH_INFO(mb_acquire, NULL, 0, NULL), BENCH_INFO(mb_full, NULL, 0, NULL), + BENCH_INFO(prefetch, NULL, 0, NULL), + BENCH_INFO(prefetch_store, NULL, 0, NULL), }; /* Print usage information */ diff --git a/test/performance/odp_bench_packet.c b/test/performance/odp_bench_packet.c index 67b6b9cfc..cb9e3ca03 100644 --- a/test/performance/odp_bench_packet.c +++ b/test/performance/odp_bench_packet.c @@ -6,9 +6,11 @@ */ /** - * @file + * @example odp_bench_packet.c * - * @example odp_bench_packet.c Microbenchmarks for packet functions + * Microbenchmark application for packet API functions + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ */ #include <stdlib.h> @@ -39,7 +41,7 @@ #define TEST_REPEAT_COUNT 1000 /** Number of rounds per test case */ -#define TEST_ROUNDS 10u +#define TEST_ROUNDS 2u /** Maximum burst size for *_multi operations */ #define TEST_MAX_BURST 64 @@ -1094,6 +1096,27 @@ static int packet_user_area_size(void) return ret; } +static int packet_user_flag(void) +{ + int i; + uint32_t ret = 0; + + for (i = 0; i < TEST_REPEAT_COUNT; i++) + ret += !odp_packet_user_flag(gbl_args->pkt_tbl[i]); + + return ret; +} + +static int packet_user_flag_set(void) +{ + int i; + + for (i = 0; i < TEST_REPEAT_COUNT; i++) + odp_packet_user_flag_set(gbl_args->pkt_tbl[i], 1); + + return i; +} + static int packet_l2_ptr(void) { int i; @@ -1512,6 +1535,8 @@ bench_info_t test_suite[] = { BENCH_INFO(packet_user_ptr_set, create_packets, free_packets, NULL), BENCH_INFO(packet_user_area, create_packets, free_packets, NULL), BENCH_INFO(packet_user_area_size, create_packets, free_packets, NULL), + BENCH_INFO(packet_user_flag, create_packets, free_packets, NULL), + BENCH_INFO(packet_user_flag_set, create_packets, free_packets, NULL), BENCH_INFO(packet_l2_ptr, create_packets, free_packets, NULL), BENCH_INFO(packet_l2_offset, create_packets, free_packets, NULL), BENCH_INFO(packet_l2_offset_set, create_packets, free_packets, NULL), diff --git a/test/performance/odp_bench_pktio_sp.c b/test/performance/odp_bench_pktio_sp.c index 65d85a062..017e7565f 100644 --- a/test/performance/odp_bench_pktio_sp.c +++ b/test/performance/odp_bench_pktio_sp.c @@ -2,6 +2,14 @@ * Copyright (c) 2023 Nokia */ +/** + * @example odp_bench_pktio_sp.c + * + * Microbenchmark application for packet IO slow path functions + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + #ifndef _GNU_SOURCE #define _GNU_SOURCE /* Needed for sigaction */ #endif diff --git a/test/performance/odp_bench_timer.c b/test/performance/odp_bench_timer.c index a53671460..65c7a9168 100644 --- a/test/performance/odp_bench_timer.c +++ b/test/performance/odp_bench_timer.c @@ -4,6 +4,14 @@ * SPDX-License-Identifier: BSD-3-Clause */ +/** + * @example odp_bench_timer.c + * + * Microbenchmark application for timer API functions + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + #ifndef _GNU_SOURCE #define _GNU_SOURCE /* Needed for sigaction */ #endif @@ -695,8 +703,12 @@ exit: if (gbl_args->pool != ODP_POOL_INVALID) odp_pool_destroy(gbl_args->pool); - if (gbl_args->timer != ODP_TIMER_INVALID) - odp_timer_free(gbl_args->timer); + if (gbl_args->timer != ODP_TIMER_INVALID) { + if (odp_timer_free(gbl_args->timer)) { + ODPH_ERR("Timer free failed\n"); + exit(EXIT_FAILURE); + } + } if (gbl_args->timer_pool != ODP_TIMER_POOL_INVALID) odp_timer_pool_destroy(gbl_args->timer_pool); diff --git a/test/performance/odp_cpu_bench.c b/test/performance/odp_cpu_bench.c index 7ef12dc30..39eff620d 100644 --- a/test/performance/odp_cpu_bench.c +++ b/test/performance/odp_cpu_bench.c @@ -4,6 +4,14 @@ * SPDX-License-Identifier: BSD-3-Clause */ +/** + * @example odp_cpu_bench.c + * + * Application for CPU stress testing + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + #include <odp_api.h> #include <odp/helper/odph_api.h> @@ -327,7 +335,7 @@ static int run_thread(void *arg) odp_event_t ev; ev = odp_schedule(NULL, - odp_schedule_wait_time(ODP_TIME_SEC_IN_NS)); + odp_schedule_wait_time(100 * ODP_TIME_MSEC_IN_NS)); if (ev == ODP_EVENT_INVALID) break; diff --git a/test/performance/odp_crc.c b/test/performance/odp_crc.c index 89e8af837..89e2e971f 100644 --- a/test/performance/odp_crc.c +++ b/test/performance/odp_crc.c @@ -5,6 +5,14 @@ * SPDX-License-Identifier: BSD-3-Clause */ +/** + * @example odp_crc.c + * + * Performance test application for CRC hash APIs + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + #include <stdio.h> #include <string.h> #include <stdint.h> diff --git a/test/performance/odp_crypto.c b/test/performance/odp_crypto.c index 93315ce05..a644da5e1 100644 --- a/test/performance/odp_crypto.c +++ b/test/performance/odp_crypto.c @@ -5,6 +5,14 @@ * SPDX-License-Identifier: BSD-3-Clause */ +/** + * @example odp_crypto.c + * + * Performance test application for crypto APIs + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif /* _GNU_SOURCE */ @@ -1233,7 +1241,7 @@ int main(int argc, char *argv[]) odp_pool_capability_t pool_capa; odp_crypto_capability_t crypto_capa; uint32_t max_seg_len; - unsigned i; + uint32_t i; /* Let helper collect its own arguments (e.g. --odph_proc) */ argc = odph_parse_options(argc, argv); @@ -1366,8 +1374,6 @@ int main(int argc, char *argv[]) run_measure_one_config(&test_run_arg); } } else { - unsigned int i; - for (i = 0; i < ODPH_ARRAY_SIZE(algs_config); i++) { test_run_arg.crypto_alg_config = algs_config + i; run_measure_one_config(&test_run_arg); diff --git a/test/performance/odp_dma_perf.c b/test/performance/odp_dma_perf.c index 21c9c0558..2f4ca490d 100644 --- a/test/performance/odp_dma_perf.c +++ b/test/performance/odp_dma_perf.c @@ -1,14 +1,16 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright (c) 2021-2023 Nokia + * Copyright (c) 2021-2024 Nokia */ /** - * DMA performance tester + * @example odp_dma_perf.c * * This tester application can be used to profile the performance of an ODP DMA implementation. * Tester workflow is simple and consists of issuing as many back-to-back DMA transfers as the * implementation allows and then recording key performance statistics (such as function overhead, * latencies etc.). + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ */ #ifndef _GNU_SOURCE @@ -107,7 +109,7 @@ typedef struct { odp_dma_transfer_param_t trs_param; odp_dma_compl_param_t compl_param; odp_ticketlock_t lock; - uint64_t trs_start_tm; + odp_time_t trs_start_tm; uint64_t trs_start_cc; uint64_t trs_poll_cnt; odp_bool_t is_running; @@ -202,7 +204,7 @@ typedef struct prog_config_s { uint32_t src_seg_len; uint32_t dst_seg_len; uint32_t num_inflight; - uint32_t time_sec; + double time_sec; uint32_t num_sessions; uint32_t src_cache_size; uint32_t dst_cache_size; @@ -249,14 +251,14 @@ static void init_config(prog_config_t *config) stats = &config->thread_config[i].stats; memset(sd, 0, sizeof(*sd)); - for (uint32_t i = 0U; i < MAX_SEGS; ++i) { - info = &sd->dma.infos[i]; + for (uint32_t j = 0U; j < MAX_SEGS; ++j) { + info = &sd->dma.infos[j]; info->compl_param.transfer_id = ODP_DMA_TRANSFER_ID_INVALID; info->compl_param.event = ODP_EVENT_INVALID; info->compl_param.queue = ODP_QUEUE_INVALID; odp_ticketlock_init(&info->lock); - sd->seg.src_pkt[i] = ODP_PACKET_INVALID; - sd->seg.dst_pkt[i] = ODP_PACKET_INVALID; + sd->seg.src_pkt[j] = ODP_PACKET_INVALID; + sd->seg.dst_pkt[j] = ODP_PACKET_INVALID; } sd->dma.handle = ODP_DMA_INVALID; @@ -597,7 +599,7 @@ static parse_result_t parse_options(int argc, char **argv, prog_config_t *config config->num_inflight = atoi(optarg); break; case 'T': - config->time_sec = atoi(optarg); + config->time_sec = atof(optarg); break; case 'c': config->num_workers = atoi(optarg); @@ -871,20 +873,21 @@ static void free_memory(const sd_t *sd) static void run_transfer(odp_dma_t handle, trs_info_t *info, stats_t *stats, ver_fn_t ver_fn) { - uint64_t start_tm, end_tm, start_cc, end_cc, trs_tm, trs_cc, start_cc_diff; + odp_time_t start_tm, end_tm; + uint64_t start_cc, end_cc, trs_tm, trs_cc; odp_dma_result_t res; int ret; - start_tm = odp_time_local_strict_ns(); + start_tm = odp_time_local_strict(); start_cc = odp_cpu_cycles(); ret = odp_dma_transfer(handle, &info->trs_param, &res); end_cc = odp_cpu_cycles(); - end_tm = odp_time_local_strict_ns(); + end_tm = odp_time_local_strict(); if (odp_unlikely(ret <= 0)) { ++stats->start_errs; } else { - trs_tm = end_tm - start_tm; + trs_tm = odp_time_diff_ns(end_tm, start_tm); stats->max_trs_tm = ODPH_MAX(trs_tm, stats->max_trs_tm); stats->min_trs_tm = ODPH_MIN(trs_tm, stats->min_trs_tm); stats->trs_tm += trs_tm; @@ -893,10 +896,9 @@ static void run_transfer(odp_dma_t handle, trs_info_t *info, stats_t *stats, ver stats->min_trs_cc = ODPH_MIN(trs_cc, stats->min_trs_cc); stats->trs_cc += trs_cc; ++stats->trs_cnt; - start_cc_diff = odp_cpu_cycles_diff(end_cc, start_cc); - stats->max_start_cc = ODPH_MAX(start_cc_diff, stats->max_start_cc); - stats->min_start_cc = ODPH_MIN(start_cc_diff, stats->min_start_cc); - stats->start_cc += start_cc_diff; + stats->max_start_cc = stats->max_trs_cc; + stats->min_start_cc = stats->min_trs_cc; + stats->start_cc += trs_cc; ++stats->start_cnt; if (odp_unlikely(!res.success)) { @@ -967,7 +969,8 @@ static odp_bool_t configure_poll_compl(sd_t *sd) static void poll_transfer(sd_t *sd, trs_info_t *info, stats_t *stats) { - uint64_t start_cc, end_cc, trs_tm, trs_cc, wait_cc, start_tm, start_cc_diff; + uint64_t start_cc, end_cc, trs_tm, trs_cc, wait_cc, start_cc_diff; + odp_time_t start_tm; odp_dma_t handle = sd->dma.handle; odp_dma_result_t res; int ret; @@ -992,7 +995,7 @@ static void poll_transfer(sd_t *sd, trs_info_t *info, stats_t *stats) if (ret == 0) return; - trs_tm = odp_time_global_strict_ns() - info->trs_start_tm; + trs_tm = odp_time_diff_ns(odp_time_global_strict(), info->trs_start_tm); stats->max_trs_tm = ODPH_MAX(trs_tm, stats->max_trs_tm); stats->min_trs_tm = ODPH_MIN(trs_tm, stats->min_trs_tm); stats->trs_tm += trs_tm; @@ -1017,7 +1020,7 @@ static void poll_transfer(sd_t *sd, trs_info_t *info, stats_t *stats) if (sd->prep_trs_fn != NULL) sd->prep_trs_fn(sd, info); - start_tm = odp_time_global_strict_ns(); + start_tm = odp_time_global_strict(); start_cc = odp_cpu_cycles(); ret = odp_dma_transfer_start(handle, &info->trs_param, &info->compl_param); end_cc = odp_cpu_cycles(); @@ -1146,7 +1149,8 @@ static odp_bool_t configure_event_compl(sd_t *sd) static odp_bool_t start_initial_transfers(sd_t *sd) { - uint64_t start_tm, start_cc; + odp_time_t start_tm; + uint64_t start_cc; trs_info_t *info; int ret; @@ -1156,7 +1160,7 @@ static odp_bool_t start_initial_transfers(sd_t *sd) if (sd->prep_trs_fn != NULL) sd->prep_trs_fn(sd, info); - start_tm = odp_time_global_strict_ns(); + start_tm = odp_time_global_strict(); start_cc = odp_cpu_cycles(); ret = odp_dma_transfer_start(sd->dma.handle, &info->trs_param, &info->compl_param); @@ -1174,7 +1178,8 @@ static odp_bool_t start_initial_transfers(sd_t *sd) static void wait_compl_event(sd_t *sd, stats_t *stats) { - uint64_t start_cc, end_cc, wait_cc, trs_tm, trs_cc, start_tm, start_cc_diff; + uint64_t start_cc, end_cc, wait_cc, trs_tm, trs_cc, start_cc_diff; + odp_time_t start_tm; odp_event_t ev; odp_dma_result_t res; trs_info_t *info; @@ -1191,7 +1196,7 @@ static void wait_compl_event(sd_t *sd, stats_t *stats) odp_dma_compl_result(odp_dma_compl_from_event(ev), &res); info = res.user_ptr; - trs_tm = odp_time_global_strict_ns() - info->trs_start_tm; + trs_tm = odp_time_diff_ns(odp_time_global_strict(), info->trs_start_tm); stats->max_trs_tm = ODPH_MAX(trs_tm, stats->max_trs_tm); stats->min_trs_tm = ODPH_MIN(trs_tm, stats->min_trs_tm); stats->trs_tm += trs_tm; @@ -1218,7 +1223,7 @@ static void wait_compl_event(sd_t *sd, stats_t *stats) if (sd->prep_trs_fn != NULL) sd->prep_trs_fn(sd, info); - start_tm = odp_time_global_strict_ns(); + start_tm = odp_time_global_strict(); start_cc = odp_cpu_cycles(); ret = odp_dma_transfer_start(sd->dma.handle, &info->trs_param, &info->compl_param); end_cc = odp_cpu_cycles(); @@ -1241,7 +1246,7 @@ static void drain_compl_events(ODP_UNUSED sd_t *sd) odp_event_t ev; while (true) { - ev = odp_schedule(NULL, odp_schedule_wait_time(ODP_TIME_SEC_IN_NS)); + ev = odp_schedule(NULL, odp_schedule_wait_time(100 * ODP_TIME_MSEC_IN_NS)); if (ev == ODP_EVENT_INVALID) break; @@ -1250,7 +1255,8 @@ static void drain_compl_events(ODP_UNUSED sd_t *sd) static void run_memcpy(trs_info_t *info, stats_t *stats, ver_fn_t ver_fn) { - uint64_t start_tm, end_tm, start_cc, end_cc, trs_tm, trs_cc, start_cc_diff; + odp_time_t start_tm; + uint64_t start_cc, end_cc, trs_tm, trs_cc; const odp_dma_transfer_param_t *param = &info->trs_param; uint32_t tot_len, src_len, dst_len, min_len, len, i = 0U, j = 0U, src_off = 0U, dst_off = 0U, src_rem, dst_rem; @@ -1265,7 +1271,7 @@ static void run_memcpy(trs_info_t *info, stats_t *stats, ver_fn_t ver_fn) dst_len = param->dst_seg->len; min_len = ODPH_MIN(src_len, dst_len); len = min_len; - start_tm = odp_time_local_strict_ns(); + start_tm = odp_time_local_strict(); start_cc = odp_cpu_cycles(); while (tot_len > 0U) { @@ -1295,8 +1301,7 @@ static void run_memcpy(trs_info_t *info, stats_t *stats, ver_fn_t ver_fn) } end_cc = odp_cpu_cycles(); - end_tm = odp_time_local_strict_ns(); - trs_tm = end_tm - start_tm; + trs_tm = odp_time_diff_ns(odp_time_local_strict(), start_tm); stats->max_trs_tm = ODPH_MAX(trs_tm, stats->max_trs_tm); stats->min_trs_tm = ODPH_MIN(trs_tm, stats->min_trs_tm); stats->trs_tm += trs_tm; @@ -1305,10 +1310,9 @@ static void run_memcpy(trs_info_t *info, stats_t *stats, ver_fn_t ver_fn) stats->min_trs_cc = ODPH_MIN(trs_cc, stats->min_trs_cc); stats->trs_cc += trs_cc; ++stats->trs_cnt; - start_cc_diff = odp_cpu_cycles_diff(end_cc, start_cc); - stats->max_start_cc = ODPH_MAX(start_cc_diff, stats->max_start_cc); - stats->min_start_cc = ODPH_MIN(start_cc_diff, stats->min_start_cc); - stats->start_cc += start_cc_diff; + stats->max_start_cc = stats->max_trs_cc; + stats->min_start_cc = stats->min_trs_cc; + stats->start_cc += trs_cc; ++stats->start_cnt; ++stats->completed; @@ -1548,7 +1552,7 @@ static int transfer(void *args) stats_t *stats = &thr_config->stats; test_api_t *api = &prog_conf->api; odp_thrmask_t mask; - uint64_t start_tm, end_tm; + odp_time_t start_tm; odp_barrier_wait(&prog_config->init_barrier); @@ -1562,13 +1566,12 @@ static int transfer(void *args) } } - start_tm = odp_time_local_strict_ns(); + start_tm = odp_time_local_strict(); while (odp_atomic_load_u32(&prog_config->is_running)) api->wait_fn(sd, stats); - end_tm = odp_time_local_strict_ns(); - thr_config->stats.tot_tm = end_tm - start_tm; + thr_config->stats.tot_tm = odp_time_diff_ns(odp_time_local_strict(), start_tm); if (api->drain_fn != NULL) api->drain_fn(sd); @@ -1690,7 +1693,7 @@ static void print_humanised(uint64_t value, const char *type) else if (value > MEGAS) printf("%.2f M%s\n", (double)value / MEGAS, type); else if (value > KILOS) - printf("%.2f K%s\n", (double)value / KILOS, type); + printf("%.2f k%s\n", (double)value / KILOS, type); else printf("%" PRIu64 " %s\n", value, type); } @@ -1700,8 +1703,8 @@ static void print_stats(const prog_config_t *config) const stats_t *stats; uint64_t data_cnt = config->num_in_segs * config->src_seg_len, tot_completed = 0U, tot_tm = 0U, tot_trs_tm = 0U, tot_trs_cc = 0U, tot_trs_cnt = 0U, tot_min_tm = UINT64_MAX, - tot_max_tm = 0U, tot_min_cc = UINT64_MAX, tot_max_cc = 0U, avg_start_cc, avg_wait_cc, - avg_tot_tm; + tot_max_tm = 0U, tot_min_cc = UINT64_MAX, tot_max_cc = 0U, avg_start_cc, avg_wait_cc; + double avg_tot_tm; printf("\n======================\n\n" "DMA performance test done\n\n" @@ -1769,11 +1772,12 @@ static void print_stats(const prog_config_t *config) stats->trs_cnt > 0U ? stats->trs_cc / stats->trs_cnt : 0U, stats->trs_cnt > 0U ? stats->min_trs_cc : 0U, stats->trs_cnt > 0U ? stats->max_trs_cc : 0U); - print_humanised(stats->completed / (stats->tot_tm / ODP_TIME_SEC_IN_NS), + print_humanised(stats->completed / + ((double)stats->tot_tm / ODP_TIME_SEC_IN_NS), "OPS"); printf(" speed: "); print_humanised(stats->completed * data_cnt / - (stats->tot_tm / ODP_TIME_SEC_IN_NS), "B/s"); + ((double)stats->tot_tm / ODP_TIME_SEC_IN_NS), "B/s"); } avg_start_cc = stats->start_cnt > 0U ? stats->start_cc / stats->start_cnt : 0U; @@ -1816,7 +1820,7 @@ static void print_stats(const prog_config_t *config) printf("\n"); } - avg_tot_tm = tot_tm / config->num_workers / ODP_TIME_SEC_IN_NS; + avg_tot_tm = (double)tot_tm / config->num_workers / ODP_TIME_SEC_IN_NS; printf(" total:\n" " average time per transfer: %" PRIu64 " (min: %" PRIu64 ", max: %" PRIu64 ") ns\n" @@ -1913,8 +1917,12 @@ int main(int argc, char **argv) goto out_test; } - if (prog_conf->time_sec) { - sleep(prog_conf->time_sec); + if (prog_conf->time_sec > 0.001) { + struct timespec ts; + + ts.tv_sec = prog_conf->time_sec; + ts.tv_nsec = (prog_conf->time_sec - ts.tv_sec) * ODP_TIME_SEC_IN_NS; + nanosleep(&ts, NULL); odp_atomic_store_u32(&prog_conf->is_running, 0U); } diff --git a/test/performance/odp_dma_perf_run.sh b/test/performance/odp_dma_perf_run.sh index f5d567740..31948e40a 100755 --- a/test/performance/odp_dma_perf_run.sh +++ b/test/performance/odp_dma_perf_run.sh @@ -10,7 +10,7 @@ BIN_NAME=odp_dma_perf SEGC=0 SEGS=1024 INFL=1 -TIME=1 +TIME=0.1 TESTS_RUN=0 check_result() diff --git a/test/performance/odp_dmafwd.c b/test/performance/odp_dmafwd.c index 188a8e358..694973ce0 100644 --- a/test/performance/odp_dmafwd.c +++ b/test/performance/odp_dmafwd.c @@ -3,12 +3,14 @@ */ /** - * DMA forwarder + * @example odp_dmafwd.c * * This tester application can be used to profile the performance of an ODP DMA implementation. * Tester workflow consists of packet reception, copy and forwarding steps. Packets are first * received from configured interfaces after which packets are copied, either with plain SW memory * copy or with DMA offload copy. Finally, copied packets are echoed back to the sender(s). + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ */ #ifndef _GNU_SOURCE @@ -20,6 +22,7 @@ #include <signal.h> #include <stdio.h> #include <unistd.h> +#include <time.h> #include <odp_api.h> #include <odp/helper/odph_api.h> @@ -152,7 +155,7 @@ typedef struct prog_config_s { uint32_t trs_cache_size; uint32_t compl_cache_size; uint32_t stash_cache_size; - uint32_t time_sec; + double time_sec; odp_stash_type_t stash_type; int num_thrs; uint8_t num_ifs; @@ -505,7 +508,7 @@ static parse_result_t parse_options(int argc, char **argv, prog_config_t *config config->cache_size = atoi(optarg); break; case 'T': - config->time_sec = atoi(optarg); + config->time_sec = atof(optarg); break; case 'h': print_usage(&config->dyn_defs); @@ -758,7 +761,7 @@ static void drain_events(thread_config_t *config ODP_UNUSED) transfer_t *trs; while (true) { - ev = odp_schedule(NULL, odp_schedule_wait_time(ODP_TIME_SEC_IN_NS * 2U)); + ev = odp_schedule(NULL, odp_schedule_wait_time(100 * ODP_TIME_MSEC_IN_NS)); if (ev == ODP_EVENT_INVALID) break; @@ -1378,7 +1381,7 @@ int main(int argc, char **argv) odp_init_param_init(&init_param); init_param.mem_model = odph_opts.mem_model; - if (odp_init_global(&odp_instance, NULL, NULL)) { + if (odp_init_global(&odp_instance, &init_param, NULL)) { ODPH_ERR("ODP global init failed, exiting\n"); exit(EXIT_FAILURE); } @@ -1436,8 +1439,12 @@ int main(int argc, char **argv) goto out_test; } - if (prog_conf->time_sec) { - sleep(prog_conf->time_sec); + if (prog_conf->time_sec > 0.001) { + struct timespec ts; + + ts.tv_sec = prog_conf->time_sec; + ts.tv_nsec = (prog_conf->time_sec - ts.tv_sec) * ODP_TIME_SEC_IN_NS; + nanosleep(&ts, NULL); odp_atomic_store_u32(&prog_conf->is_running, 0U); } else { while (odp_atomic_load_u32(&prog_conf->is_running)) diff --git a/test/performance/odp_dmafwd_run.sh b/test/performance/odp_dmafwd_run.sh index fa629bd0c..ebb9b153a 100755 --- a/test/performance/odp_dmafwd_run.sh +++ b/test/performance/odp_dmafwd_run.sh @@ -10,7 +10,7 @@ PERF_TEST_DIR=${TEST_SRC_DIR}/../../${PERF_TEST_DIR} BIN_NAME=odp_dmafwd BATCH=10 -TIME=2 +TIME=0.1 TESTS_RUN=0 check_env() diff --git a/test/performance/odp_ipsec.c b/test/performance/odp_ipsec.c index 8a0bc2989..3ea93ec96 100644 --- a/test/performance/odp_ipsec.c +++ b/test/performance/odp_ipsec.c @@ -6,6 +6,14 @@ * SPDX-License-Identifier: BSD-3-Clause */ +/** + * @example odp_ipsec.c + * + * Performance test application for IPsec APIs + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif /* _GNU_SOURCE */ @@ -1375,8 +1383,6 @@ int main(int argc, char *argv[]) run_measure_one_config(&cargs, cargs.alg_config); } } else { - unsigned int i; - for (i = 0; i < ODPH_ARRAY_SIZE(algs_config); i++) { if (cargs.ah && algs_config[i].crypto.cipher_alg != diff --git a/test/performance/odp_ipsecfwd.c b/test/performance/odp_ipsecfwd.c index 5c35d67f7..0220cf6ae 100644 --- a/test/performance/odp_ipsecfwd.c +++ b/test/performance/odp_ipsecfwd.c @@ -2,6 +2,15 @@ * Copyright (c) 2022-2023 Nokia */ +/** + * @example odp_ipsecfwd.c + * + * Simple IPsec performance tester application which forwards and processes + * plain and IPsec packets. + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif diff --git a/test/performance/odp_l2fwd.c b/test/performance/odp_l2fwd.c index 1e9b79db0..b993de4cb 100644 --- a/test/performance/odp_l2fwd.c +++ b/test/performance/odp_l2fwd.c @@ -1,11 +1,19 @@ /* Copyright (c) 2014-2018, Linaro Limited - * Copyright (c) 2019-2023, Nokia + * Copyright (c) 2019-2024, Nokia * Copyright (c) 2020-2021, Marvell * All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ +/** + * @example odp_l2fwd.c + * + * L2 forwarding example application + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + /* enable strtok */ #ifndef _GNU_SOURCE #define _GNU_SOURCE @@ -81,7 +89,32 @@ static inline int sched_mode(pktin_mode_t in_mode) */ typedef struct { /* Some extra features (e.g. error checks) have been enabled */ - int extra_feat; + uint8_t extra_feat; + + /* Prefetch packet data */ + uint8_t prefetch; + + /* Change destination eth addresses */ + uint8_t dst_change; + + /* Change source eth addresses */ + uint8_t src_change; + + /* Read packet data in uint64_t words */ + uint16_t data_rd; + + /* Check packet errors */ + uint8_t error_check; + + /* Packet copy */ + uint8_t packet_copy; + + /* Checksum offload */ + uint8_t chksum; + + /* Print debug info on every packet */ + uint8_t verbose_pkt; + unsigned int cpu_count; int if_count; /* Number of interfaces to be used */ int addr_count; /* Number of dst addresses to be used */ @@ -93,11 +126,6 @@ typedef struct { int time; /* Time in seconds to run. */ int accuracy; /* Number of seconds to get and print stats */ char *if_str; /* Storage for interface names */ - int dst_change; /* Change destination eth addresses */ - int src_change; /* Change source eth addresses */ - int error_check; /* Check packet errors */ - int packet_copy; /* Packet copy */ - int chksum; /* Checksum offload */ int sched_mode; /* Scheduler mode */ int num_groups; /* Number of scheduling groups */ int group_mode; /* How threads join groups */ @@ -109,11 +137,12 @@ typedef struct { uint32_t num_vec; /* Number of vectors per pool */ uint64_t vec_tmo_ns; /* Vector formation timeout in ns */ uint32_t vec_size; /* Vector size */ - int verbose; /* Verbose output */ - uint32_t packet_len; /* Maximum packet length supported */ - uint32_t seg_len; /* Pool segment length */ + int verbose; /* Verbose output */ + uint32_t packet_len; /* Maximum packet length supported */ + uint32_t seg_len; /* Pool segment length */ int promisc_mode; /* Promiscuous mode enabled */ int flow_aware; /* Flow aware scheduling enabled */ + uint8_t input_ts; /* Packet input timestamping enabled */ int mtu; /* Interface MTU */ int num_prio; odp_schedule_prio_t prio[MAX_PKTIOS]; /* Priority of input queues of an interface */ @@ -131,6 +160,8 @@ typedef union ODP_ALIGNED_CACHE { uint64_t tx_drops; /* Number of failed packet copies */ uint64_t copy_fails; + /* Dummy sum of packet data */ + uint64_t dummy_sum; } s; uint8_t padding[ODP_CACHE_LINE_SIZE]; @@ -251,6 +282,15 @@ static inline int drop_err_pkts(odp_packet_t pkt_tbl[], unsigned num) return dropped; } +static inline void prefetch_data(uint8_t prefetch, odp_packet_t pkt_tbl[], uint32_t num) +{ + if (prefetch == 0) + return; + + for (uint32_t i = 0; i < num; i++) + odp_packet_prefetch(pkt_tbl[i], 0, prefetch * 64); +} + /* * Fill packets' eth addresses according to the destination port * @@ -270,9 +310,6 @@ static inline void fill_eth_addrs(odp_packet_t pkt_tbl[], for (i = 0; i < num; ++i) { pkt = pkt_tbl[i]; - - odp_packet_prefetch(pkt, 0, ODPH_ETHHDR_LEN); - eth = odp_packet_data(pkt); if (gbl_args->appl.src_change) @@ -318,6 +355,57 @@ static inline void chksum_insert(odp_packet_t *pkt_tbl, int pkts) } } +static void print_packets(odp_packet_t *pkt_tbl, int num) +{ + odp_packet_t pkt; + uintptr_t data_ptr; + uint32_t bit, align; + + for (int i = 0; i < num; i++) { + pkt = pkt_tbl[i]; + data_ptr = (uintptr_t)odp_packet_data(pkt); + + for (bit = 0, align = 1; bit < 32; bit++, align *= 2) + if (data_ptr & (0x1 << bit)) + break; + + printf(" Packet data: 0x%" PRIxPTR "\n" + " Packet len: %u\n" + " Packet seg len: %u\n" + " Data align: %u\n" + " Num segments: %i\n" + " Headroom size: %u\n" + " User area size: %u\n\n", + data_ptr, odp_packet_len(pkt), odp_packet_seg_len(pkt), align, + odp_packet_num_segs(pkt), odp_packet_headroom(pkt), + odp_packet_user_area_size(pkt)); + } +} + +static inline void data_rd(odp_packet_t *pkt_tbl, int num, uint16_t rd_words, stats_t *stats) +{ + odp_packet_t pkt; + uint64_t *data; + int i; + uint32_t len, words, j; + uint64_t sum = 0; + + for (i = 0; i < num; i++) { + pkt = pkt_tbl[i]; + data = odp_packet_data(pkt); + len = odp_packet_seg_len(pkt); + + words = rd_words; + if (rd_words * 8 > len) + words = len / 8; + + for (j = 0; j < words; j++) + sum += data[j]; + } + + stats->s.dummy_sum += sum; +} + static inline int copy_packets(odp_packet_t *pkt_tbl, int pkts) { odp_packet_t old_pkt, new_pkt; @@ -343,21 +431,29 @@ static inline int copy_packets(odp_packet_t *pkt_tbl, int pkts) /* * Return number of packets remaining in the pkt_tbl */ -static inline int process_extra_features(odp_packet_t *pkt_tbl, int pkts, - stats_t *stats) +static inline int process_extra_features(const appl_args_t *appl_args, odp_packet_t *pkt_tbl, + int pkts, stats_t *stats) { - if (odp_unlikely(gbl_args->appl.extra_feat)) { - if (gbl_args->appl.packet_copy) { + if (odp_unlikely(appl_args->extra_feat)) { + uint16_t rd_words = appl_args->data_rd; + + if (appl_args->verbose_pkt) + print_packets(pkt_tbl, pkts); + + if (rd_words) + data_rd(pkt_tbl, pkts, rd_words, stats); + + if (appl_args->packet_copy) { int fails; fails = copy_packets(pkt_tbl, pkts); stats->s.copy_fails += fails; } - if (gbl_args->appl.chksum) + if (appl_args->chksum) chksum_insert(pkt_tbl, pkts); - if (gbl_args->appl.error_check) { + if (appl_args->error_check) { int rx_drops; /* Drop packets with errors */ @@ -421,6 +517,7 @@ static int run_worker_sched_mode_vector(void *arg) odp_queue_t tx_queue[MAX_PKTIOS]; thread_args_t *thr_args = arg; stats_t *stats = &thr_args->stats; + const appl_args_t *appl_args = &gbl_args->appl; int use_event_queue = gbl_args->appl.out_mode; pktin_mode_t in_mode = gbl_args->appl.in_mode; @@ -484,7 +581,10 @@ static int run_worker_sched_mode_vector(void *arg) pkts = odp_packet_vector_tbl(pkt_vec, &pkt_tbl); } - pkts = process_extra_features(pkt_tbl, pkts, stats); + prefetch_data(appl_args->prefetch, pkt_tbl, pkts); + + pkts = process_extra_features(appl_args, pkt_tbl, pkts, stats); + if (odp_unlikely(pkts) == 0) { if (pkt_vec != ODP_PACKET_VECTOR_INVALID) odp_packet_vector_free(pkt_vec); @@ -567,6 +667,7 @@ static int run_worker_sched_mode(void *arg) char extra_str[EXTRA_STR_LEN]; thread_args_t *thr_args = arg; stats_t *stats = &thr_args->stats; + const appl_args_t *appl_args = &gbl_args->appl; int use_event_queue = gbl_args->appl.out_mode; pktin_mode_t in_mode = gbl_args->appl.in_mode; @@ -630,7 +731,10 @@ static int run_worker_sched_mode(void *arg) odp_packet_from_event_multi(pkt_tbl, ev_tbl, pkts); - pkts = process_extra_features(pkt_tbl, pkts, stats); + prefetch_data(appl_args->prefetch, pkt_tbl, pkts); + + pkts = process_extra_features(appl_args, pkt_tbl, pkts, stats); + if (odp_unlikely(pkts) == 0) continue; @@ -704,6 +808,7 @@ static int run_worker_plain_queue_mode(void *arg) int pktio = 0; thread_args_t *thr_args = arg; stats_t *stats = &thr_args->stats; + const appl_args_t *appl_args = &gbl_args->appl; int use_event_queue = gbl_args->appl.out_mode; int i; @@ -743,7 +848,10 @@ static int run_worker_plain_queue_mode(void *arg) odp_packet_from_event_multi(pkt_tbl, event, pkts); - pkts = process_extra_features(pkt_tbl, pkts, stats); + prefetch_data(appl_args->prefetch, pkt_tbl, pkts); + + pkts = process_extra_features(appl_args, pkt_tbl, pkts, stats); + if (odp_unlikely(pkts) == 0) continue; @@ -801,6 +909,7 @@ static int run_worker_direct_mode(void *arg) int pktio = 0; thread_args_t *thr_args = arg; stats_t *stats = &thr_args->stats; + const appl_args_t *appl_args = &gbl_args->appl; int use_event_queue = gbl_args->appl.out_mode; thr = odp_thread_id(); @@ -835,7 +944,10 @@ static int run_worker_direct_mode(void *arg) if (odp_unlikely(pkts <= 0)) continue; - pkts = process_extra_features(pkt_tbl, pkts, stats); + prefetch_data(appl_args->prefetch, pkt_tbl, pkts); + + pkts = process_extra_features(appl_args, pkt_tbl, pkts, stats); + if (odp_unlikely(pkts) == 0) continue; @@ -961,6 +1073,14 @@ static int create_pktio(const char *dev, int idx, int num_rx, int num_tx, odp_po odp_pktio_config_init(&config); + if (gbl_args->appl.input_ts) { + if (!pktio_capa.config.pktin.bit.ts_all) { + ODPH_ERR("Packet input timestamping not supported: %s\n", dev); + return -1; + } + config.pktin.bit.ts_all = 1; + } + config.parser.layer = ODP_PROTO_LAYER_NONE; if (gbl_args->appl.error_check || gbl_args->appl.chksum) config.parser.layer = ODP_PROTO_LAYER_ALL; @@ -1495,6 +1615,10 @@ static void usage(char *progname) " -p, --packet_copy 0: Don't copy packet (default)\n" " 1: Create and send copy of the received packet.\n" " Free the original packet.\n" + " -R, --data_rd <num> Number of packet data words (uint64_t) to read from\n" + " every received packet. Number of words is rounded down\n" + " to fit into the first segment of a packet. Default\n" + " is 0.\n" " -y, --pool_per_if Create a packet (and packet vector) pool per interface.\n" " 0: Share a single pool between all interfaces (default)\n" " 1: Create a pool per interface\n" @@ -1511,8 +1635,11 @@ static void usage(char *progname) " -l, --packet_len <len> Maximum length of packets supported (default %d).\n" " -L, --seg_len <len> Packet pool segment length\n" " (default equal to packet length).\n" + " -F, --prefetch <num> Prefetch packet data in 64 byte multiples (default 1).\n" " -f, --flow_aware Enable flow aware scheduling.\n" + " -T, --input_ts Enable packet input timestamping.\n" " -v, --verbose Verbose output.\n" + " -V, --verbose_pkt Print debug information on every received packet.\n" " -h, --help Display help and exit.\n\n" "\n", DEFAULT_VEC_SIZE, DEFAULT_VEC_TMO, POOL_PKT_LEN); } @@ -1550,6 +1677,7 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) {"burst_rx", required_argument, NULL, 'b'}, {"rx_queues", required_argument, NULL, 'q'}, {"packet_copy", required_argument, NULL, 'p'}, + {"data_rd", required_argument, NULL, 'R'}, {"pool_per_if", required_argument, NULL, 'y'}, {"num_pkt", required_argument, NULL, 'n'}, {"num_vec", required_argument, NULL, 'w'}, @@ -1560,13 +1688,17 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) {"promisc_mode", no_argument, NULL, 'P'}, {"packet_len", required_argument, NULL, 'l'}, {"seg_len", required_argument, NULL, 'L'}, + {"prefetch", required_argument, NULL, 'F'}, {"flow_aware", no_argument, NULL, 'f'}, + {"input_ts", no_argument, NULL, 'T'}, {"verbose", no_argument, NULL, 'v'}, + {"verbose_pkt", no_argument, NULL, 'V'}, {"help", no_argument, NULL, 'h'}, {NULL, 0, NULL, 0} }; - static const char *shortopts = "+c:t:a:i:m:o:r:d:s:e:k:g:G:I:b:q:p:y:n:l:L:w:x:z:M:uPfvh"; + static const char *shortopts = "+c:t:a:i:m:o:r:d:s:e:k:g:G:I:" + "b:q:p:R:y:n:l:L:w:x:z:M:F:uPfTvVh"; appl_args->time = 0; /* loop forever if time to run is 0 */ appl_args->accuracy = 1; /* get and print pps stats second */ @@ -1580,6 +1712,7 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) appl_args->burst_rx = 0; appl_args->rx_queues = 0; appl_args->verbose = 0; + appl_args->verbose_pkt = 0; appl_args->chksum = 0; /* don't use checksum offload by default */ appl_args->pool_per_if = 0; appl_args->num_pkt = 0; @@ -1592,7 +1725,10 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) appl_args->vec_size = 0; appl_args->vec_tmo_ns = 0; appl_args->flow_aware = 0; + appl_args->input_ts = 0; appl_args->num_prio = 0; + appl_args->prefetch = 1; + appl_args->data_rd = 0; while (1) { opt = getopt_long(argc, argv, shortopts, longopts, &long_index); @@ -1764,6 +1900,9 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) case 'p': appl_args->packet_copy = atoi(optarg); break; + case 'R': + appl_args->data_rd = atoi(optarg); + break; case 'y': appl_args->pool_per_if = atoi(optarg); break; @@ -1794,12 +1933,21 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) case 'z': appl_args->vec_tmo_ns = atoi(optarg); break; + case 'F': + appl_args->prefetch = atoi(optarg); + break; case 'f': appl_args->flow_aware = 1; break; + case 'T': + appl_args->input_ts = 1; + break; case 'v': appl_args->verbose = 1; break; + case 'V': + appl_args->verbose_pkt = 1; + break; case 'h': usage(argv[0]); exit(EXIT_SUCCESS); @@ -1835,16 +1983,13 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) appl_args->extra_feat = 0; if (appl_args->error_check || appl_args->chksum || - appl_args->packet_copy) + appl_args->packet_copy || appl_args->data_rd || appl_args->verbose_pkt) appl_args->extra_feat = 1; optind = 1; /* reset 'extern optind' from the getopt lib */ } -/* - * Print system and application info - */ -static void print_info(void) +static void print_options(void) { int i; appl_args_t *appl_args = &gbl_args->appl; @@ -1884,16 +2029,19 @@ static void print_info(void) "enabled" : "disabled"); printf("Flow aware: %s\n", appl_args->flow_aware ? "yes" : "no"); + printf("Input TS: %s\n", appl_args->input_ts ? "yes" : "no"); printf("Burst size: %i\n", appl_args->burst_rx); printf("RX queues per IF: %i\n", appl_args->rx_queues); printf("Number of pools: %i\n", appl_args->pool_per_if ? appl_args->if_count : 1); if (appl_args->extra_feat) { - printf("Extra features: %s%s%s\n", + printf("Extra features: %s%s%s%s%s\n", appl_args->error_check ? "error_check " : "", appl_args->chksum ? "chksum " : "", - appl_args->packet_copy ? "packet_copy" : ""); + appl_args->packet_copy ? "packet_copy " : "", + appl_args->data_rd ? "data_rd" : "", + appl_args->verbose_pkt ? "verbose_pkt" : ""); } printf("Num worker threads: %i\n", appl_args->num_workers); @@ -1906,11 +2054,14 @@ static void print_info(void) else printf("group: ODP_SCHED_GROUP_WORKER\n"); - printf("Packets per pool: %u\n", gbl_args->num_pkt); - printf("Packet length: %u\n", gbl_args->pkt_len); - printf("Segment length: %u\n", gbl_args->seg_len); - printf("Vectors per pool: %u\n", gbl_args->vector_num); - printf("Vector size: %u\n", gbl_args->vector_max_size); + printf("Packets per pool: %u\n", appl_args->num_pkt); + printf("Packet length: %u\n", appl_args->packet_len); + printf("Segment length: %u\n", appl_args->seg_len == UINT32_MAX ? 0 : + appl_args->seg_len); + printf("Read data: %u bytes\n", appl_args->data_rd * 8); + printf("Prefetch data %u bytes\n", appl_args->prefetch * 64); + printf("Vectors per pool: %u\n", appl_args->num_vec); + printf("Vector size: %u\n", appl_args->vec_size); printf("Priority per IF: "); for (i = 0; i < appl_args->if_count; i++) @@ -2099,8 +2250,7 @@ int main(int argc, char *argv[]) gbl_args->appl.num_workers = num_workers; - /* Print application information */ - print_info(); + print_options(); for (i = 0; i < num_workers; i++) gbl_args->thread_args[i].thr_idx = i; @@ -2148,6 +2298,12 @@ int main(int argc, char *argv[]) printf("\nWarning: Segment length requested %d configured %d\n", gbl_args->appl.seg_len, seg_len); + if (seg_len < gbl_args->appl.data_rd * 8) { + ODPH_ERR("Requested data read length %u exceeds maximum segment length %u\n", + gbl_args->appl.data_rd * 8, seg_len); + return -1; + } + /* zero means default number of packets */ if (gbl_args->appl.num_pkt == 0) num_pkt = DEFAULT_NUM_PKT; @@ -2170,6 +2326,11 @@ int main(int argc, char *argv[]) gbl_args->pkt_len = pkt_len; gbl_args->seg_len = seg_len; + printf("Resulting pool parameter values:\n"); + printf("Packets per pool: %u\n", num_pkt); + printf("Packet length: %u\n", pkt_len); + printf("Segment length: %u\n", seg_len); + /* Create packet pool */ odp_pool_param_init(¶ms); params.pkt.seg_len = seg_len; @@ -2210,6 +2371,10 @@ int main(int argc, char *argv[]) gbl_args->vector_num = params.vector.num; gbl_args->vector_max_size = params.vector.max_size; + /* Print resulting values */ + printf("Vectors per pool: %u\n", gbl_args->vector_num); + printf("Vector size: %u\n", gbl_args->vector_max_size); + for (i = 0; i < num_vec_pools; i++) { vec_pool_tbl[i] = odp_pool_create("vector pool", ¶ms); @@ -2223,6 +2388,8 @@ int main(int argc, char *argv[]) } } + printf("\n"); + bind_workers(); odp_schedule_config_init(&sched_config); diff --git a/test/performance/odp_lock_perf.c b/test/performance/odp_lock_perf.c index c12f8c950..0f78db3b8 100644 --- a/test/performance/odp_lock_perf.c +++ b/test/performance/odp_lock_perf.c @@ -5,6 +5,14 @@ * SPDX-License-Identifier: BSD-3-Clause */ +/** + * @example odp_lock_perf.c + * + * Performance test application for lock APIs + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + #include <stdio.h> #include <string.h> #include <stdint.h> diff --git a/test/performance/odp_mem_perf.c b/test/performance/odp_mem_perf.c index 56a3cdf9a..241128b1f 100644 --- a/test/performance/odp_mem_perf.c +++ b/test/performance/odp_mem_perf.c @@ -5,6 +5,14 @@ * SPDX-License-Identifier: BSD-3-Clause */ +/** + * @example odp_mem_perf.c + * + * Test application for measuring memory system bandwidth + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + #include <stdio.h> #include <string.h> #include <stdint.h> diff --git a/test/performance/odp_packet_gen.c b/test/performance/odp_packet_gen.c index 247ea2eb6..c88535791 100644 --- a/test/performance/odp_packet_gen.c +++ b/test/performance/odp_packet_gen.c @@ -4,6 +4,14 @@ * SPDX-License-Identifier: BSD-3-Clause */ +/** + * @example odp_packet_gen.c + * + * Performance optimized packet generator application + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + /* enable usleep */ #ifndef _GNU_SOURCE #define _GNU_SOURCE diff --git a/test/performance/odp_pktio_ordered.c b/test/performance/odp_pktio_ordered.c index bd43ad53d..6177a8160 100644 --- a/test/performance/odp_pktio_ordered.c +++ b/test/performance/odp_pktio_ordered.c @@ -5,9 +5,11 @@ */ /** - * @file + * @example odp_pktio_ordered.c * - * @example odp_pktio_ordered.c ODP ordered pktio test application + * Test application for ordered packet IO + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ */ /** enable strtok */ diff --git a/test/performance/odp_pktio_perf.c b/test/performance/odp_pktio_perf.c index 06620fd27..4cfeb50cf 100644 --- a/test/performance/odp_pktio_perf.c +++ b/test/performance/odp_pktio_perf.c @@ -2,14 +2,16 @@ * All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause + */ + +/** + * @example odp_pktio_perf.c * - * ODP Packet IO basic performance test application. - * - * Runs a number of transmit and receive workers on separate cores, the - * transmitters generate packets at a defined rate and the receivers consume - * them. Generated packets are UDP and each packet is marked with a magic - * number in the UDP payload allowing receiver to distinguish them from other - * traffic. + * Packet IO basic performance test application. Runs a number of transmit and + * receive workers on separate cores, the transmitters generate packets at a + * defined rate and the receivers consume them. Generated packets are UDP and + * each packet is marked with a magic number in the UDP payload allowing + * receiver to distinguish them from other traffic. * * Each test iteration runs for a fixed period, at the end of the iteration * it is verified that the number of packets transmitted was as expected and @@ -19,6 +21,7 @@ * determine the maximum rate at which no packet loss occurs. Alternatively * a single packet rate can be specified on the command line. * + * @cond _ODP_HIDE_FROM_DOXYGEN_ */ #include <odp_api.h> diff --git a/test/performance/odp_pool_latency.c b/test/performance/odp_pool_latency.c new file mode 100644 index 000000000..6b964e773 --- /dev/null +++ b/test/performance/odp_pool_latency.c @@ -0,0 +1,1382 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2024 Nokia + */ + +/** + * @example odp_pool_latency.c + * + * Pool latency tester. Allocate from different kind of pools with a varying set of configurations + * and record latencies. + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <inttypes.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +#include <odp_api.h> +#include <odp/helper/odph_api.h> + +#define PROG_NAME "odp_pool_latency" +#define DELIMITER "," +#define ALLOC '+' +#define FREE '-' +#define TOP 't' +#define BOTTOM 'b' +#define DELAY 'd' + +enum { + BUFFER = 0U, + PACKET, + TMO, + VECTOR +}; + +enum { + SINGLE = 0U, + MANY +}; + +#define DEF_ALLOC 1U +#define DEF_FREE 1U +#define DEF_DIR TOP +#define DEF_TYPE BUFFER +#define DEF_CNT 32768U +#define DEF_SIZE 1024U +#define DEF_POLICY MANY +#define DEF_ROUNDS 100000U +#define DEF_IGNORE 0U +#define DEF_WORKERS 1U +#define DEF_UA_SIZE 0U + +#define MAX_PATTERN_LEN 32U +#define MAX_WORKERS ((uint32_t)(ODP_THREAD_COUNT_MAX - 1)) +#define MAX_RETRIES 10U + +#define COND_MIN(a, b) ((a) > 0U ? ODPH_MIN((a), (b)) : (b)) +#define UA_DATA 0xAA + +ODP_STATIC_ASSERT(MAX_PATTERN_LEN < UINT8_MAX, "Too long pattern length"); + +typedef struct { + uint32_t num_evs_buf; + uint32_t num_evs_pkt; + uint32_t num_evs_tmo; + uint32_t num_evs_vec; + uint32_t data_size_buf; + uint32_t data_size_pkt; + uint32_t data_size_vec; + uint32_t cache_size_buf; + uint32_t cache_size_pkt; + uint32_t cache_size_tmo; + uint32_t cache_size_vec; +} dynamic_defs_t; + +typedef enum { + PRS_OK, + PRS_NOK, + PRS_TERM +} parse_result_t; + +typedef struct { + uint64_t tot_tm; + uint64_t alloc_tm; + uint64_t max_alloc_tm; + uint64_t min_alloc_tm; + uint64_t max_alloc_rnd; + uint64_t min_alloc_rnd; + uint64_t alloc_cnt; + uint64_t alloc_b_cnt; + uint64_t uarea_tm; + uint64_t max_uarea_tm; + uint64_t min_uarea_tm; + uint64_t max_uarea_rnd; + uint64_t min_uarea_rnd; + uint64_t free_tm; + uint64_t max_free_tm; + uint64_t min_free_tm; + uint64_t max_free_rnd; + uint64_t min_free_rnd; + uint64_t free_b_cnt; + uint64_t reallocs; + uint64_t alloc_errs; + uint64_t pattern_errs; + uint8_t max_alloc_pt; + uint8_t min_alloc_pt; + uint8_t max_uarea_pt; + uint8_t min_uarea_pt; + uint8_t max_free_pt; + uint8_t min_free_pt; +} stats_t; + +typedef struct { + uint32_t val; + uint8_t op; + uint8_t opt; +} alloc_elem_t; + +typedef struct prog_config_s prog_config_t; + +typedef struct ODP_ALIGNED_CACHE { + stats_t stats; + odp_pool_t pool; + void *data; + prog_config_t *prog_config; + odp_shm_t shm; + uint32_t data_size; + uint32_t uarea_size; +} worker_config_t; + +typedef uint32_t (*alloc_fn_t)(worker_config_t *config, void *data, uint32_t idx, uint32_t num, + uint64_t round, uint8_t pattern, odp_bool_t is_saved); +typedef void (*free_fn_t)(void *data, uint32_t idx, uint32_t num, stats_t *stats, + uint64_t round, uint8_t pattern, odp_bool_t is_saved); + +typedef struct prog_config_s { + odph_thread_t thread_tbl[MAX_WORKERS]; + worker_config_t worker_config[MAX_WORKERS]; + alloc_elem_t alloc_elems[MAX_PATTERN_LEN]; + dynamic_defs_t dyn_defs; + odp_instance_t odp_instance; + odp_cpumask_t worker_mask; + odp_barrier_t init_barrier; + odp_barrier_t term_barrier; + alloc_fn_t alloc_fn; + free_fn_t free_fn; + int64_t cache_size; + uint32_t num_data_elems; + uint32_t seg_len; + uint32_t handle_size; + uint32_t num_evs; + uint32_t data_size; + uint32_t num_rounds; + uint32_t num_ignore; + uint32_t num_workers; + uint32_t uarea_size; + uint8_t num_elems; + uint8_t type; + uint8_t policy; +} prog_config_t; + +static prog_config_t *prog_conf; + +static void init_config(prog_config_t *config) +{ + alloc_elem_t *alloc_elem; + odp_pool_capability_t capa; + odp_pool_param_t param; + worker_config_t *worker; + + memset(config, 0, sizeof(*config)); + alloc_elem = &config->alloc_elems[0]; + alloc_elem->val = DEF_ALLOC; + alloc_elem->op = ALLOC; + alloc_elem = &config->alloc_elems[1]; + alloc_elem->val = DEF_FREE; + alloc_elem->op = FREE; + alloc_elem->opt = DEF_DIR; + config->num_elems = 2U; + + if (odp_pool_capability(&capa) == 0) { + config->dyn_defs.num_evs_buf = COND_MIN(capa.buf.max_num, DEF_CNT); + config->dyn_defs.num_evs_pkt = COND_MIN(capa.pkt.max_num, DEF_CNT); + config->dyn_defs.num_evs_tmo = COND_MIN(capa.tmo.max_num, DEF_CNT); + config->dyn_defs.num_evs_vec = COND_MIN(capa.vector.max_num, DEF_CNT); + config->dyn_defs.data_size_buf = COND_MIN(capa.buf.max_size, DEF_SIZE); + config->dyn_defs.data_size_pkt = COND_MIN(capa.pkt.max_len, DEF_SIZE); + config->dyn_defs.data_size_vec = COND_MIN(capa.vector.max_size, DEF_SIZE); + odp_pool_param_init(¶m); + config->dyn_defs.cache_size_buf = param.buf.cache_size; + config->dyn_defs.cache_size_pkt = param.pkt.cache_size; + config->dyn_defs.cache_size_tmo = param.tmo.cache_size; + config->dyn_defs.cache_size_vec = param.vector.cache_size; + } + + config->cache_size = -1; + config->num_rounds = DEF_ROUNDS; + config->num_ignore = DEF_IGNORE; + config->num_workers = DEF_WORKERS; + config->uarea_size = DEF_UA_SIZE; + config->type = DEF_TYPE; + config->policy = DEF_POLICY; + + for (uint32_t i = 0U; i < MAX_WORKERS; ++i) { + worker = &config->worker_config[i]; + worker->stats.min_alloc_tm = UINT64_MAX; + worker->stats.min_uarea_tm = UINT64_MAX; + worker->stats.min_free_tm = UINT64_MAX; + worker->pool = ODP_POOL_INVALID; + worker->shm = ODP_SHM_INVALID; + } +} + +static void parse_burst_pattern(prog_config_t *config, const char *optarg) +{ + char *tmp_str = strdup(optarg), *tmp, op, opt; + uint8_t num_elems = 0U; + alloc_elem_t *elem; + uint32_t val; + int ret; + + if (tmp_str == NULL) + return; + + tmp = strtok(tmp_str, DELIMITER); + + while (tmp && num_elems < MAX_PATTERN_LEN) { + elem = &config->alloc_elems[num_elems]; + ret = sscanf(tmp, "%c%u%c", &op, &val, &opt); + + if (ret == 2 || ret == 3) { + if (op == ALLOC || (op == FREE && (opt == TOP || opt == BOTTOM)) || + op == DELAY) { + if (op == FREE) + elem->opt = opt; + + elem->val = val; + elem->op = op; + ++num_elems; + } + } + + tmp = strtok(NULL, DELIMITER); + } + + free(tmp_str); + config->num_elems = num_elems; +} + +static void print_usage(const dynamic_defs_t *dyn_defs) +{ + printf("\n" + "Pool latency tester. Allocate from different kind of pools with a varying set of\n" + "configurations and record latencies.\n" + "\n" + "Usage: " PROG_NAME " [OPTIONS]\n"); + printf("\n" + " E.g. " PROG_NAME "\n" + " " PROG_NAME " -b %c7" DELIMITER "%c1%c" DELIMITER "%c3" DELIMITER "%c9%c\n", + ALLOC, FREE, TOP, ALLOC, FREE, BOTTOM); + printf(" " PROG_NAME " -b %c10" DELIMITER "%c1000" DELIMITER "%c10%c -t 1 -d 2048 " + "-p 0 -w 64\n", ALLOC, DELAY, FREE, TOP); + printf("\n" + "Optional OPTIONS:\n" + "\n" + " -b, --burst_pattern Burst pattern for allocations, frees and delays per round,\n" + " delimited by '%s', no spaces. Allocations are indicated\n" + " with a '%c' prefix, frees with a '%c' prefix. The location\n" + " of frees are indicated from the top of a previously\n" + " allocated array of events with a '%c' suffix and from the\n" + " bottom with a '%c' suffix. Delays are indicated with a\n" + " '%c' prefix, followed by a delay in nanoseconds.\n" + " Allocations and frees should be equal in the aggregate and\n" + " frees should never outnumber allocations at any instant.\n" + " '%c%u%s%c%u%c' by default. Maximum pattern length is %u.\n" + " -t, --type Pool type. %u by default.\n" + " 0: buffer\n" + " 1: packet\n" + " 2: timeout\n" + " 3: vector\n" + " -e, --event_count Number of events. Defaults:\n" + " buffer: %u\n" + " packet: %u\n" + " timeout: %u\n" + " vector: %u\n" + " -d, --data_size Data size in bytes, ignored in case of timeout pools, with\n" + " vector pools, defines the vector size.\n" + " Defaults:\n" + " buffer: %u\n" + " packet: %u\n" + " vector: %u\n" + " -p, --policy Pool allocation policy. %u by default.\n" + " Policies:\n" + " 0: One pool shared by workers\n" + " 1: One pool per worker\n" + " -r, --round_count Number of rounds to run. %u by default.\n" + " -i, --ignore_rounds Ignore an amount of initial rounds. %u by default.\n" + " -c, --worker_count Number of workers. %u by default.\n" + " -C, --cache_size Maximum cache size for pools. Defaults:\n" + " buffer: %u\n" + " packet: %u\n" + " timeout: %u\n" + " vector: %u\n" + " -w, --write_uarea Write data to allocated event user areas. 0 bytes disables\n" + " user area write. %u by default.\n" + " -h, --help This help.\n" + "\n", DELIMITER, ALLOC, FREE, TOP, BOTTOM, DELAY, ALLOC, DEF_ALLOC, DELIMITER, FREE, + DEF_FREE, DEF_DIR, MAX_PATTERN_LEN, DEF_TYPE, dyn_defs->num_evs_buf, + dyn_defs->num_evs_pkt, dyn_defs->num_evs_tmo, dyn_defs->num_evs_vec, + dyn_defs->data_size_buf, dyn_defs->data_size_pkt, dyn_defs->data_size_vec, + DEF_POLICY, DEF_ROUNDS, DEF_IGNORE, DEF_WORKERS, dyn_defs->cache_size_buf, + dyn_defs->cache_size_pkt, dyn_defs->cache_size_tmo, dyn_defs->cache_size_vec, + DEF_UA_SIZE); +} + +static parse_result_t check_options(prog_config_t *config) +{ + odp_pool_capability_t pool_capa; + uint32_t max_workers, num_pools; + alloc_elem_t *elem; + int64_t num_tot = 0; + odp_shm_capability_t shm_capa; + uint64_t shm_size; + + if (config->type != BUFFER && config->type != PACKET && config->type != TMO && + config->type != VECTOR) { + ODPH_ERR("Invalid pool type: %u\n", config->type); + return PRS_NOK; + } + + if (odp_pool_capability(&pool_capa) < 0) { + ODPH_ERR("Error querying pool capabilities\n"); + return PRS_NOK; + } + + max_workers = ODPH_MIN(MAX_WORKERS, (uint32_t)odp_cpumask_default_worker(NULL, 0)); + + if (config->num_workers == 0U || config->num_workers > max_workers) { + ODPH_ERR("Invalid worker count: %u (min: 1, max: %u)\n", config->num_workers, + max_workers); + return PRS_NOK; + } + + (void)odp_cpumask_default_worker(&config->worker_mask, config->num_workers); + num_pools = config->policy == SINGLE ? 1U : config->num_workers; + + if (config->type == BUFFER) { + if (config->num_evs == 0U) + config->num_evs = config->dyn_defs.num_evs_buf; + + if (config->data_size == 0U) + config->data_size = config->dyn_defs.data_size_buf; + + if (config->cache_size == -1) + config->cache_size = config->dyn_defs.cache_size_buf; + + if (config->num_evs > pool_capa.buf.max_num) { + ODPH_ERR("Invalid event count: %u (max: %u)\n", config->num_evs, + pool_capa.buf.max_num); + return PRS_NOK; + } + + if (config->data_size > pool_capa.buf.max_size) { + ODPH_ERR("Invalid data size: %u (max: %u)\n", config->data_size, + pool_capa.buf.max_size); + return PRS_NOK; + } + + if (config->cache_size < pool_capa.buf.min_cache_size || + config->cache_size > pool_capa.buf.max_cache_size) { + ODPH_ERR("Invalid cache size: %" PRIi64 " (min: %u, max: %u)\n", + config->cache_size, pool_capa.buf.min_cache_size, + pool_capa.buf.max_cache_size); + return PRS_NOK; + } + + if (num_pools > pool_capa.buf.max_pools) { + ODPH_ERR("Invalid pool count: %u (max: %u)\n", num_pools, + pool_capa.buf.max_pools); + return PRS_NOK; + } + + config->handle_size = sizeof(odp_buffer_t); + config->uarea_size = ODPH_MIN(config->uarea_size, pool_capa.buf.max_uarea_size); + } else if (config->type == PACKET) { + if (config->num_evs == 0U) + config->num_evs = config->dyn_defs.num_evs_pkt; + + if (config->data_size == 0U) + config->data_size = config->dyn_defs.data_size_pkt; + + if (config->cache_size == -1) + config->cache_size = config->dyn_defs.cache_size_pkt; + + if (config->num_evs > pool_capa.pkt.max_num) { + ODPH_ERR("Invalid event count: %u (max: %u)\n", config->num_evs, + pool_capa.pkt.max_num); + return PRS_NOK; + } + + if (config->data_size > pool_capa.pkt.max_len) { + ODPH_ERR("Invalid data size: %u (max: %u)\n", config->data_size, + pool_capa.pkt.max_len); + return PRS_NOK; + } + + if (config->cache_size < pool_capa.pkt.min_cache_size || + config->cache_size > pool_capa.pkt.max_cache_size) { + ODPH_ERR("Invalid cache size: %" PRIi64 " (min: %u, max: %u)\n", + config->cache_size, pool_capa.pkt.min_cache_size, + pool_capa.pkt.max_cache_size); + return PRS_NOK; + } + + if (num_pools > pool_capa.pkt.max_pools) { + ODPH_ERR("Invalid pool count: %u (max: %u)\n", num_pools, + pool_capa.pkt.max_pools); + return PRS_NOK; + } + + config->seg_len = pool_capa.pkt.max_seg_len > config->data_size ? + config->data_size : pool_capa.pkt.max_seg_len; + config->handle_size = sizeof(odp_packet_t); + config->uarea_size = ODPH_MIN(config->uarea_size, pool_capa.pkt.max_uarea_size); + } else if (config->type == TMO) { + if (config->num_evs == 0U) + config->num_evs = config->dyn_defs.num_evs_tmo; + + if (config->cache_size == -1) + config->cache_size = config->dyn_defs.cache_size_tmo; + + if (config->num_evs > pool_capa.tmo.max_num) { + ODPH_ERR("Invalid event count: %u (max: %u)\n", config->num_evs, + pool_capa.tmo.max_num); + return PRS_NOK; + } + + if (config->cache_size < pool_capa.tmo.min_cache_size || + config->cache_size > pool_capa.tmo.max_cache_size) { + ODPH_ERR("Invalid cache size: %" PRIi64 " (min: %u, max: %u)\n", + config->cache_size, pool_capa.tmo.min_cache_size, + pool_capa.tmo.max_cache_size); + return PRS_NOK; + } + + if (num_pools > pool_capa.tmo.max_pools) { + ODPH_ERR("Invalid pool count: %u (max: %u)\n", num_pools, + pool_capa.tmo.max_pools); + return PRS_NOK; + } + + config->handle_size = sizeof(odp_timeout_t); + config->uarea_size = ODPH_MIN(config->uarea_size, pool_capa.tmo.max_uarea_size); + } else { + if (config->num_evs == 0U) + config->num_evs = config->dyn_defs.num_evs_vec; + + if (config->data_size == 0U) + config->data_size = config->dyn_defs.data_size_vec; + + if (config->cache_size == -1) + config->cache_size = config->dyn_defs.cache_size_vec; + + if (config->num_evs > pool_capa.vector.max_num) { + ODPH_ERR("Invalid event count: %u (max: %u)\n", config->num_evs, + pool_capa.vector.max_num); + return PRS_NOK; + } + + if (config->data_size > pool_capa.vector.max_size) { + ODPH_ERR("Invalid vector size: %u (max: %u)\n", config->data_size, + pool_capa.vector.max_size); + return PRS_NOK; + } + + if (config->cache_size < pool_capa.vector.min_cache_size || + config->cache_size > pool_capa.vector.max_cache_size) { + ODPH_ERR("Invalid cache size: %" PRIi64 " (min: %u, max: %u)\n", + config->cache_size, pool_capa.vector.min_cache_size, + pool_capa.vector.max_cache_size); + return PRS_NOK; + } + + if (num_pools > pool_capa.vector.max_pools) { + ODPH_ERR("Invalid pool count: %u (max: %u)\n", num_pools, + pool_capa.vector.max_pools); + return PRS_NOK; + } + + config->handle_size = sizeof(odp_packet_vector_t); + config->uarea_size = ODPH_MIN(config->uarea_size, pool_capa.vector.max_uarea_size); + } + + if (config->num_elems == 0U) { + ODPH_ERR("Invalid burst pattern, no elements\n"); + return PRS_NOK; + } + + for (uint8_t i = 0U; i < config->num_elems; ++i) { + elem = &config->alloc_elems[i]; + + if (elem->op == ALLOC) + num_tot += elem->val; + else if (elem->op == FREE) + num_tot -= elem->val; + + if (num_tot < 0) { + ODPH_ERR("Invalid burst pattern, frees exceed allocations " + "instantaneously\n"); + return PRS_NOK; + } + + config->num_data_elems += (elem->op == ALLOC ? elem->val : 0U); + } + + if (num_tot != 0) { + ODPH_ERR("Invalid burst pattern, cumulative sum not zero: %" PRId64 "\n", num_tot); + return PRS_NOK; + } + + if (odp_shm_capability(&shm_capa) < 0) { + ODPH_ERR("Error querying SHM capabilities\n"); + return PRS_NOK; + } + + if (shm_capa.max_blocks < config->num_workers + 1U) { + ODPH_ERR("Invalid amount of SHM blocks: %u (max: %u)\n", config->num_workers + 1U, + shm_capa.max_blocks); + return PRS_NOK; + } + + shm_size = (uint64_t)config->num_data_elems * config->handle_size; + + if (shm_capa.max_size != 0U && shm_size > shm_capa.max_size) { + ODPH_ERR("Invalid total SHM block size: %" PRIu64 " (max: %" PRIu64 ")\n", + shm_size, shm_capa.max_size); + return PRS_NOK; + } + + if (config->policy != SINGLE && config->policy != MANY) { + ODPH_ERR("Invalid pool policy: %u\n", config->policy); + return PRS_NOK; + } + + if (config->num_rounds == 0U) { + ODPH_ERR("Invalid round count: %u (min: 1)\n", config->num_rounds); + return PRS_NOK; + } + + if (config->num_ignore >= config->num_rounds) { + ODPH_ERR("Invalid round ignorance count: %u (max: %u)\n", config->num_ignore, + config->num_rounds - 1U); + return PRS_NOK; + } + + return PRS_OK; +} + +static parse_result_t parse_options(int argc, char **argv, prog_config_t *config) +{ + int opt, long_index; + + static const struct option longopts[] = { + { "burst_pattern", required_argument, NULL, 'b' }, + { "type", required_argument, NULL, 't' }, + { "event_count", required_argument, NULL, 'e' }, + { "data_size", required_argument, NULL, 'd' }, + { "policy", required_argument, NULL, 'p' }, + { "round_count", required_argument, NULL, 'r' }, + { "ignore_rounds", required_argument, NULL, 'i' }, + { "worker_count", required_argument, NULL, 'c' }, + { "cache_size", required_argument, NULL, 'C' }, + { "write_uarea", required_argument, NULL, 'w' }, + { "help", no_argument, NULL, 'h' }, + { NULL, 0, NULL, 0 } + }; + + static const char *shortopts = "b:t:e:d:p:r:i:c:C:w:h"; + + init_config(config); + + while (1) { + opt = getopt_long(argc, argv, shortopts, longopts, &long_index); + + if (opt == -1) + break; + + switch (opt) { + case 'b': + parse_burst_pattern(config, optarg); + break; + case 't': + config->type = atoi(optarg); + break; + case 'e': + config->num_evs = atoi(optarg); + break; + case 'd': + config->data_size = atoi(optarg); + break; + case 'p': + config->policy = atoi(optarg); + break; + case 'r': + config->num_rounds = atoi(optarg); + break; + case 'i': + config->num_ignore = atoi(optarg); + break; + case 'c': + config->num_workers = atoi(optarg); + break; + case 'C': + config->cache_size = atoi(optarg); + break; + case 'w': + config->uarea_size = atoi(optarg); + break; + case 'h': + print_usage(&config->dyn_defs); + return PRS_TERM; + case '?': + default: + print_usage(&config->dyn_defs); + return PRS_NOK; + } + } + + return check_options(config); +} + +static inline void save_alloc_stats(odp_time_t t1, odp_time_t t2, uint32_t num_alloc, + uint64_t round, uint8_t pattern, stats_t *stats) +{ + const uint64_t tm_diff = odp_time_diff_ns(t2, t1); + + stats->alloc_tm += tm_diff; + stats->alloc_cnt += num_alloc; + ++stats->alloc_b_cnt; + + if (tm_diff > stats->max_alloc_tm) { + stats->max_alloc_tm = tm_diff; + stats->max_alloc_rnd = round; + stats->max_alloc_pt = pattern; + } + + if (tm_diff < stats->min_alloc_tm) { + stats->min_alloc_tm = tm_diff; + stats->min_alloc_rnd = round; + stats->min_alloc_pt = pattern; + } +} + +static inline void write_to_uarea(uint8_t *data, uint32_t size) +{ + memset(data, UA_DATA, size); +} + +static inline void save_uarea_stats(odp_time_t t1, odp_time_t t2, uint64_t round, uint8_t pattern, + stats_t *stats) +{ + const uint64_t tm_diff = odp_time_diff_ns(t2, t1); + + stats->uarea_tm += tm_diff; + + if (tm_diff > stats->max_uarea_tm) { + stats->max_uarea_tm = tm_diff; + stats->max_uarea_rnd = round; + stats->max_uarea_pt = pattern; + } + + if (tm_diff < stats->min_uarea_tm) { + stats->min_uarea_tm = tm_diff; + stats->min_uarea_rnd = round; + stats->min_uarea_pt = pattern; + } +} + +static inline void save_free_stats(odp_time_t t1, odp_time_t t2, uint64_t round, uint8_t pattern, + stats_t *stats) +{ + const uint64_t tm_diff = odp_time_diff_ns(t2, t1); + + stats->free_tm += tm_diff; + ++stats->free_b_cnt; + + if (tm_diff > stats->max_free_tm) { + stats->max_free_tm = tm_diff; + stats->max_free_rnd = round; + stats->max_free_pt = pattern; + } + + if (tm_diff < stats->min_free_tm) { + stats->min_free_tm = tm_diff; + stats->min_free_rnd = round; + stats->min_free_pt = pattern; + } + + stats->max_free_tm = ODPH_MAX(tm_diff, stats->max_free_tm); + stats->min_free_tm = ODPH_MIN(tm_diff, stats->min_free_tm); +} + +static uint32_t allocate_buffers(worker_config_t *config, void *data, uint32_t idx, uint32_t num, + uint64_t round, uint8_t pattern, odp_bool_t is_saved) +{ + odp_time_t t1, t2; + odp_pool_t pool = config->pool; + uint32_t retries = MAX_RETRIES; + odp_buffer_t *bufs = &((odp_buffer_t *)data)[idx]; + uint32_t num_alloc, num_tot = 0U; + int ret; + stats_t *stats = &config->stats; + + while (retries-- > 0U && num_tot < num) { + num_alloc = num - num_tot; + t1 = odp_time_local_strict(); + ret = odp_buffer_alloc_multi(pool, &bufs[num_tot], num_alloc); + t2 = odp_time_local_strict(); + + if (odp_unlikely(ret < 0)) { + ++stats->alloc_errs; + break; + } + + if (odp_unlikely((uint32_t)ret < num_alloc)) + ++stats->reallocs; + + num_tot += ret; + + if (odp_likely(is_saved)) + save_alloc_stats(t1, t2, ret, round, pattern, stats); + } + + if (config->uarea_size > 0U) { + t1 = odp_time_local_strict(); + + for (uint32_t i = 0U; i < num_tot; ++i) + write_to_uarea(odp_buffer_user_area(bufs[i]), config->uarea_size); + + t2 = odp_time_local_strict(); + + if (odp_likely(is_saved)) + save_uarea_stats(t1, t2, round, pattern, stats); + } + + return num_tot; +} + +static void free_buffers(void *data, uint32_t idx, uint32_t num, stats_t *stats, uint64_t round, + uint8_t pattern, odp_bool_t is_saved) +{ + odp_time_t t1, t2; + odp_buffer_t *bufs = &((odp_buffer_t *)data)[idx]; + + t1 = odp_time_local_strict(); + odp_buffer_free_multi(bufs, num); + t2 = odp_time_local_strict(); + + if (odp_likely(is_saved)) + save_free_stats(t1, t2, round, pattern, stats); +} + +static uint32_t allocate_packets(worker_config_t *config, void *data, uint32_t idx, uint32_t num, + uint64_t round, uint8_t pattern, odp_bool_t is_saved) +{ + odp_time_t t1, t2; + odp_pool_t pool = config->pool; + uint32_t retries = MAX_RETRIES, data_size = config->data_size; + odp_packet_t *pkts = &((odp_packet_t *)data)[idx]; + uint32_t num_alloc, num_tot = 0U; + int ret; + stats_t *stats = &config->stats; + + while (retries-- > 0U && num_tot < num) { + num_alloc = num - num_tot; + t1 = odp_time_local_strict(); + ret = odp_packet_alloc_multi(pool, data_size, &pkts[num_tot], num_alloc); + t2 = odp_time_local_strict(); + + if (odp_unlikely(ret < 0)) { + ++stats->alloc_errs; + break; + } + + if (odp_unlikely((uint32_t)ret < num_alloc)) + ++stats->reallocs; + + num_tot += ret; + + if (odp_likely(is_saved)) + save_alloc_stats(t1, t2, ret, round, pattern, stats); + } + + if (config->uarea_size > 0U) { + t1 = odp_time_local_strict(); + + for (uint32_t i = 0U; i < num_tot; ++i) + write_to_uarea(odp_packet_user_area(pkts[i]), config->uarea_size); + + t2 = odp_time_local_strict(); + + if (odp_likely(is_saved)) + save_uarea_stats(t1, t2, round, pattern, stats); + } + + return num_tot; +} + +static void free_packets(void *data, uint32_t idx, uint32_t num, stats_t *stats, uint64_t round, + uint8_t pattern, odp_bool_t is_saved) +{ + odp_time_t t1, t2; + odp_packet_t *pkts = &((odp_packet_t *)data)[idx]; + + t1 = odp_time_local_strict(); + odp_packet_free_multi(pkts, num); + t2 = odp_time_local_strict(); + + if (odp_likely(is_saved)) + save_free_stats(t1, t2, round, pattern, stats); +} + +static uint32_t allocate_timeouts(worker_config_t *config, void *data, uint32_t idx, uint32_t num, + uint64_t round, uint8_t pattern, odp_bool_t is_saved) +{ + odp_time_t t1, t2; + odp_pool_t pool = config->pool; + uint32_t retries = MAX_RETRIES; + odp_timeout_t *tmos = &((odp_timeout_t *)data)[idx]; + uint32_t num_alloc, num_tot = 0U; + int ret; + stats_t *stats = &config->stats; + + while (retries-- > 0U && num_tot < num) { + num_alloc = num - num_tot; + t1 = odp_time_local_strict(); + ret = odp_timeout_alloc_multi(pool, &tmos[num_tot], num_alloc); + t2 = odp_time_local_strict(); + + if (odp_unlikely(ret < 0)) { + ++stats->alloc_errs; + break; + } + + if (odp_unlikely((uint32_t)ret < num_alloc)) + ++stats->reallocs; + + num_tot += ret; + + if (odp_likely(is_saved)) + save_alloc_stats(t1, t2, ret, round, pattern, stats); + } + + if (config->uarea_size > 0U) { + t1 = odp_time_local_strict(); + + for (uint32_t i = 0U; i < num_tot; ++i) + write_to_uarea(odp_timeout_user_area(tmos[i]), config->uarea_size); + + t2 = odp_time_local_strict(); + + if (odp_likely(is_saved)) + save_uarea_stats(t1, t2, round, pattern, stats); + } + + return num_tot; +} + +static void free_timeouts(void *data, uint32_t idx, uint32_t num, stats_t *stats, uint64_t round, + uint8_t pattern, odp_bool_t is_saved) +{ + odp_time_t t1, t2; + odp_timeout_t *tmos = &((odp_timeout_t *)data)[idx]; + + t1 = odp_time_local_strict(); + odp_timeout_free_multi(tmos, num); + t2 = odp_time_local_strict(); + + if (odp_likely(is_saved)) + save_free_stats(t1, t2, round, pattern, stats); +} + +static uint32_t allocate_vectors(worker_config_t *config, void *data, uint32_t idx, uint32_t num, + uint64_t round, uint8_t pattern, odp_bool_t is_saved) +{ + odp_time_t t1, t2; + odp_pool_t pool = config->pool; + uint32_t num_tot = 0U; + odp_packet_vector_t *vecs = &((odp_packet_vector_t *)data)[idx], vec; + stats_t *stats = &config->stats; + + t1 = odp_time_local_strict(); + + for (uint32_t i = 0U; i < num; ++i) { + vec = odp_packet_vector_alloc(pool); + + if (odp_unlikely(vec == ODP_PACKET_VECTOR_INVALID)) + break; + + vecs[num_tot++] = vec; + } + + t2 = odp_time_local_strict(); + + if (odp_unlikely(num_tot == 0)) + ++stats->alloc_errs; + else if (odp_likely(is_saved)) + save_alloc_stats(t1, t2, num_tot, round, pattern, stats); + + if (config->uarea_size > 0U) { + t1 = odp_time_local_strict(); + + for (uint32_t i = 0U; i < num_tot; ++i) + write_to_uarea(odp_packet_vector_user_area(vecs[i]), config->uarea_size); + + t2 = odp_time_local_strict(); + + if (odp_likely(is_saved)) + save_uarea_stats(t1, t2, round, pattern, stats); + } + + return num_tot; +} + +static void free_vectors(void *data, uint32_t idx, uint32_t num, stats_t *stats, uint64_t round, + uint8_t pattern, odp_bool_t is_saved) +{ + odp_time_t t1, t2; + odp_packet_vector_t *vecs = &((odp_packet_vector_t *)data)[idx]; + + t1 = odp_time_local_strict(); + + for (uint32_t i = 0U; i < num; ++i) + odp_packet_vector_free(vecs[i]); + + t2 = odp_time_local_strict(); + + if (odp_likely(is_saved)) + save_free_stats(t1, t2, round, pattern, stats); +} + +static odp_pool_t create_pool(const char *name, const odp_pool_param_t *params, uint8_t policy) +{ + static odp_pool_t pool = ODP_POOL_INVALID; + + if (policy == SINGLE && pool != ODP_POOL_INVALID) + return pool; + + pool = odp_pool_create(name, params); + + return pool; +} + +static odp_bool_t setup_worker_config(prog_config_t *config) +{ + odp_pool_param_t param; + odp_pool_t pool; + worker_config_t *worker; + odp_shm_t shm; + void *data; + + odp_pool_param_init(¶m); + + if (config->type == BUFFER) { + param.type = ODP_POOL_BUFFER; + param.buf.num = config->num_evs; + param.buf.size = config->data_size; + param.buf.uarea_size = config->uarea_size; + param.buf.cache_size = config->cache_size; + config->alloc_fn = allocate_buffers; + config->free_fn = free_buffers; + } else if (config->type == PACKET) { + param.type = ODP_POOL_PACKET; + param.pkt.num = config->num_evs; + param.pkt.len = config->data_size; + param.pkt.seg_len = config->seg_len; + param.pkt.uarea_size = config->uarea_size; + param.pkt.cache_size = config->cache_size; + config->alloc_fn = allocate_packets; + config->free_fn = free_packets; + } else if (config->type == TMO) { + param.type = ODP_POOL_TIMEOUT; + param.tmo.num = config->num_evs; + param.tmo.uarea_size = config->uarea_size; + param.tmo.cache_size = config->cache_size; + config->alloc_fn = allocate_timeouts; + config->free_fn = free_timeouts; + } else { + param.type = ODP_POOL_VECTOR; + param.vector.num = config->num_evs; + param.vector.max_size = config->data_size; + param.vector.uarea_size = config->uarea_size; + param.vector.cache_size = config->cache_size; + config->alloc_fn = allocate_vectors; + config->free_fn = free_vectors; + } + + for (uint32_t i = 0U; i < config->num_workers; ++i) { + pool = create_pool(PROG_NAME "_pool", ¶m, config->policy); + + if (pool == ODP_POOL_INVALID) { + ODPH_ERR("Error creating worker pool\n"); + return false; + } + + shm = odp_shm_reserve(PROG_NAME "_shm", + config->handle_size * config->num_data_elems, + ODP_CACHE_LINE_SIZE, 0U); + + if (shm == ODP_SHM_INVALID) { + ODPH_ERR("Error creating worker SHM\n"); + return false; + } + + data = odp_shm_addr(shm); + + if (data == NULL) { + ODPH_ERR("Error resolving worker SHM\n"); + return false; + } + + worker = &config->worker_config[i]; + worker->pool = pool; + worker->data = data; + worker->prog_config = config; + worker->shm = shm; + worker->data_size = config->data_size; + worker->uarea_size = config->uarea_size; + } + + return true; +} + +static int run_test(void *args) +{ + worker_config_t *config = args; + odp_time_t t1, t2; + uint32_t head_idx, cur_idx, num_ignore = config->prog_config->num_ignore, val, num_alloc, + idx; + odp_bool_t is_saved; + const uint8_t num_elems = config->prog_config->num_elems; + const alloc_elem_t *elems = config->prog_config->alloc_elems, *elem; + uint8_t op; + void *data = config->data; + const alloc_fn_t alloc_fn = config->prog_config->alloc_fn; + stats_t *stats = &config->stats; + const free_fn_t free_fn = config->prog_config->free_fn; + + odp_barrier_wait(&config->prog_config->init_barrier); + t1 = odp_time_local_strict(); + + for (uint32_t i = 0U; i < config->prog_config->num_rounds; ++i) { + head_idx = 0U; + cur_idx = head_idx; + is_saved = (num_ignore > 0U ? num_ignore-- : num_ignore) == 0U; + + for (uint8_t j = 0U; j < num_elems; ++j) { + elem = &elems[j]; + val = elem->val; + op = elem->op; + + if (op == ALLOC) { + num_alloc = alloc_fn(config, data, cur_idx, val, i, j, is_saved); + + if (odp_unlikely(num_alloc < val)) + ++stats->pattern_errs; + + cur_idx += num_alloc; + } else if (op == FREE) { + /* Due to potential pattern errors, there might not be expected + * amount of freeable events. */ + val = ODPH_MIN(val, cur_idx - head_idx); + + if (elem->opt == TOP) { + idx = head_idx; + head_idx += val; + } else { + cur_idx -= val; + idx = cur_idx; + } + + free_fn(data, idx, val, stats, i, j, is_saved); + } else { + odp_time_wait_ns(val); + } + } + } + + t2 = odp_time_local_strict(); + stats->tot_tm = odp_time_diff_ns(t2, t1); + odp_barrier_wait(&config->prog_config->term_barrier); + + return 0; +} + +static odp_bool_t setup_workers(prog_config_t *config) +{ + odph_thread_common_param_t thr_common; + odph_thread_param_t thr_params[config->num_workers], *thr_param; + + odp_barrier_init(&config->init_barrier, config->num_workers + 1); + odp_barrier_init(&config->term_barrier, config->num_workers + 1); + odph_thread_common_param_init(&thr_common); + thr_common.instance = config->odp_instance; + thr_common.cpumask = &config->worker_mask; + + for (uint32_t i = 0; i < config->num_workers; ++i) { + thr_param = &thr_params[i]; + odph_thread_param_init(thr_param); + thr_param->start = run_test; + thr_param->thr_type = ODP_THREAD_WORKER; + thr_param->arg = &config->worker_config[i]; + } + + if ((uint32_t)odph_thread_create(config->thread_tbl, &thr_common, thr_params, + config->num_workers) != config->num_workers) { + ODPH_ERR("Error configuring worker threads\n"); + return false; + } + + odp_barrier_wait(&config->init_barrier); + + return true; +} + +static odp_bool_t setup_test(prog_config_t *config) +{ + return setup_worker_config(config) && setup_workers(config); +} + +static void stop_test(prog_config_t *config) +{ + odp_barrier_wait(&config->term_barrier); + (void)odph_thread_join(config->thread_tbl, config->num_workers); +} + +static void print_stats(const prog_config_t *config) +{ + const alloc_elem_t *elem; + const stats_t *stats; + uint64_t ev_rate, ave_b_alloc_tm, b_alloc_min, b_alloc_max, ave_b_free_tm, b_free_min, + b_free_max, ave_alloc_tm, ave_free_tm, ave_ua_b_tm, b_ua_min, b_ua_max, ave_ua_tm, + tot_b_alloc_tm = 0U, tot_b_free_tm = 0U, tot_alloc_tm = 0U, tot_free_tm = 0U, + tot_alloc_min = 0U, tot_alloc_max = 0U, tot_free_min = 0U, tot_free_max = 0U, + tot_b_ua_tm = 0U, tot_ua_tm = 0U, tot_ua_min = 0U, tot_ua_max = 0U; + + printf("\n==================\n\n" + "Pool latency test done\n\n" + " type: %s\n" + " event count: %u\n", config->type == BUFFER ? "buffer" : + config->type == PACKET ? "packet" : config->type == TMO ? "timeout" : "vector", + config->num_evs); + + if (config->type != TMO) + printf(" %s %u\n", config->type != VECTOR ? "data size: " : "vector size:", + config->data_size); + + printf(" pool policy: %s\n" + " round count: %u\n" + " ignore count: %u\n" + " cache size: %" PRIi64 "\n" + " user area: %u (B)\n" + " burst pattern:\n", config->policy == SINGLE ? "shared" : "per-worker", + config->num_rounds, config->num_ignore, config->cache_size, config->uarea_size); + + for (uint8_t i = 0U; i < config->num_elems; ++i) { + elem = &config->alloc_elems[i]; + printf(" %s %u%s\n", elem->op == ALLOC ? "allocate:" : + elem->op == FREE && elem->opt == TOP ? "free (t):" : + elem->op == FREE && elem->opt == BOTTOM ? "free (b):" : + "delay: ", elem->val, elem->op == DELAY ? " (ns)" : ""); + } + + printf("\n"); + + for (uint32_t i = 0U; i < config->num_workers; ++i) { + stats = &config->worker_config[i].stats; + ev_rate = stats->tot_tm > 0U ? + (double)stats->alloc_cnt / stats->tot_tm * ODP_TIME_SEC_IN_NS : 0U; + ave_b_alloc_tm = stats->alloc_b_cnt > 0U ? + stats->alloc_tm / stats->alloc_b_cnt : 0U; + b_alloc_min = ave_b_alloc_tm > 0U ? stats->min_alloc_tm : 0U; + b_alloc_max = ave_b_alloc_tm > 0U ? stats->max_alloc_tm : 0U; + ave_b_free_tm = stats->free_b_cnt > 0U ? + stats->free_tm / stats->free_b_cnt : 0U; + b_free_min = ave_b_free_tm > 0U ? stats->min_free_tm : 0U; + b_free_max = ave_b_free_tm > 0U ? stats->max_free_tm : 0U; + ave_alloc_tm = stats->alloc_cnt > 0U ? stats->alloc_tm / stats->alloc_cnt : 0U; + ave_free_tm = stats->alloc_cnt > 0U ? stats->free_tm / stats->alloc_cnt : 0U; + + printf(" worker %d:\n" + " significant events allocated/freed: %" PRIu64 "\n" + " allocation retries: %" PRIu64 "\n" + " allocation errors: %" PRIu64 "\n" + " pattern errors: %" PRIu64 "\n" + " run time: %" PRIu64 " (ns)\n" + " event rate %" PRIu64 " (evs/s)\n" + " average latency breakdown (ns):\n" + " per allocation burst: %" PRIu64 " (min: %" PRIu64 " (round: %" + PRIu64 ", pattern: %u), max: %" PRIu64 " (round: %" PRIu64 ", pattern: %u))" + "\n" + " per allocation: %" PRIu64 "\n" + " per free burst: %" PRIu64 " (min: %" PRIu64 " (round: %" + PRIu64 ", pattern: %u), max: %" PRIu64 " (round: %" PRIu64 ", pattern: %u))" + "\n" + " per free: %" PRIu64 "\n", i, stats->alloc_cnt, + stats->reallocs, stats->alloc_errs, stats->pattern_errs, stats->tot_tm, + ev_rate, ave_b_alloc_tm, b_alloc_min, stats->min_alloc_rnd, + stats->min_alloc_pt, b_alloc_max, stats->max_alloc_rnd, stats->max_alloc_pt, + ave_alloc_tm, ave_b_free_tm, b_free_min, stats->min_free_rnd, + stats->min_free_pt, b_free_max, stats->max_free_rnd, stats->max_free_pt, + ave_free_tm); + tot_b_alloc_tm += ave_b_alloc_tm; + tot_b_free_tm += ave_b_free_tm; + tot_alloc_tm += ave_alloc_tm; + tot_free_tm += ave_free_tm; + tot_alloc_min += b_alloc_min; + tot_alloc_max += b_alloc_max; + tot_free_min += b_free_min; + tot_free_max += b_free_max; + + if (config->uarea_size > 0U) { + ave_ua_b_tm = stats->alloc_b_cnt > 0U ? + stats->uarea_tm / stats->alloc_b_cnt : 0U; + ave_ua_tm = stats->alloc_cnt > 0U ? + stats->uarea_tm / stats->alloc_cnt : 0U; + b_ua_min = ave_ua_b_tm > 0U ? stats->min_uarea_tm : 0U; + b_ua_max = ave_ua_b_tm > 0U ? stats->max_uarea_tm : 0U; + printf(" per ua write burst: %" PRIu64 " (min: %" PRIu64 " (" + "round: %" PRIu64 ", pattern: %u), max: %" PRIu64 " (round: %" + PRIu64 ", pattern: %u))\n" + " per ua write: %" PRIu64 "\n", ave_ua_b_tm, + b_ua_min, stats->min_uarea_rnd, stats->min_uarea_pt, b_ua_max, + stats->max_uarea_rnd, stats->max_uarea_pt, ave_ua_tm); + tot_b_ua_tm += ave_ua_b_tm; + tot_ua_tm += ave_ua_tm; + tot_ua_min += b_ua_min; + tot_ua_max += b_ua_max; + } + + printf("\n"); + } + + printf(" total (ns):\n" + " per allocation burst: %" PRIu64 " (min: %" PRIu64 ", max: %" PRIu64 ")\n" + " per allocation: %" PRIu64 "\n" + " per free burst: %" PRIu64 " (min: %" PRIu64 ", max: %" PRIu64 ")\n" + " per free: %" PRIu64 "\n", + tot_b_alloc_tm / config->num_workers, tot_alloc_min / config->num_workers, + tot_alloc_max / config->num_workers, tot_alloc_tm / config->num_workers, + tot_b_free_tm / config->num_workers, tot_free_min / config->num_workers, + tot_free_max / config->num_workers, tot_free_tm / config->num_workers); + + if (config->uarea_size > 0U) { + printf(" per ua write burst: %" PRIu64 " (min: %" PRIu64 ", max: %" + PRIu64 ")\n" + " per ua write: %" PRIu64 "\n", + tot_b_ua_tm / config->num_workers, tot_ua_min / config->num_workers, + tot_ua_max / config->num_workers, tot_ua_tm / config->num_workers); + } + + printf("\n==================\n"); +} + +static void destroy_pool(odp_pool_t pool, uint8_t policy) +{ + static odp_bool_t is_destroyed; + + if (policy == SINGLE && is_destroyed) + return; + + (void)odp_pool_destroy(pool); + is_destroyed = true; +} + +static void teardown(const prog_config_t *config) +{ + const worker_config_t *worker; + + for (uint32_t i = 0U; i < config->num_workers; ++i) { + worker = &config->worker_config[i]; + + if (worker->pool != ODP_POOL_INVALID) + destroy_pool(worker->pool, config->policy); + + if (worker->shm != ODP_SHM_INVALID) + (void)odp_shm_free(worker->shm); + } +} + +int main(int argc, char **argv) +{ + odph_helper_options_t odph_opts; + odp_init_t init_param; + odp_instance_t odp_instance; + odp_shm_t shm_cfg = ODP_SHM_INVALID; + int ret = EXIT_SUCCESS; + parse_result_t parse_res; + + argc = odph_parse_options(argc, argv); + + if (odph_options(&odph_opts) == -1) { + ODPH_ERR("Error while reading ODP helper options, exiting\n"); + exit(EXIT_FAILURE); + } + + odp_init_param_init(&init_param); + init_param.mem_model = odph_opts.mem_model; + + if (odp_init_global(&odp_instance, &init_param, NULL)) { + ODPH_ERR("ODP global init failed, exiting\n"); + exit(EXIT_FAILURE); + } + + if (odp_init_local(odp_instance, ODP_THREAD_CONTROL)) { + ODPH_ERR("ODP local init failed, exiting\n"); + exit(EXIT_FAILURE); + } + + shm_cfg = odp_shm_reserve(PROG_NAME "_cfg", sizeof(prog_config_t), ODP_CACHE_LINE_SIZE, + 0U); + + if (shm_cfg == ODP_SHM_INVALID) { + ODPH_ERR("Error reserving shared memory\n"); + ret = EXIT_FAILURE; + goto out; + } + + prog_conf = odp_shm_addr(shm_cfg); + + if (prog_conf == NULL) { + ODPH_ERR("Error resolving shared memory address\n"); + ret = EXIT_FAILURE; + goto out; + } + + parse_res = parse_options(argc, argv, prog_conf); + + if (parse_res == PRS_NOK) { + ret = EXIT_FAILURE; + goto out; + } + + if (parse_res == PRS_TERM) { + ret = EXIT_SUCCESS; + goto out; + } + + prog_conf->odp_instance = odp_instance; + + if (!setup_test(prog_conf)) { + ret = EXIT_FAILURE; + goto out_test; + } + + stop_test(prog_conf); + print_stats(prog_conf); + +out_test: + teardown(prog_conf); + +out: + if (shm_cfg != ODP_SHM_INVALID) + (void)odp_shm_free(shm_cfg); + + if (odp_term_local()) { + ODPH_ERR("ODP local terminate failed, exiting\n"); + exit(EXIT_FAILURE); + } + + if (odp_term_global(odp_instance)) { + ODPH_ERR("ODP global terminate failed, exiting\n"); + exit(EXIT_FAILURE); + } + + return ret; +} diff --git a/test/performance/odp_pool_perf.c b/test/performance/odp_pool_perf.c index 4ae2cf7d3..43a39a21e 100644 --- a/test/performance/odp_pool_perf.c +++ b/test/performance/odp_pool_perf.c @@ -6,6 +6,14 @@ * SPDX-License-Identifier: BSD-3-Clause */ +/** + * @example odp_pool_perf.c + * + * Performance test application for pool APIs + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + #include <stdio.h> #include <string.h> #include <stdint.h> diff --git a/test/performance/odp_queue_perf.c b/test/performance/odp_queue_perf.c index 4e4446610..7d4612cb8 100644 --- a/test/performance/odp_queue_perf.c +++ b/test/performance/odp_queue_perf.c @@ -5,6 +5,14 @@ * SPDX-License-Identifier: BSD-3-Clause */ +/** + * @example odp_queue_perf.c + * + * Performance test application for queue APIs + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + #include <stdio.h> #include <string.h> #include <stdint.h> diff --git a/test/performance/odp_random.c b/test/performance/odp_random.c index 46134ac0c..99714d7b3 100644 --- a/test/performance/odp_random.c +++ b/test/performance/odp_random.c @@ -5,6 +5,14 @@ * SPDX-License-Identifier: BSD-3-Clause */ +/** + * @example odp_random.c + * + * Performance test application for random data APIs + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + #include <stdio.h> #include <string.h> #include <stdint.h> diff --git a/test/performance/odp_sched_latency.c b/test/performance/odp_sched_latency.c index c8dc74656..0fec49fb9 100644 --- a/test/performance/odp_sched_latency.c +++ b/test/performance/odp_sched_latency.c @@ -6,9 +6,11 @@ */ /** - * @file + * @example odp_sched_latency.c * - * @example odp_sched_latency.c ODP scheduling latency benchmark application + * Scheduling latency benchmark application + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ */ #include <string.h> diff --git a/test/performance/odp_sched_latency_run.sh b/test/performance/odp_sched_latency_run.sh index 372fdb166..b051c1a4e 100755 --- a/test/performance/odp_sched_latency_run.sh +++ b/test/performance/odp_sched_latency_run.sh @@ -19,7 +19,7 @@ run() if [ $(nproc) -lt $1 ]; then echo "Not enough CPU cores. Skipping test." else - $TEST_DIR/odp_sched_latency${EXEEXT} -c $1 || exit $? + $TEST_DIR/odp_sched_latency${EXEEXT} -c $1 -d 1 || exit $? fi } diff --git a/test/performance/odp_sched_perf.c b/test/performance/odp_sched_perf.c index f89705576..47f703338 100644 --- a/test/performance/odp_sched_perf.c +++ b/test/performance/odp_sched_perf.c @@ -1,10 +1,23 @@ /* Copyright (c) 2018, Linaro Limited - * Copyright (c) 2020-2022, Nokia + * Copyright (c) 2020-2024, Nokia * All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ +/** + * @example odp_sched_perf.c + * + * Performance test application for scheduling + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE /* Needed for sigaction */ +#endif + +#include <signal.h> #include <stdio.h> #include <string.h> #include <stdint.h> @@ -52,6 +65,9 @@ typedef struct test_options_t { uint32_t ctx_size; uint32_t ctx_rd_words; uint32_t ctx_rw_words; + uint32_t uarea_rd; + uint32_t uarea_rw; + uint32_t uarea_size; uint64_t wait_ns; int verbose; @@ -88,6 +104,7 @@ typedef struct test_global_t { test_stat_t stat[ODP_THREAD_COUNT_MAX]; thread_arg_t thread_arg[ODP_THREAD_COUNT_MAX]; odp_atomic_u32_t num_worker; + odp_atomic_u32_t exit_threads; } test_global_t; @@ -96,6 +113,23 @@ typedef struct { odp_atomic_u64_t count; } queue_context_t; +static test_global_t *test_globals; + +static void sig_handler(int signum ODP_UNUSED) +{ + odp_atomic_store_u32(&test_globals->exit_threads, 1); +} + +static int setup_sig_handler(void) +{ + struct sigaction action = { .sa_handler = sig_handler }; + + if (sigemptyset(&action.sa_mask) || sigaction(SIGINT, &action, NULL)) + return -1; + + return 0; +} + static void print_usage(void) { printf("\n" @@ -111,7 +145,8 @@ static void print_usage(void) " the queues are default (or lowest) priority. Default: 0.\n" " -d, --num_dummy Number of empty queues. Default: 0.\n" " -e, --num_event Number of events per queue. Default: 100.\n" - " -s, --num_sched Number of events to schedule per thread. Default: 100 000.\n" + " -s, --num_sched Number of events to schedule per thread. If zero, the application runs\n" + " until SIGINT is received. Default: 100 000.\n" " -g, --num_group Number of schedule groups. Round robins threads and queues into groups.\n" " -1: SCHED_GROUP_WORKER\n" " 0: SCHED_GROUP_ALL (default)\n" @@ -127,6 +162,8 @@ static void print_usage(void) " -l, --ctx_rw_words Number of queue context words (uint64_t) to modify on every event. Default: 0.\n" " -n, --rd_words Number of event data words (uint64_t) to read before enqueueing it. Default: 0.\n" " -m, --rw_words Number of event data words (uint64_t) to modify before enqueueing it. Default: 0.\n" + " -u, --uarea_rd Number of user area words (uint64_t) to read on every event. Default: 0.\n" + " -U, --uarea_rw Number of user area words (uint64_t) to modify on every event. Default: 0.\n" " -p, --pool_type Pool type. 0: buffer, 1: packet. Default: 0.\n" " -v, --verbose Verbose output.\n" " -h, --help This help\n" @@ -159,13 +196,15 @@ static int parse_options(int argc, char *argv[], test_options_t *test_options) {"ctx_rw_words", required_argument, NULL, 'l'}, {"rd_words", required_argument, NULL, 'n'}, {"rw_words", required_argument, NULL, 'm'}, + {"uarea_rd", required_argument, NULL, 'u'}, + {"uarea_rw", required_argument, NULL, 'U'}, {"pool_type", required_argument, NULL, 'p'}, {"verbose", no_argument, NULL, 'v'}, {"help", no_argument, NULL, 'h'}, {NULL, 0, NULL, 0} }; - static const char *shortopts = "+c:q:L:H:d:e:s:g:j:b:t:f:a:w:k:l:n:m:p:vh"; + static const char *shortopts = "+c:q:L:H:d:e:s:g:j:b:t:f:a:w:k:l:n:m:p:u:U:vh"; test_options->num_cpu = 1; test_options->num_queue = 1; @@ -184,6 +223,8 @@ static int parse_options(int argc, char *argv[], test_options_t *test_options) test_options->ctx_rw_words = 0; test_options->rd_words = 0; test_options->rw_words = 0; + test_options->uarea_rd = 0; + test_options->uarea_rw = 0; test_options->wait_ns = 0; test_options->verbose = 0; @@ -245,6 +286,12 @@ static int parse_options(int argc, char *argv[], test_options_t *test_options) case 'm': test_options->rw_words = atoi(optarg); break; + case 'u': + test_options->uarea_rd = atoi(optarg); + break; + case 'U': + test_options->uarea_rw = atoi(optarg); + break; case 'p': pool_type = atoi(optarg); break; @@ -337,6 +384,7 @@ static int parse_options(int argc, char *argv[], test_options_t *test_options) ctx_size = ROUNDUP(ctx_size, ODP_CACHE_LINE_SIZE); test_options->ctx_size = ctx_size; + test_options->uarea_size = 8 * (test_options->uarea_rd + test_options->uarea_rw); return ret; } @@ -377,7 +425,7 @@ static int create_pool(test_global_t *global) odp_pool_capability_t pool_capa; odp_pool_param_t pool_param; odp_pool_t pool; - uint32_t max_num, max_size; + uint32_t max_num, max_size, max_uarea; test_options_t *test_options = &global->test_options; uint32_t num_cpu = test_options->num_cpu; uint32_t num_queue = test_options->num_queue; @@ -395,6 +443,7 @@ static int create_pool(test_global_t *global) uint32_t event_size = 16; int touch_data = test_options->touch_data; uint32_t ctx_size = test_options->ctx_size; + uint32_t uarea_size = test_options->uarea_size; if (touch_data) { event_size = test_options->rd_words + test_options->rw_words; @@ -426,22 +475,22 @@ static int create_pool(test_global_t *global) printf(" max burst size %u\n", max_burst); printf(" total events %u\n", tot_event); printf(" event size %u bytes", event_size); - if (touch_data) { - printf(" (rd: %u, rw: %u)\n", - 8 * test_options->rd_words, - 8 * test_options->rw_words); - } else { - printf("\n"); - } + if (touch_data) + printf(" (rd: %u, rw: %u)", 8 * test_options->rd_words, 8 * test_options->rw_words); + printf("\n"); printf(" context size %u bytes", ctx_size); if (test_options->ctx_rd_words || test_options->ctx_rw_words) { - printf(" (rd: %u, rw: %u)\n", + printf(" (rd: %u, rw: %u)", 8 * test_options->ctx_rd_words, 8 * test_options->ctx_rw_words); - } else { - printf("\n"); } + printf("\n"); + + printf(" user area size %u bytes", uarea_size); + if (uarea_size) + printf(" (rd: %u, rw: %u)", 8 * test_options->uarea_rd, 8 * test_options->uarea_rw); + printf("\n"); if (odp_pool_capability(&pool_capa)) { ODPH_ERR("Error: pool capa failed\n"); @@ -452,11 +501,12 @@ static int create_pool(test_global_t *global) printf(" pool type buffer\n"); max_num = pool_capa.buf.max_num; max_size = pool_capa.buf.max_size; - + max_uarea = pool_capa.buf.max_uarea_size; } else { printf(" pool type packet\n"); max_num = pool_capa.pkt.max_num; max_size = pool_capa.pkt.max_seg_len; + max_uarea = pool_capa.pkt.max_uarea_size; } if (max_num && tot_event > max_num) { @@ -469,18 +519,25 @@ static int create_pool(test_global_t *global) return -1; } + if (uarea_size > max_uarea) { + ODPH_ERR("Error: max supported user area size %u\n", max_uarea); + return -1; + } + odp_pool_param_init(&pool_param); if (test_options->pool_type == ODP_POOL_BUFFER) { pool_param.type = ODP_POOL_BUFFER; pool_param.buf.num = tot_event; pool_param.buf.size = event_size; pool_param.buf.align = 8; + pool_param.buf.uarea_size = uarea_size; } else { pool_param.type = ODP_POOL_PACKET; pool_param.pkt.num = tot_event; pool_param.pkt.len = event_size; pool_param.pkt.seg_len = event_size; pool_param.pkt.align = 8; + pool_param.pkt.uarea_size = uarea_size; } pool = odp_pool_create("sched perf", &pool_param); @@ -853,6 +910,28 @@ static int destroy_groups(test_global_t *global) return 0; } +static uint64_t rw_uarea(odp_event_t ev[], int num, uint32_t rd_words, uint32_t rw_words) +{ + uint64_t *data; + int i; + uint32_t j; + uint64_t sum = 0; + + for (i = 0; i < num; i++) { + data = odp_event_user_area(ev[i]); + + for (j = 0; j < rd_words; j++) + sum += data[j]; + + for (; j < rd_words + rw_words; j++) { + sum += data[j]; + data[j] += 1; + } + } + + return sum; +} + static inline uint64_t rw_ctx_data(void *ctx, uint32_t offset, uint32_t rd_words, uint32_t rw_words) { @@ -921,12 +1000,17 @@ static int test_sched(void *arg) uint32_t ctx_size = test_options->ctx_size; uint32_t ctx_rd_words = test_options->ctx_rd_words; uint32_t ctx_rw_words = test_options->ctx_rw_words; + const uint32_t uarea_size = test_options->uarea_size; + const uint32_t uarea_rd = test_options->uarea_rd; + const uint32_t uarea_rw = test_options->uarea_rw; odp_pool_type_t pool_type = test_options->pool_type; int touch_ctx = ctx_rd_words || ctx_rw_words; + odp_atomic_u32_t *exit_threads = &global->exit_threads; uint32_t ctx_offset = 0; uint32_t sched_retries = 0; uint64_t data_sum = 0; uint64_t ctx_sum = 0; + uint64_t uarea_sum = 0; uint64_t wait_ns = test_options->wait_ns; odp_event_t ev[max_burst]; @@ -981,7 +1065,10 @@ static int test_sched(void *arg) c1 = odp_cpu_cycles(); last_retry_ts = t1; - for (rounds = 0; events < num_sched; rounds++) { + for (rounds = 0; odp_likely(!odp_atomic_load_u32(exit_threads)); rounds++) { + if (odp_unlikely(num_sched && events >= num_sched)) + break; + num = odp_schedule_multi(&queue, ODP_SCHED_NO_WAIT, ev, max_burst); @@ -990,6 +1077,9 @@ static int test_sched(void *arg) events += num; i = 0; + if (odp_unlikely(uarea_size)) + uarea_sum += rw_uarea(ev, num, uarea_rd, uarea_rw); + if (odp_unlikely(ctx_size)) { queue_context_t *ctx = odp_queue_context(queue); @@ -1077,7 +1167,7 @@ static int test_sched(void *arg) global->stat[thr].nsec = nsec; global->stat[thr].cycles = cycles; global->stat[thr].waits = waits; - global->stat[thr].dummy_sum = data_sum + ctx_sum; + global->stat[thr].dummy_sum = data_sum + ctx_sum + uarea_sum; global->stat[thr].failed = ret; if (odp_atomic_fetch_dec_u32(&global->num_worker) == 1) { @@ -1337,10 +1427,17 @@ int main(int argc, char **argv) ODPH_ERR("Error: SHM alloc failed\n"); exit(EXIT_FAILURE); } + test_globals = global; memset(global, 0, sizeof(test_global_t)); global->pool = ODP_POOL_INVALID; global->ctx_shm = ODP_SHM_INVALID; + odp_atomic_init_u32(&global->exit_threads, 0); + + if (setup_sig_handler()) { + ODPH_ERR("Error: signal handler setup failed\n"); + exit(EXIT_FAILURE); + } if (parse_options(argc, argv, &global->test_options)) return -1; diff --git a/test/performance/odp_sched_pktio.c b/test/performance/odp_sched_pktio.c index 3a85a91a5..d8ab1b279 100644 --- a/test/performance/odp_sched_pktio.c +++ b/test/performance/odp_sched_pktio.c @@ -4,6 +4,14 @@ * SPDX-License-Identifier: BSD-3-Clause */ +/** + * @example odp_sched_pktio.c + * + * Test application for scheduled packet IO + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + #include <stdio.h> #include <string.h> #include <signal.h> diff --git a/test/performance/odp_scheduling.c b/test/performance/odp_scheduling.c index f9c083c92..c9f3eb89f 100644 --- a/test/performance/odp_scheduling.c +++ b/test/performance/odp_scheduling.c @@ -1,13 +1,14 @@ -/* Copyright (c) 2013-2018, Linaro Limited - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2013-2018 Linaro Limited + * Copyright (c) 2019-2023 Nokia */ /** - * @file + * @example odp_scheduling.c + * + * Performance test application for miscellaneous scheduling operations * - * @example odp_example.c ODP example application + * @cond _ODP_HIDE_FROM_DOXYGEN_ */ #include <string.h> @@ -47,6 +48,7 @@ typedef struct { /** Test arguments */ typedef struct { + double test_sec; /**< CPU frequency test duration in seconds */ unsigned int cpu_count; /**< CPU count */ int fairness; /**< Check fairness */ } test_args_t; @@ -685,17 +687,17 @@ static int run_thread(void *arg ODP_UNUSED) /** * @internal Test cycle counter frequency */ -static void test_cpu_freq(void) +static void test_cpu_freq(double test_sec) { odp_time_t cur_time, test_time, start_time, end_time; uint64_t c1, c2, cycles; uint64_t nsec; double diff_max_hz, max_cycles; - printf("\nCPU cycle count frequency test (runs about %i sec)\n", - TEST_SEC); + printf("\nCPU cycle count frequency test (runs about %f sec)\n", + test_sec); - test_time = odp_time_local_from_ns(TEST_SEC * ODP_TIME_SEC_IN_NS); + test_time = odp_time_local_from_ns(test_sec * ODP_TIME_SEC_IN_NS); start_time = odp_time_local(); end_time = odp_time_sum(start_time, test_time); @@ -732,6 +734,7 @@ static void print_usage(void) { printf("\n\nUsage: ./odp_example [options]\n"); printf("Options:\n"); + printf(" -t, --time <number> test duration, default=%.1f\n", (double)TEST_SEC); printf(" -c, --count <number> CPU count, 0=all available, default=1\n"); printf(" -h, --help this help\n"); printf(" -f, --fair collect fairness statistics\n"); @@ -751,15 +754,17 @@ static void parse_args(int argc, char *argv[], test_args_t *args) int long_index; static const struct option longopts[] = { + {"time", required_argument, NULL, 't'}, {"count", required_argument, NULL, 'c'}, {"fair", no_argument, NULL, 'f'}, {"help", no_argument, NULL, 'h'}, {NULL, 0, NULL, 0} }; - static const char *shortopts = "+c:fh"; + static const char *shortopts = "+t:c:fh"; args->cpu_count = 1; /* use one worker by default */ + args->test_sec = TEST_SEC; while (1) { opt = getopt_long(argc, argv, shortopts, longopts, &long_index); @@ -772,6 +777,10 @@ static void parse_args(int argc, char *argv[], test_args_t *args) args->fairness = 1; break; + case 't': + args->test_sec = atof(optarg); + break; + case 'c': args->cpu_count = atoi(optarg); break; @@ -855,14 +864,14 @@ int main(int argc, char *argv[]) printf("first CPU: %i\n", odp_cpumask_first(&cpumask)); printf("cpu mask: %s\n", cpumaskstr); - thread_tbl = calloc(sizeof(odph_thread_t), num_workers); + thread_tbl = calloc(num_workers, sizeof(odph_thread_t)); if (!thread_tbl) { ODPH_ERR("no memory for thread_tbl\n"); return -1; } /* Test cycle count frequency */ - test_cpu_freq(); + test_cpu_freq(args.test_sec); shm = odp_shm_reserve("test_globals", sizeof(test_globals_t), ODP_CACHE_LINE_SIZE, 0); diff --git a/test/performance/odp_scheduling_run.sh b/test/performance/odp_scheduling_run.sh index 2b4281ee9..4e004264e 100755 --- a/test/performance/odp_scheduling_run.sh +++ b/test/performance/odp_scheduling_run.sh @@ -19,7 +19,7 @@ run() if [ $(nproc) -lt $1 ]; then echo "Not enough CPU cores. Skipping test." else - $TEST_DIR/odp_scheduling${EXEEXT} -c $1 + $TEST_DIR/odp_scheduling${EXEEXT} -c $1 -t 0.1 RET_VAL=$? if [ $RET_VAL -ne 0 ]; then echo odp_scheduling FAILED diff --git a/test/performance/odp_stash_perf.c b/test/performance/odp_stash_perf.c index ffbc92b4b..cb223999e 100644 --- a/test/performance/odp_stash_perf.c +++ b/test/performance/odp_stash_perf.c @@ -4,6 +4,14 @@ * Copyright (c) 2023 Arm */ +/** + * @example odp_stash_perf.c + * + * Performance test application for stash APIs + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + #include <stdio.h> #include <string.h> #include <stdint.h> diff --git a/test/performance/odp_stress.c b/test/performance/odp_stress.c index 84bc4fe6c..3ec01df33 100644 --- a/test/performance/odp_stress.c +++ b/test/performance/odp_stress.c @@ -4,6 +4,14 @@ * SPDX-License-Identifier: BSD-3-Clause */ +/** + * @example odp_stress.c + * + * Test application that can be used to stress CPU, memory, and HW accelerators. + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + #include <stdio.h> #include <string.h> #include <stdint.h> @@ -73,8 +81,8 @@ static void print_usage(void) "Stress test options:\n" "\n" " -c, --num_cpu Number of CPUs (worker threads). 0: all available CPUs. Default: 1\n" - " -p, --period_ns Timeout period in nsec. Default: 1 sec\n" - " -r, --rounds Number of timeout rounds. Default: 10\n" + " -p, --period_ns Timeout period in nsec. Default: 100 ms\n" + " -r, --rounds Number of timeout rounds. Default: 2\n" " -m, --mode Select test mode. Default: 1\n" " 0: No stress, just wait for timeouts\n" " 1: Memcpy\n" @@ -106,8 +114,8 @@ static int parse_options(int argc, char *argv[], test_options_t *test_options) static const char *shortopts = "+c:p:r:m:s:g:h"; test_options->num_cpu = 1; - test_options->period_ns = 1000 * ODP_TIME_MSEC_IN_NS; - test_options->rounds = 10; + test_options->period_ns = 100 * ODP_TIME_MSEC_IN_NS; + test_options->rounds = 2; test_options->mode = 1; test_options->mem_size = 2048; test_options->group_mode = 1; diff --git a/test/performance/odp_timer_perf.c b/test/performance/odp_timer_perf.c index 8632fcb73..918267a1b 100644 --- a/test/performance/odp_timer_perf.c +++ b/test/performance/odp_timer_perf.c @@ -4,6 +4,14 @@ * SPDX-License-Identifier: BSD-3-Clause */ +/** + * @example odp_timer_perf.c + * + * Performance test application for timer APIs + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + #include <stdio.h> #include <string.h> #include <stdint.h> @@ -659,10 +667,7 @@ static int cancel_timers(test_global_t *global, uint32_t worker_idx) int ret = 0; for (i = 0; i < num_tp; i++) { - for (j = 0; j < num_timer; j++) { - if ((j % num_worker) != worker_idx) - continue; - + for (j = worker_idx; j < num_timer; j += num_worker) { timer = global->timer[i][j]; if (timer == ODP_TIMER_INVALID) continue; @@ -686,7 +691,7 @@ static int cancel_timers(test_global_t *global, uint32_t worker_idx) static int set_cancel_mode_worker(void *arg) { uint64_t tick, start_tick, period_tick, nsec; - uint64_t c1, c2, diff; + uint64_t c1, c2; int thr, status; uint32_t i, j, worker_idx; odp_event_t ev; @@ -707,11 +712,12 @@ static int set_cancel_mode_worker(void *arg) uint64_t num_tmo = 0; uint64_t num_cancel = 0; uint64_t num_set = 0; + uint64_t cancel_cycles = 0, start_cycles = 0; + odp_event_t ev_tbl[MAX_TIMERS]; thr = odp_thread_id(); worker_idx = thread_arg->worker_idx; t1 = ODP_TIME_NULL; - c1 = 0; /* Start all workers at the same time */ odp_barrier_wait(&global->barrier); @@ -761,7 +767,6 @@ static int set_cancel_mode_worker(void *arg) /* Start measurements */ started = 1; t1 = odp_time_local(); - c1 = odp_cpu_cycles(); } /* Cancel and set timers again */ @@ -774,16 +779,16 @@ static int set_cancel_mode_worker(void *arg) period_tick = global->timer_pool[i].period_tick; tick = odp_timer_current_tick(tp) + start_tick; + c1 = odp_cpu_cycles(); - for (j = 0; j < num_timer; j++) { - if ((j % num_worker) != worker_idx) - continue; + for (j = worker_idx; j < num_timer; j += num_worker) { + ev_tbl[j] = ODP_EVENT_INVALID; timer = global->timer[i][j]; if (timer == ODP_TIMER_INVALID) continue; - status = odp_timer_cancel(timer, &ev); + status = odp_timer_cancel(timer, &ev_tbl[j]); num_cancel++; if (odp_unlikely(status == ODP_TIMER_TOO_NEAR)) { @@ -794,10 +799,23 @@ static int set_cancel_mode_worker(void *arg) ret = -1; break; } + } + + c2 = odp_cpu_cycles(); + cancel_cycles += odp_cpu_cycles_diff(c2, c1); + c1 = c2; + + for (j = worker_idx; j < num_timer; j += num_worker) { + if (ev_tbl[j] == ODP_EVENT_INVALID) + continue; + + timer = global->timer[i][j]; + if (timer == ODP_TIMER_INVALID) + continue; start_param.tick_type = ODP_TIMER_TICK_ABS; start_param.tick = tick + j * period_tick; - start_param.tmo_ev = ev; + start_param.tmo_ev = ev_tbl[j]; status = odp_timer_start(timer, &start_param); num_set++; @@ -809,6 +827,9 @@ static int set_cancel_mode_worker(void *arg) break; } } + + c2 = odp_cpu_cycles(); + start_cycles += odp_cpu_cycles_diff(c2, c1); } if (test_rounds) { @@ -819,9 +840,7 @@ static int set_cancel_mode_worker(void *arg) } t2 = odp_time_local(); - c2 = odp_cpu_cycles(); nsec = odp_time_diff_ns(t2, t1); - diff = odp_cpu_cycles_diff(c2, c1); /* Cancel all timers that belong to this thread */ if (cancel_timers(global, worker_idx)) @@ -831,7 +850,8 @@ static int set_cancel_mode_worker(void *arg) global->stat[thr].events = num_tmo; global->stat[thr].rounds = test_options->test_rounds - test_rounds; global->stat[thr].nsec = nsec; - global->stat[thr].cycles_0 = diff; + global->stat[thr].cycles_0 = cancel_cycles; + global->stat[thr].cycles_1 = start_cycles; global->stat[thr].cancels = num_cancel; global->stat[thr].sets = num_set; @@ -1104,16 +1124,38 @@ static void print_stat_set_cancel_mode(test_global_t *global) int num = 0; printf("\n"); - printf("RESULTS - timer cancel + set cycles per thread:\n"); - printf("-----------------------------------------------\n"); + printf("RESULTS\n"); + printf("odp_timer_cancel() cycles per thread:\n"); + printf("-------------------------------------------------\n"); printf(" 1 2 3 4 5 6 7 8 9 10"); for (i = 0; i < ODP_THREAD_COUNT_MAX; i++) { - if (global->stat[i].sets) { + const test_stat_t *si = &global->stat[i]; + + if (si->cancels) { + if ((num % 10) == 0) + printf("\n "); + + printf("%6.1f ", (double)si->cycles_0 / si->cancels); + num++; + } + } + + printf("\n\n"); + + num = 0; + printf("odp_timer_start() cycles per thread:\n"); + printf("-------------------------------------------------\n"); + printf(" 1 2 3 4 5 6 7 8 9 10"); + + for (i = 0; i < ODP_THREAD_COUNT_MAX; i++) { + const test_stat_t *si = &global->stat[i]; + + if (si->sets) { if ((num % 10) == 0) printf("\n "); - printf("%6.1f ", (double)global->stat[i].cycles_0 / global->stat[i].sets); + printf("%6.1f ", (double)si->cycles_1 / si->sets); num++; } } |