aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatias Elo <matias.elo@nokia.com>2018-06-13 16:53:56 +0300
committerMaxim Uvarov <maxim.uvarov@linaro.org>2018-07-05 15:40:37 +0300
commitf7c03ade975b018be0766d5fea300d03642b4b12 (patch)
treed8ae36e0e36872dc90bb092fb71b5cf197cc654b
parent60cc4b660bea6af9b37a14fbf1ad07589b15b34e (diff)
test: performance: add new CPU benchmarking application
New self-contained test application for CPU stress testing. The application launches a number of worker threads, which receive preallocated test UDP packets from the scheduler and perform the following per packet processing: 1. Calculate a CRC-32C hash over the packet data and check that it matches the value stored in test header 2. Perform full packet parsing 3. Read a value from dummy lookup table based on packet data 4. Modify packet payload based on the lookup table data and sequence number 5. Update UDP checksum 6. Recalculate CRC-32C hash and store it in test header Dummy lookup table (default 1M entries) is used to increase memory usage. The size of the lookup table can be modified with '-l' option. The application measures and prints the maximum throughput for each worker thread. Signed-off-by: Matias Elo <matias.elo@nokia.com> Reviewed-by: Bogdan Pricope <bogdan.pricope@linaro.org> Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org>
-rw-r--r--test/performance/.gitignore1
-rw-r--r--test/performance/Makefile.am2
-rw-r--r--test/performance/odp_cpu_bench.c813
3 files changed, 816 insertions, 0 deletions
diff --git a/test/performance/.gitignore b/test/performance/.gitignore
index 499c4d33c..ca0e884fa 100644
--- a/test/performance/.gitignore
+++ b/test/performance/.gitignore
@@ -2,6 +2,7 @@
*.trs
odp_atomic
odp_bench_packet
+odp_cpu_bench
odp_crypto
odp_ipsec
odp_l2fwd
diff --git a/test/performance/Makefile.am b/test/performance/Makefile.am
index a110c23a6..887a013ed 100644
--- a/test/performance/Makefile.am
+++ b/test/performance/Makefile.am
@@ -3,6 +3,7 @@ include $(top_srcdir)/test/Makefile.inc
TESTS_ENVIRONMENT += TEST_DIR=${builddir}
EXECUTABLES = odp_bench_packet \
+ odp_cpu_bench \
odp_crypto \
odp_ipsec \
odp_pktio_perf \
@@ -34,6 +35,7 @@ endif
bin_PROGRAMS = $(EXECUTABLES) $(COMPILE_ONLY)
odp_bench_packet_SOURCES = odp_bench_packet.c
+odp_cpu_bench_SOURCES = odp_cpu_bench.c
odp_crypto_SOURCES = odp_crypto.c
odp_ipsec_SOURCES = odp_ipsec.c
odp_pktio_ordered_SOURCES = odp_pktio_ordered.c dummy_crc.h
diff --git a/test/performance/odp_cpu_bench.c b/test/performance/odp_cpu_bench.c
new file mode 100644
index 000000000..949825e99
--- /dev/null
+++ b/test/performance/odp_cpu_bench.c
@@ -0,0 +1,813 @@
+/* Copyright (c) 2018, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include "config.h"
+
+#include <odp_api.h>
+#include <odp/helper/odph_api.h>
+
+#include <test_debug.h>
+
+#include <getopt.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/* Queues are divided into groups and test packets are passed only between
+ * queues which belong to the same group. */
+#define MAX_GROUPS 64
+#define QUEUES_PER_GROUP 4
+#define PKTS_PER_QUEUE 256
+
+/* Maximum number of events received with one odp_schedule_multi() call */
+#define MAX_EVENT_BURST 32
+/* Seed value for all CRC-32C calculations */
+#define CRC_INIT_VAL 123456789
+/* Minimum total packet count for the test to report success */
+#define PASS_PACKETS 10000
+
+/* Default number of entries in the test lookup table */
+#define DEF_LOOKUP_TBL_SIZE (1024 * 1024)
+
+/* One ODP thread slot is reserved for the main/control thread */
+#define MAX_WORKERS (ODP_THREAD_COUNT_MAX - 1)
+ODP_STATIC_ASSERT(MAX_WORKERS <= MAX_GROUPS * QUEUES_PER_GROUP,
+		  "Not enough queues for all workers");
+
+/* Get rid of path in filename - only for unix-type paths using '/' */
+#define NO_PATH(file_name) (strrchr((file_name), '/') ? \
+			    strrchr((file_name), '/') + 1 : (file_name))
+
+/* Test dummy lookup table entry */
+typedef struct {
+	uint64_t idx;	/* Entry index; parity selects val0 vs val1 */
+	uint32_t val0;
+	uint32_t val1;
+} lookup_entry_t;
+
+/* Test packet metadata, stored in the packet user area */
+typedef struct {
+	uint32_t seq;	/* Sequence number, also cycled for queue selection */
+	uint32_t crc;	/* CRC-32C over the whole packet data */
+	uint16_t group;	/* Queue group this packet circulates in */
+} test_hdr_t;
+
+/* Parsed application arguments */
+typedef struct {
+	uint64_t lookup_tbl_size; /* Lookup table size */
+	int accuracy; /* Number of seconds between stats prints */
+	unsigned int cpu_count; /* CPU count */
+	int time; /* Time in seconds to run */
+} appl_args_t;
+
+/* Statistics */
+typedef union ODP_ALIGNED_CACHE {
+	struct {
+		/* Number of processed packets */
+		uint64_t pkts;
+		/* Number of dropped packets */
+		uint64_t dropped_pkts;
+		/* Time spent processing packets */
+		uint64_t nsec;
+		/* Cycles spent processing packets */
+		uint64_t cycles;
+	} s;
+
+	/* Pad to a full cache line to avoid false sharing between workers */
+	uint8_t padding[ODP_CACHE_LINE_SIZE];
+} stats_t;
+
+/* Thread specific data */
+typedef struct thread_args_t {
+	stats_t stats;
+	uint16_t idx;
+} thread_args_t;
+
+/* Grouping of all global data */
+typedef struct {
+	/* Thread specific arguments */
+	thread_args_t thread[MAX_WORKERS];
+	/* Barriers to synchronize main and workers */
+	odp_barrier_t init_barrier;
+	odp_barrier_t term_barrier;
+	/* Application (parsed) arguments */
+	appl_args_t appl;
+	/* Test queues */
+	odp_queue_t queue[MAX_GROUPS][QUEUES_PER_GROUP];
+	/* Test lookup table */
+	lookup_entry_t *lookup_tbl;
+} args_t;
+
+/* Global pointer to args */
+static args_t *gbl_args;
+
+/* Break workers loop if set to 1. NOTE(review): plain volatile int, not an
+ * atomic; adequate here for a one-way 0->1 exit flag written by the main
+ * thread and the signal handler. */
+static volatile int exit_threads;
+
+/* Template test packet: an Ethernet II / IPv4 / UDP frame (IP version
+ * nibble 0x45, IP protocol byte 0x11 = UDP) with pseudo-random payload.
+ * Every test packet in the pool is a byte copy of this array; its size
+ * (via sizeof) defines the packet length used throughout the test. */
+static const uint8_t test_udp_packet[] = {
+	0x00, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x00, 0x01,
+	0x02, 0x03, 0x04, 0x05, 0x08, 0x00, 0x45, 0x00,
+	0x02, 0x1C, 0x00, 0x01, 0x00, 0x00, 0x40, 0x11,
+	0xF7, 0x7C, 0xC0, 0xA8, 0x00, 0x01, 0xC0, 0xA8,
+	0x00, 0x02, 0x04, 0xD2, 0x1A, 0x82, 0x02, 0x08,
+	0x24, 0x1E, 0xC9, 0x56, 0xB4, 0xD6, 0x4B, 0x64,
+	0xB3, 0x01, 0xA1, 0x97, 0x4D, 0xD1, 0xA4, 0x76,
+	0xF5, 0x7B, 0x27, 0x22, 0x6C, 0xA9, 0xED, 0x29,
+	0x6E, 0x02, 0x80, 0xF7, 0xC4, 0x2D, 0x2A, 0x96,
+	0x2D, 0xF6, 0x02, 0x8E, 0x89, 0x9F, 0x8C, 0xF4,
+	0x0D, 0xC5, 0xE5, 0x1F, 0xA1, 0x52, 0xC3, 0x4B,
+	0x5C, 0x4C, 0xDF, 0x14, 0x05, 0x6A, 0xA8, 0xD7,
+	0xAD, 0x4F, 0x22, 0xA6, 0xB8, 0xF9, 0x52, 0x5A,
+	0xB8, 0xF9, 0xE2, 0x2C, 0x05, 0x2A, 0x6F, 0xF2,
+	0xCA, 0xA1, 0xA7, 0xC3, 0x56, 0xE1, 0xDB, 0xC1,
+	0xDB, 0x86, 0x26, 0x55, 0xAC, 0xBE, 0xE1, 0x3D,
+	0x82, 0x86, 0xB9, 0xDE, 0x3E, 0xD3, 0x11, 0xAB,
+	0x65, 0x6A, 0xED, 0x1B, 0x60, 0xBE, 0x69, 0x71,
+	0xB2, 0xA8, 0x5B, 0xB1, 0x06, 0xE3, 0x48, 0x14,
+	0xC9, 0x13, 0x73, 0xDA, 0xBE, 0xE4, 0x7A, 0x5F,
+	0xC0, 0xE0, 0xCA, 0xF3, 0x7A, 0xCA, 0x3F, 0xC9,
+	0x4A, 0xEE, 0x47, 0x76, 0x67, 0xF0, 0x0D, 0x3F,
+	0x7F, 0x3D, 0x69, 0xEA, 0x39, 0x53, 0x7C, 0xE3,
+	0xED, 0x78, 0x79, 0x47, 0x60, 0x95, 0xCB, 0xDC,
+	0x26, 0x60, 0x46, 0xAC, 0x47, 0xDA, 0x4C, 0x4D,
+	0x0F, 0xE1, 0x68, 0x43, 0xBC, 0xCD, 0x4E, 0xFE,
+	0x2E, 0xD6, 0xC2, 0x6E, 0x63, 0xEA, 0xB3, 0x98,
+	0xCA, 0x8F, 0x7F, 0x05, 0xDF, 0x72, 0x8F, 0x6E,
+	0x3E, 0x6D, 0xC7, 0x94, 0x59, 0x9D, 0x15, 0x5B,
+	0xB8, 0x02, 0x52, 0x4F, 0x68, 0x3A, 0xF1, 0xFF,
+	0xA9, 0xA4, 0x30, 0x29, 0xE0, 0x1C, 0xA0, 0x1B,
+	0x50, 0xAB, 0xFD, 0x06, 0x84, 0xD4, 0x33, 0x51,
+	0x01, 0xB3, 0x5F, 0x49, 0x5F, 0x21, 0xA0, 0xA1,
+	0xC9, 0x08, 0xB3, 0xDF, 0x72, 0x9B, 0x5B, 0x70,
+	0x89, 0x96, 0x08, 0x25, 0x88, 0x1E, 0xED, 0x52,
+	0xDC, 0x98, 0xA0, 0xB8, 0x83, 0x2A, 0xA0, 0x90,
+	0x45, 0xC9, 0x77, 0xD2, 0x19, 0xD7, 0x6B, 0xAB,
+	0x49, 0x67, 0x7C, 0xD1, 0xE0, 0x23, 0xA2, 0x36,
+	0xB2, 0x91, 0x3B, 0x23, 0x3B, 0x03, 0x36, 0xAF,
+	0xAD, 0x81, 0xFA, 0x6F, 0x68, 0xD5, 0xBE, 0x73,
+	0x1D, 0x56, 0x8A, 0xE8, 0x1A, 0xB4, 0xA8, 0x7C,
+	0xF3, 0x82, 0x10, 0xD0, 0xF2, 0x1D, 0x9C, 0xEA,
+	0xAB, 0xE7, 0xEC, 0x53, 0x6D, 0x52, 0xBD, 0x29,
+	0x86, 0x21, 0xCE, 0xAA, 0xF3, 0x68, 0xA6, 0xEC,
+	0x7E, 0xCA, 0x6F, 0xEB, 0xE1, 0x81, 0x80, 0x7C,
+	0xF3, 0xE5, 0x22, 0xA0, 0x91, 0x08, 0xB7, 0x35,
+	0x15, 0x87, 0x0C, 0x77, 0x31, 0x9C, 0x2F, 0x73,
+	0xCE, 0x29, 0x6F, 0xC6, 0xAC, 0x9F, 0x68, 0xB8,
+	0x6A, 0xFC, 0xD3, 0xB5, 0x08, 0x98, 0xAE, 0xE4,
+	0x20, 0x84, 0x24, 0x69, 0xA5, 0xF5, 0x4A, 0x9D,
+	0x44, 0x26, 0x5A, 0xF9, 0x6B, 0x5E, 0x5D, 0xC8,
+	0x6F, 0xD4, 0x62, 0x91, 0xE5, 0x8E, 0x80, 0x05,
+	0xA1, 0x95, 0x09, 0xEA, 0xFE, 0x84, 0x6D, 0xC3,
+	0x0D, 0xD4, 0x32, 0xA4, 0x38, 0xB2, 0xF7, 0x9D,
+	0x58, 0xD3, 0x5D, 0x93, 0x5F, 0x67, 0x86, 0xE1,
+	0xAF, 0xFF, 0xE9, 0xFE, 0xF4, 0x71, 0x63, 0xE3,
+	0x3E, 0xE1, 0x7A, 0x80, 0x5A, 0x23, 0x4F, 0x5B,
+	0x54, 0x21, 0x0E, 0xE2, 0xAF, 0x01, 0x2E, 0xA4,
+	0xF5, 0x1F, 0x59, 0x96, 0x3E, 0x82, 0xF3, 0x44,
+	0xDF, 0xA6, 0x7C, 0x64, 0x5D, 0xC7, 0x79, 0xA1,
+	0x17, 0xE1, 0x06, 0x14, 0x3E, 0x1B, 0x46, 0xCA,
+	0x71, 0xC8, 0x05, 0x62, 0xD0, 0x56, 0x23, 0x9B,
+	0xBA, 0xFE, 0x6D, 0xA8, 0x03, 0x4C, 0x23, 0xD8,
+	0x98, 0x8A, 0xE8, 0x9C, 0x93, 0x8E, 0xB7, 0x24,
+	0x31, 0x2A, 0x81, 0x72, 0x8F, 0x13, 0xD4, 0x7E,
+	0xEB, 0xB1, 0xEE, 0x33, 0xD9, 0xF4, 0x96, 0x5E,
+	0x6C, 0x3D, 0x45, 0x9C, 0xE0, 0x71, 0xA3, 0xFA,
+	0x17, 0x2B, 0xC3, 0x07, 0xD6, 0x86, 0xA2, 0x06,
+	0xC5, 0x33, 0xF0, 0xEA, 0x25, 0x70, 0x68, 0x56,
+	0xD5, 0xB0
+};
+
+/* SIGINT handler: request all worker loops (and the stats loop) to stop.
+ * Only sets a flag - safe to call from signal context. */
+static void sig_handler(int signo ODP_UNUSED)
+{
+	exit_threads = 1;
+}
+
+/* Initialize one preallocated test packet.
+ *
+ * Parses the packet (so the L4 offset becomes valid), writes 'seq' into the
+ * first 32 bits of the UDP payload, recomputes the UDP checksum, and stores
+ * the sequence number, group id and a CRC-32C over the whole packet into the
+ * test header kept in the packet user area. Aborts on any failure since the
+ * test cannot proceed with a broken template packet. */
+static inline void init_packet(odp_packet_t pkt, uint32_t seq, uint16_t group)
+{
+	uint32_t *payload;
+	test_hdr_t *hdr;
+	/* NOTE(review): only three fields of the parse param are set; verify
+	 * no other fields are read by this ODP version. */
+	odp_packet_parse_param_t param;
+
+	param.proto = ODP_PROTO_ETH;
+	param.last_layer = ODP_PROTO_LAYER_ALL;
+	param.chksums.all_chksum = 0;
+	if (odp_packet_parse(pkt, 0, &param))
+		LOG_ABORT("odp_packet_parse() failed\n");
+
+	/* Modify UDP payload and update checksum */
+	payload = odp_packet_offset(pkt, odp_packet_l4_offset(pkt) +
+				    ODPH_UDPHDR_LEN, NULL, NULL);
+	*payload = seq;
+	if (odph_udp_chksum_set(pkt))
+		LOG_ABORT("odph_udp_chksum_set() failed\n");
+
+	/* Test header is stored in user area */
+	hdr = odp_packet_user_area(pkt);
+	hdr->seq = seq;
+	hdr->group = group;
+	hdr->crc = odp_hash_crc32c(odp_packet_data(pkt), odp_packet_len(pkt),
+				   CRC_INIT_VAL);
+}
+
+/* Perform the per-packet work on one scheduled event.
+ *
+ * Steps: verify the CRC-32C stored in the packet user area, re-parse the
+ * packet with all checksum checks enabled, modify two 32-bit payload words
+ * (one using a lookup table entry selected by crc+seq, one using the
+ * sequence number), patch the UDP checksum incrementally, and store a fresh
+ * CRC. Returns the next destination queue for the packet, or
+ * ODP_QUEUE_INVALID if the event must be dropped. */
+static inline odp_queue_t work_on_event(odp_event_t event)
+{
+	odp_packet_t pkt;
+	odp_packet_parse_param_t param;
+	odph_udphdr_t *udp_hdr;
+	test_hdr_t *hdr;
+	lookup_entry_t *lookup_entry;
+	uint32_t *payload;
+	uint32_t crc;
+	uint32_t pkt_len;
+	uint8_t *data;
+	uint32_t new_val;
+	uint32_t old_val;
+
+	if (odp_event_type(event) != ODP_EVENT_PACKET)
+		return ODP_QUEUE_INVALID;
+
+	pkt = odp_packet_from_event(event);
+	hdr = odp_packet_user_area(pkt);
+	pkt_len = odp_packet_len(pkt);
+	data = odp_packet_data(pkt);
+
+	/* CRC mismatch is logged but the packet still continues the test */
+	crc = odp_hash_crc32c(data, pkt_len, CRC_INIT_VAL);
+	if (crc != hdr->crc)
+		LOG_ERR("Error: Invalid packet crc\n");
+
+	param.proto = ODP_PROTO_ETH;
+	param.last_layer = ODP_PROTO_LAYER_ALL;
+	param.chksums.all_chksum = 1;
+	if (odp_packet_parse(pkt, 0, &param)) {
+		LOG_ERR("Error: odp_packet_parse() failed\n");
+		return ODP_QUEUE_INVALID;
+	}
+
+	/* Modify packet data using lookup table value and sequence number, and
+	 * update UDP checksum accordingly. */
+	lookup_entry = &gbl_args->lookup_tbl[(crc + hdr->seq) %
+					     gbl_args->appl.lookup_tbl_size];
+	udp_hdr = odp_packet_l4_ptr(pkt, NULL);
+	payload = odp_packet_offset(pkt, odp_packet_l4_offset(pkt) +
+				    ODPH_UDPHDR_LEN, NULL, NULL);
+	old_val = *payload;
+	*payload += lookup_entry->idx % 2 ? lookup_entry->val1 :
+			lookup_entry->val0;
+	new_val = *payload;
+	/* Incremental checksum patch in the style of RFC 1624.
+	 * NOTE(review): the 32-bit intermediate is truncated to 16 bits on
+	 * assignment - assumed consistent with the full recomputation done by
+	 * odph_udp_chksum_set() at init time; verify. */
+	udp_hdr->chksum = ~(~udp_hdr->chksum + (-old_val) + new_val);
+
+	payload++;
+	old_val = *payload;
+	*payload += hdr->seq;
+	new_val = *payload;
+	udp_hdr->chksum = ~(~udp_hdr->chksum + (-old_val) + new_val);
+
+	hdr->crc = odp_hash_crc32c(data, pkt_len, CRC_INIT_VAL);
+
+	/* Round-robin the packet to the next queue of its group; note the
+	 * post-increment deliberately mutates the stored sequence number. */
+	return gbl_args->queue[hdr->group][hdr->seq++ % QUEUES_PER_GROUP];
+}
+
+/**
+ * Worker thread
+ *
+ * Schedules event bursts, processes each packet with work_on_event() and
+ * enqueues it to its next destination queue. Per-thread packet/drop counts
+ * and elapsed time/cycles are recorded in the thread's stats block. After
+ * the termination barrier the thread helps drain all remaining events so
+ * queue and pool teardown in main() can succeed.
+ */
+static int run_thread(void *arg)
+{
+	thread_args_t *thr_args = arg;
+	stats_t *stats = &thr_args->stats;
+	odp_time_t t1, t2;
+	uint64_t c1, c2;
+
+	/* Wait until all workers and the main thread are ready */
+	odp_barrier_wait(&gbl_args->init_barrier);
+
+	c1 = odp_cpu_cycles();
+	t1 = odp_time_local();
+
+	while (!exit_threads) {
+		odp_event_t event_tbl[MAX_EVENT_BURST];
+		odp_queue_t dst_queue;
+		int num_events;
+		int i;
+
+		num_events = odp_schedule_multi(NULL, ODP_SCHED_NO_WAIT,
+						event_tbl, MAX_EVENT_BURST);
+		if (num_events <= 0)
+			continue;
+
+		for (i = 0; i < num_events; i++) {
+			odp_event_t event = event_tbl[i];
+
+			dst_queue = work_on_event(event);
+			if (odp_unlikely(dst_queue == ODP_QUEUE_INVALID)) {
+				stats->s.dropped_pkts++;
+				odp_event_free(event);
+				continue;
+			}
+
+			if (odp_unlikely(odp_queue_enq(dst_queue, event))) {
+				LOG_ERR("Error: odp_queue_enq() failed\n");
+				stats->s.dropped_pkts++;
+				odp_event_free(event);
+				break;
+			}
+
+			stats->s.pkts++;
+		}
+	}
+
+	c2 = odp_cpu_cycles();
+	t2 = odp_time_local();
+
+	/* NOTE(review): raw cycle difference assumes the cycle counter did
+	 * not wrap during the run; odp_cpu_cycles_diff() would handle wrap. */
+	stats->s.cycles = c2 - c1;
+	stats->s.nsec = odp_time_diff_ns(t2, t1);
+
+	/* Signal main that final stats are in place */
+	odp_barrier_wait(&gbl_args->term_barrier);
+
+	/* Free remaining events in queues */
+	while (1) {
+		odp_event_t ev;
+
+		ev = odp_schedule(NULL,
+				  odp_schedule_wait_time(ODP_TIME_SEC_IN_NS));
+
+		if (ev == ODP_EVENT_INVALID)
+			break;
+
+		odp_event_free(ev);
+	}
+
+	return 0;
+}
+
+/*
+ * Print usage information
+ */
+static void usage(char *progname)
+{
+	printf("\n"
+	       "OpenDataPlane CPU benchmarking application.\n"
+	       "\n"
+	       "Usage: %s [options]\n"
+	       "\n"
+	       " E.g. %s -c 4 -t 30\n"
+	       "Options:\n"
+	       " -c, --count <number> CPU count, 0=all available, default=1\n"
+	       " -t, --time <sec> Time in seconds to run\n"
+	       " (default is 10 second).\n"
+	       " -a, --accuracy <sec> Time in seconds get print statistics\n"
+	       " (default is 1 second).\n"
+	       " -l, --lookup_tbl <num> Number of entries in dummy lookup table\n"
+	       " (default is %d).\n"
+	       " -h, --help Display help and exit.\n\n"
+	       "\n", NO_PATH(progname), NO_PATH(progname), DEF_LOOKUP_TBL_SIZE);
+}
+
+/**
+ * @internal Parse arguments
+ *
+ * Fills appl_args with defaults, then overrides them from the command line.
+ * Exits the process on -h or on an invalid lookup table size.
+ *
+ * @param argc       Argument count
+ * @param argv       Argument vector
+ * @param appl_args  Parsed application arguments (output)
+ */
+static void parse_args(int argc, char *argv[], appl_args_t *appl_args)
+{
+	int opt;
+	int long_index;
+
+	static const struct option longopts[] = {
+		{"accuracy", required_argument, NULL, 'a'},
+		{"cpu", required_argument, NULL, 'c'},
+		{"lookup_tbl", required_argument, NULL, 'l'},
+		{"time", required_argument, NULL, 't'},
+		{"help", no_argument, NULL, 'h'},
+		{NULL, 0, NULL, 0}
+	};
+
+	static const char *shortopts = "+a:+c:+l:+t:h";
+
+	/* Let helper collect its own arguments (e.g. --odph_proc) */
+	argc = odph_parse_options(argc, argv);
+
+	/* Defaults */
+	appl_args->accuracy = 1; /* Get and print pps stats second */
+	appl_args->cpu_count = 1;
+	appl_args->lookup_tbl_size = DEF_LOOKUP_TBL_SIZE;
+	appl_args->time = 10; /* Loop forever if time to run is 0 */
+
+	while (1) {
+		opt = getopt_long(argc, argv, shortopts, longopts, &long_index);
+
+		if (opt == -1)
+			break; /* No more options */
+
+		switch (opt) {
+		case 'a':
+			appl_args->accuracy = atoi(optarg);
+			break;
+		case 'c':
+			appl_args->cpu_count = atoi(optarg);
+			break;
+		case 'l':
+			/* lookup_tbl_size is uint64_t: atoi() would truncate
+			 * values above INT_MAX, so use strtoull() instead */
+			appl_args->lookup_tbl_size = strtoull(optarg, NULL, 0);
+			break;
+		case 't':
+			appl_args->time = atoi(optarg);
+			break;
+		case 'h':
+			usage(argv[0]);
+			exit(EXIT_SUCCESS);
+			break;
+
+		default:
+			break;
+		}
+	}
+
+	if (appl_args->lookup_tbl_size < 1) {
+		printf("At least one lookup table entry required.\n");
+		exit(EXIT_FAILURE);
+	}
+}
+
+/*
+ * Print statistics
+ *
+ * num_workers Number of worker threads
+ * thr_stats   Pointers to stats storage
+ * duration    Number of seconds to loop (0 = loop until interrupted)
+ * accuracy    Seconds between periodic stats prints (<= 0 disables them)
+ *
+ * Returns 0 if more than PASS_PACKETS packets were processed, -1 otherwise.
+ */
+static int print_stats(int num_workers, stats_t **thr_stats, int duration,
+		       int accuracy)
+{
+	uint64_t pkts;
+	uint64_t dropped;
+	uint64_t pkts_prev = 0;
+	uint64_t nsec = 0;
+	uint64_t cycles = 0;
+	int i;
+	int elapsed = 0;
+	int stats_enabled = 1;
+	int loop_forever = (duration == 0);
+
+	if (accuracy <= 0) {
+		stats_enabled = 0;
+		accuracy = 1;
+	}
+	/* Wait for all threads to be ready*/
+	odp_barrier_wait(&gbl_args->init_barrier);
+
+	do {
+		uint64_t pps;
+
+		sleep(accuracy);
+
+		pkts = 0;
+		dropped = 0;
+		for (i = 0; i < num_workers; i++) {
+			pkts += thr_stats[i]->s.pkts;
+			dropped += thr_stats[i]->s.dropped_pkts;
+		}
+
+		pps = (pkts - pkts_prev) / accuracy;
+
+		if (stats_enabled) {
+			printf("%.2f Mpps, ", pps / 1000000.0);
+
+			printf("%" PRIu64 " dropped\n", dropped);
+		}
+
+		pkts_prev = pkts;
+		elapsed += accuracy;
+	} while (!exit_threads && (loop_forever || (elapsed < duration)));
+
+	/* Ask workers to stop and wait until their final stats are stored */
+	exit_threads = 1;
+	odp_barrier_wait(&gbl_args->term_barrier);
+
+	pkts = 0;
+	dropped = 0;
+	for (i = 0; i < num_workers; i++) {
+		pkts += thr_stats[i]->s.pkts;
+		dropped += thr_stats[i]->s.dropped_pkts;
+		nsec += thr_stats[i]->s.nsec;
+		cycles += thr_stats[i]->s.cycles;
+	}
+
+	printf("\nRESULTS - per thread (Million packets per sec):\n");
+	printf("-----------------------------------------------\n");
+	printf(" avg 1 2 3 4 5 6 7 8 9 10\n");
+	/* Guard all averages against division by zero in case no packets
+	 * were processed (e.g. immediate SIGINT) */
+	printf("%6.2f ", nsec ? pkts / (nsec / 1000.0) : 0.0);
+
+	for (i = 0; i < num_workers; i++) {
+		if (i != 0 && (i % 10) == 0)
+			printf("\n ");
+
+		printf("%6.2f ", thr_stats[i]->s.nsec ?
+		       thr_stats[i]->s.pkts /
+		       (thr_stats[i]->s.nsec / 1000.0) : 0.0);
+	}
+	printf("\n\n");
+
+	nsec /= num_workers;
+	printf("RESULTS - total over %i threads:\n", num_workers);
+	printf("----------------------------------\n");
+	printf(" avg packets per sec: %.3f M\n",
+	       nsec ? pkts / (nsec / 1000.0) : 0.0);
+	/* cycles / pkts with pkts == 0 would be undefined behavior */
+	printf(" avg cycles per packet: %" PRIu64 "\n",
+	       pkts ? cycles / pkts : 0);
+	printf(" dropped packets: %" PRIu64 "\n\n", dropped);
+
+	return pkts > PASS_PACKETS ? 0 : -1;
+}
+
+/* Zero-initialize the shared global data block */
+static void gbl_args_init(args_t *args)
+{
+	memset(args, 0, sizeof(*args));
+}
+
+/**
+ * Test main function
+ *
+ * Initializes ODP, creates scheduled queues divided into groups,
+ * preallocates and initializes the test packets, launches the worker
+ * threads, runs the measurement loop and finally tears everything down.
+ */
+int main(int argc, char *argv[])
+{
+	stats_t *stats[MAX_WORKERS];
+	odph_odpthread_t thread_tbl[MAX_WORKERS];
+	odp_cpumask_t cpumask;
+	odp_pool_capability_t pool_capa;
+	odp_pool_t pool;
+	odp_queue_capability_t queue_capa;
+	odp_shm_t shm;
+	odp_shm_t lookup_tbl_shm;
+	odp_pool_param_t params;
+	odp_instance_t instance;
+	odp_init_t init;
+	char cpumaskstr[ODP_CPUMASK_STR_SIZE];
+	uint32_t num_pkts;
+	uint32_t num_groups;
+	uint32_t num_queues;
+	uint32_t pkts_per_group;
+	uint32_t pkt_len;
+	uint32_t init_val;
+	unsigned int num_workers;
+	unsigned int i, j;
+	int cpu;
+	int ret = 0;
+
+	odp_init_param_init(&init);
+
+	/* List features not to be used (may optimize performance) */
+	init.not_used.feat.cls = 1;
+	init.not_used.feat.crypto = 1;
+	init.not_used.feat.ipsec = 1;
+	init.not_used.feat.timer = 1;
+	init.not_used.feat.tm = 1;
+
+	/* Signal handler has to be registered before global init in case ODP
+	 * implementation creates internal threads/processes. */
+	signal(SIGINT, sig_handler);
+
+	if (odp_init_global(&instance, &init, NULL)) {
+		LOG_ERR("Error: ODP global init failed\n");
+		return -1;
+	}
+
+	if (odp_init_local(instance, ODP_THREAD_CONTROL)) {
+		LOG_ERR("Error: ODP local init failed\n");
+		exit(EXIT_FAILURE);
+	}
+
+	/* NOTE(review): the shm handle itself is not validated; a failed
+	 * reserve is assumed to be caught by the NULL address check below. */
+	shm = odp_shm_reserve("shm_args", sizeof(args_t), ODP_CACHE_LINE_SIZE,
+			      0);
+	gbl_args = odp_shm_addr(shm);
+	if (gbl_args == NULL) {
+		LOG_ERR("Error: shared mem alloc failed\n");
+		exit(EXIT_FAILURE);
+	}
+	gbl_args_init(gbl_args);
+
+	/* Parse and store the application arguments */
+	parse_args(argc, argv, &gbl_args->appl);
+
+	lookup_tbl_shm = odp_shm_reserve("lookup_tbl_shm",
+					 sizeof(lookup_entry_t) *
+					 gbl_args->appl.lookup_tbl_size,
+					 ODP_CACHE_LINE_SIZE, 0);
+	gbl_args->lookup_tbl = odp_shm_addr(lookup_tbl_shm);
+	if (gbl_args->lookup_tbl == NULL) {
+		LOG_ERR("Error: lookup table mem alloc failed\n");
+		exit(EXIT_FAILURE);
+	}
+
+	printf("\n");
+	odp_sys_info_print();
+
+	/* Default to system CPU count unless user specified */
+	num_workers = MAX_WORKERS;
+	if (gbl_args->appl.cpu_count && gbl_args->appl.cpu_count < MAX_WORKERS)
+		num_workers = gbl_args->appl.cpu_count;
+
+	/* Get default worker cpumask */
+	num_workers = odp_cpumask_default_worker(&cpumask, num_workers);
+	(void)odp_cpumask_to_str(&cpumask, cpumaskstr, sizeof(cpumaskstr));
+
+	printf("num worker threads: %i\n", num_workers);
+	printf("first CPU: %i\n", odp_cpumask_first(&cpumask));
+	printf("cpu mask: %s\n", cpumaskstr);
+
+	/* Create application queues */
+	if (odp_queue_capability(&queue_capa)) {
+		LOG_ERR("Error: odp_queue_capability() failed\n");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Make sure a single queue can store all the packets in a group */
+	pkts_per_group = QUEUES_PER_GROUP * PKTS_PER_QUEUE;
+	if (queue_capa.sched.max_size &&
+	    queue_capa.sched.max_size < pkts_per_group)
+		pkts_per_group = queue_capa.sched.max_size;
+
+	/* Divide queues evenly into groups */
+	if (queue_capa.sched.max_num < QUEUES_PER_GROUP) {
+		LOG_ERR("Error: min %d queues required\n", QUEUES_PER_GROUP);
+		return -1;
+	}
+	num_queues = num_workers > queue_capa.sched.max_num ?
+			queue_capa.sched.max_num : num_workers;
+	num_groups = (num_queues + QUEUES_PER_GROUP - 1) / QUEUES_PER_GROUP;
+	if (num_groups * QUEUES_PER_GROUP > queue_capa.sched.max_num)
+		num_groups--;
+	num_queues = num_groups * QUEUES_PER_GROUP;
+
+	for (i = 0; i < num_groups; i++) {
+		for (j = 0; j < QUEUES_PER_GROUP; j++) {
+			odp_queue_t queue;
+			odp_queue_param_t param;
+
+			odp_queue_param_init(&param);
+			param.type = ODP_QUEUE_TYPE_SCHED;
+			param.sched.prio = ODP_SCHED_PRIO_NORMAL;
+			param.sched.sync = ODP_SCHED_SYNC_PARALLEL;
+			param.sched.group = ODP_SCHED_GROUP_ALL;
+			param.size = pkts_per_group;
+
+			queue = odp_queue_create(NULL, &param);
+			if (queue == ODP_QUEUE_INVALID) {
+				LOG_ERR("Error: odp_queue_create() failed\n");
+				return -1;
+			}
+			gbl_args->queue[i][j] = queue;
+		}
+	}
+
+	/* Create packet pool */
+	if (odp_pool_capability(&pool_capa)) {
+		LOG_ERR("Error: odp_pool_capability() failed\n");
+		exit(EXIT_FAILURE);
+	}
+	num_pkts = pkts_per_group * num_groups;
+	if (num_pkts > pool_capa.pkt.max_num)
+		num_pkts = pool_capa.pkt.max_num;
+
+	pkt_len = sizeof(test_udp_packet);
+	if (pool_capa.pkt.max_len && pkt_len > pool_capa.pkt.max_len)
+		pkt_len = pool_capa.pkt.max_len;
+
+	if (pool_capa.pkt.max_seg_len && pkt_len > pool_capa.pkt.max_seg_len)
+		pkt_len = pool_capa.pkt.max_seg_len;
+
+	/* The whole template packet must fit in a single segment */
+	if (pkt_len < sizeof(test_udp_packet)) {
+		LOG_ERR("Error: min %dB single segment packets required\n",
+			(int)sizeof(test_udp_packet));
+		exit(EXIT_FAILURE);
+	}
+
+	if (pool_capa.pkt.max_uarea_size &&
+	    pool_capa.pkt.max_uarea_size < sizeof(test_hdr_t)) {
+		LOG_ERR("Error: min %dB of packet user area required\n",
+			(int)sizeof(test_hdr_t));
+		exit(EXIT_FAILURE);
+	}
+
+	odp_pool_param_init(&params);
+	params.pkt.len = pkt_len;
+	params.pkt.max_len = pkt_len;
+	params.pkt.seg_len = pkt_len;
+	params.pkt.num = num_pkts;
+	params.pkt.max_num = num_pkts;
+	params.pkt.uarea_size = sizeof(test_hdr_t);
+	params.type = ODP_POOL_PACKET;
+	pool = odp_pool_create("pkt_pool", &params);
+	if (pool == ODP_POOL_INVALID) {
+		LOG_ERR("Error: packet pool create failed\n");
+		exit(EXIT_FAILURE);
+	}
+	odp_pool_print(pool);
+
+	printf("CPU bench args\n--------------\n");
+	printf(" workers: %u\n", num_workers);
+	printf(" queues: %" PRIu32 "\n", num_queues);
+	printf(" pkts: %" PRIu32 "\n", num_pkts);
+	printf(" pkt size: %" PRIu32 " B\n", pkt_len);
+	printf(" lookup entries: %" PRIu64 "\n\n",
+	       gbl_args->appl.lookup_tbl_size);
+
+	/* Spread test packets into queues */
+	for (i = 0; i < num_pkts; i++) {
+		odp_packet_t pkt = odp_packet_alloc(pool, pkt_len);
+		odp_event_t ev;
+		odp_queue_t queue;
+		uint16_t group = i % num_groups;
+
+		if (pkt == ODP_PACKET_INVALID) {
+			LOG_ERR("Error: odp_packet_alloc() failed\n");
+			return -1;
+		}
+
+		odp_packet_copy_from_mem(pkt, 0, pkt_len, test_udp_packet);
+
+		init_packet(pkt, i, group);
+
+		queue = gbl_args->queue[group][i % QUEUES_PER_GROUP];
+
+		ev = odp_packet_to_event(pkt);
+		if (odp_queue_enq(queue, ev)) {
+			LOG_ERR("Error: odp_queue_enq() failed\n");
+			return -1;
+		}
+	}
+
+	memset(thread_tbl, 0, sizeof(thread_tbl));
+	/* num_workers + 1: the main thread participates in both barriers */
+	odp_barrier_init(&gbl_args->init_barrier, num_workers + 1);
+	odp_barrier_init(&gbl_args->term_barrier, num_workers + 1);
+
+	/* Initialize lookup table with chained CRC values: contents are
+	 * pseudo-random but deterministic.
+	 * NOTE(review): 'i' is unsigned int while the table size is uint64_t;
+	 * sizes above UINT_MAX would make this loop never terminate. */
+	init_val = CRC_INIT_VAL;
+	for (i = 0; i < gbl_args->appl.lookup_tbl_size; i++) {
+		uint32_t *val0 = &gbl_args->lookup_tbl[i].val0;
+		uint32_t *val1 = &gbl_args->lookup_tbl[i].val1;
+
+		gbl_args->lookup_tbl[i].idx = i;
+
+		*val0 = i;
+		*val0 = odp_hash_crc32c(val0, sizeof(uint32_t), init_val);
+		*val1 = odp_hash_crc32c(val0, sizeof(uint32_t), init_val);
+		init_val = *val1;
+	}
+
+	/* Create worker threads */
+	cpu = odp_cpumask_first(&cpumask);
+	for (i = 0; i < num_workers; i++) {
+		odp_cpumask_t thd_mask;
+		odph_odpthread_params_t thr_params;
+
+		gbl_args->thread[i].idx = i;
+
+		memset(&thr_params, 0, sizeof(thr_params));
+		thr_params.start = run_thread;
+		thr_params.arg = &gbl_args->thread[i];
+		thr_params.thr_type = ODP_THREAD_WORKER;
+		thr_params.instance = instance;
+
+		stats[i] = &gbl_args->thread[i].stats;
+
+		odp_cpumask_zero(&thd_mask);
+		odp_cpumask_set(&thd_mask, cpu);
+		/* NOTE(review): thread create return value is not checked; a
+		 * failed create would leave the init barrier waiting. */
+		odph_odpthreads_create(&thread_tbl[i], &thd_mask,
+				       &thr_params);
+		cpu = odp_cpumask_next(&cpumask, cpu);
+	}
+
+	/* Run the measurement loop; nonzero means too few packets passed */
+	ret = print_stats(num_workers, stats, gbl_args->appl.time,
+			  gbl_args->appl.accuracy);
+
+	/* Master thread waits for other threads to exit */
+	for (i = 0; i < num_workers; ++i)
+		odph_odpthreads_join(&thread_tbl[i]);
+
+	for (i = 0; i < num_groups; i++) {
+		for (j = 0; j < QUEUES_PER_GROUP; j++) {
+			if (odp_queue_destroy(gbl_args->queue[i][j])) {
+				LOG_ERR("Error: queue destroy\n");
+				exit(EXIT_FAILURE);
+			}
+		}
+	}
+	if (odp_pool_destroy(pool)) {
+		LOG_ERR("Error: pool destroy\n");
+		exit(EXIT_FAILURE);
+	}
+
+	if (odp_shm_free(shm)) {
+		LOG_ERR("Error: shm free\n");
+		exit(EXIT_FAILURE);
+	}
+
+	if (odp_shm_free(lookup_tbl_shm)) {
+		LOG_ERR("Error: shm free\n");
+		exit(EXIT_FAILURE);
+	}
+
+	if (odp_term_local()) {
+		LOG_ERR("Error: term local\n");
+		exit(EXIT_FAILURE);
+	}
+
+	if (odp_term_global(instance)) {
+		LOG_ERR("Error: term global\n");
+		exit(EXIT_FAILURE);
+	}
+
+	return ret;
+}