Diffstat (limited to 'config/odp-linux-generic.conf')
-rw-r--r-- | config/odp-linux-generic.conf | 398 |
1 file changed, 398 insertions, 0 deletions
diff --git a/config/odp-linux-generic.conf b/config/odp-linux-generic.conf
new file mode 100644
index 000000000..93997ecb3
--- /dev/null
+++ b/config/odp-linux-generic.conf
@@ -0,0 +1,398 @@
+# ODP runtime configuration options
+#
+# This template configuration file (odp-linux-generic.conf) is hardcoded
+# during the configure/build phase, and the values defined here are used if
+# the optional ODP_CONFIG_FILE is not set. This configuration file MUST
+# include all configuration options.
+#
+# ODP_CONFIG_FILE can be used to override default values, and it doesn't
+# have to include all available options. The missing options are
+# replaced with hardcoded default values.
+#
+# The options defined here are implementation specific, and valid option
+# values should be checked from the implementation code.
+#
+# See libconfig syntax: https://hyperrealm.github.io/libconfig/libconfig_manual.html#Configuration-Files
+
+# Mandatory fields
+odp_implementation = "linux-generic"
+config_file_version = "0.1.28"
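+
+# As an illustrative sketch (not part of the shipped defaults): a file passed
+# via ODP_CONFIG_FILE needs the two mandatory fields above, matching the
+# built-in values, plus only the options to change. For example:
+#
+#   odp_implementation = "linux-generic"
+#   config_file_version = "0.1.28"
+#
+#   system: {
+#       cpu_mhz_max = 2400
+#   }
+#
+# All other options keep the hardcoded defaults from this file.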
+
+# System options
+system: {
+        # CPU frequency value returned by odp_cpu_hz() and odp_cpu_hz_id()
+        # calls on platforms where the frequency isn't available using
+        # standard Linux methods.
+        cpu_mhz = 0
+
+        # CPU max frequency value returned by odp_cpu_hz_max() and
+        # odp_cpu_hz_max_id() calls on platforms where the max frequency
+        # isn't available using standard Linux methods.
+        cpu_mhz_max = 1400
+
+        # When enabled (1), the implementation reads the CPU frequency
+        # values from the OS only once during ODP initialization. Enabling
+        # this option removes system calls from the odp_cpu_hz() and
+        # odp_cpu_hz_id() implementations.
+        #
+        # NOTE: This option should only be used on systems where CPU
+        # frequency scaling is disabled.
+        cpu_hz_static = 0
+
+        # Maximum number of ODP threads that can be created.
+        # odp_thread_count_max() returns this value or the build time
+        # maximum ODP_THREAD_COUNT_MAX, whichever is lower. This setting
+        # can be used to reduce thread related resource usage.
+        thread_count_max = 256
+}
+
+# Shared memory options
+shm: {
+        # Number of cached default size huge pages. These pages are
+        # allocated during odp_init_global() and freed back to the kernel
+        # in odp_term_global(). A value of zero means no pages are cached.
+        # Negative values must not be used here; they are reserved for
+        # future implementations.
+        #
+        # ODP will reserve as many huge pages as possible, which may be
+        # fewer than requested here if the system does not have enough huge
+        # pages available.
+        #
+        # When using process mode threads, this value should be set to 0
+        # because the current implementation won't work properly otherwise.
+        num_cached_hp = 0
+
+        # Huge page usage limit in kilobytes. Memory reservations larger
+        # than this value are done using huge pages (if available). Smaller
+        # reservations are done using normal pages to conserve memory.
+        huge_page_limit_kb = 64
+
+        # Amount of memory pre-reserved for ODP_SHM_SINGLE_VA usage in
+        # kilobytes.
+        single_va_size_kb = 262144
+}
+
+# Pool options
+pool: {
+        # Default thread local cache size. The cache size in pool
+        # parameters is initialized to this value. The value must be a
+        # multiple of burst_size (min 2 x burst_size).
+        #
+        # The total maximum number of cached events is the number of
+        # threads using the pool multiplied by local_cache_size.
+        local_cache_size = 256
+
+        # Transfer size between the local cache and the global pool. Must
+        # be larger than zero.
+        burst_size = 32
+
+        # Packet pool options
+        pkt: {
+                # Maximum packet data length in bytes
+                max_len = 65536
+
+                # Maximum number of packets per pool. A power of two minus
+                # one results in optimal memory usage
+                # (e.g. (256 * 1024) - 1).
+                max_num = 262143
+
+                # Base alignment for segment data. When set to zero, the
+                # cache line size is used. Use power of two values. This is
+                # also the maximum value for the packet pool alignment
+                # parameter.
+                base_align = 0
+        }
+
+        buf: {
+                # Minimum data alignment. The alignment request in pool
+                # parameters is rounded up to this value. When set to zero,
+                # the cache line size is used. Use power of two values.
+                min_align = 0
+        }
+}
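+
+# As an illustrative worked example of the worst case implied by the
+# defaults above: 16 threads using the same pool may cache up to
+# 16 * 256 = 4096 events in total, so a pool's 'num' parameter should be
+# dimensioned with the thread count and local_cache_size in mind.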
+
+# General pktio options
+pktio: {
+        # Frame start offset from the packet base pointer at packet input.
+        # This can be used (together with the pool.pkt.base_align option)
+        # to tune packet data alignment for received frames. Currently,
+        # packet IO drivers (zero-copy DPDK, loop and ipc) that do not copy
+        # data ignore this option.
+        pktin_frame_offset = 0
+
+        # Pool size allocated for potential completion events for
+        # transmitted and dropped packets. A separate pool is allocated for
+        # each packet IO instance.
+        tx_compl_pool_size = 1024
+}
+
+# DPDK pktio options
+pktio_dpdk: {
+        # Default options
+        num_rx_desc = 128
+        num_tx_desc = 512
+        rx_drop_en = 0
+
+        # Store the RX RSS hash result as the ODP flow hash
+        set_flow_hash = 0
+
+        # Enable reception of Ethernet frames sent to any multicast group
+        multicast_en = 1
+
+        # Driver specific options (use PMD names from DPDK)
+        net_ixgbe: {
+                rx_drop_en = 1
+        }
+}
+
+# XDP pktio options
+pktio_xdp: {
+        # Number of RX and TX descriptors to be reserved for AF_XDP socket
+        # memory. Adjusting these may improve performance depending on NIC
+        # ring configuration. In zero-copy mode, packet pools used as pktio
+        # pools need to be large enough to accommodate the RX and TX
+        # descriptors of every pktio queue. Values must be a power of two.
+        num_rx_desc = 1024
+        num_tx_desc = 1024
+}
+
+queue_basic: {
+        # Maximum queue size. The value must be a power of two.
+        max_queue_size = 8192
+
+        # Default queue size. The value must be a power of two.
+        default_queue_size = 4096
+}
+
+sched_basic: {
+        # Priority level spread
+        #
+        # Each priority level is spread into multiple scheduler internal
+        # queues. This value defines the number of those queues. The
+        # minimum value is 1. Each thread prefers one of the queues over
+        # the others. A higher spread value typically improves parallelism
+        # and thus is better for high thread counts, but causes an uneven
+        # service level for low thread counts. Typically, the optimal value
+        # is the number of threads using the scheduler.
+        prio_spread = 4
+
+        # Weight of the preferred scheduler internal queue
+        #
+        # Each thread prefers one of the internal queues over the others.
+        # This value controls how many times the preferred queue is polled
+        # between polls to another internal queue. The minimum value is 1.
+        # A higher value typically improves parallelism as threads work
+        # mostly on their preferred queues, but causes an uneven service
+        # level for low thread counts as non-preferred queues are served
+        # less often.
+        prio_spread_weight = 63
+
+        # Dynamic load balancing of scheduler internal queues
+        #
+        # When enabled (1), the scheduler periodically checks internal
+        # queue load levels and moves event queues from one spread to
+        # another in order to even out the loads. The load level of an
+        # internal queue (group/prio/spread) is measured as the number of
+        # event queues allocated to it, divided by the number of threads
+        # serving it.
+        load_balance = 1
+
+        # Burst size configuration per priority. The first array element
+        # represents the highest queue priority. The scheduler tries to get
+        # burst_size_default[prio] events from a queue and stashes those
+        # that cannot be passed to the application immediately. More events
+        # than the default burst size may be returned if the application
+        # requests more, but never more than burst_size_max[prio].
+        #
+        # Large burst sizes improve throughput, but decrease application
+        # responsiveness to higher priority events due to head of line
+        # blocking caused by a burst of lower priority events.
+        burst_size_default = [ 32, 32, 32, 32, 32, 16, 8, 4]
+        burst_size_max = [255, 255, 255, 255, 255, 16, 16, 8]
+
+        # Burst size configuration per priority for each scheduled queue
+        # type. Overrides the default values set in 'burst_size_default'
+        # and 'burst_size_max' if != 0.
+        burst_size_parallel = [0, 0, 0, 0, 0, 0, 0, 0]
+        burst_size_max_parallel = [0, 0, 0, 0, 0, 0, 0, 0]
+        burst_size_atomic = [0, 0, 0, 0, 0, 0, 0, 0]
+        burst_size_max_atomic = [0, 0, 0, 0, 0, 0, 0, 0]
+        burst_size_ordered = [0, 0, 0, 0, 0, 0, 0, 0]
+        burst_size_max_ordered = [0, 0, 0, 0, 0, 0, 0, 0]
+
+        # Automatically updated schedule groups
+        #
+        # DEPRECATED: use the odp_schedule_config() API instead
+        #
+        # The API specification defines that ODP_SCHED_GROUP_ALL,
+        # _WORKER and _CONTROL are updated automatically. These options can
+        # be used to disable these groups when they are not used. Set a
+        # value to 0 to disable a group. Performance may improve when
+        # unused groups are disabled.
+        group_enable: {
+                all = 1
+                worker = 1
+                control = 1
+        }
+
+        # Ordered queue reorder stash size
+        #
+        # Number of events each thread can stash internally before having
+        # to wait for the right order context. The reorder stash can
+        # improve performance if threads process events in bursts. If
+        # 'order_stash_size' > 0, events may be dropped by the
+        # implementation if the target queue is full. To prevent this, set
+        # 'order_stash_size' to 0.
+        order_stash_size = 512
+
+        # Power saving options for schedule with wait
+        #
+        # When waiting for events during a schedule call, save power by
+        # sleeping in the poll loop. First, run the schedule loop normally
+        # for poll_time_nsec nanoseconds. If there are no events to
+        # schedule in that time, continue polling, but sleep on each round.
+        # The sleep time is sleep_time_nsec nanoseconds, or the time to the
+        # next timer expiration, whichever is smaller. Timer pools are
+        # scanned just before sleep.
+        #
+        # During sleep, the thread is not polling for packet input or
+        # timers. Each thread measures time and sleeps independently of
+        # other threads.
+        #
+        # When using this feature, it may be necessary to decrease
+        # /proc/<pid>/timerslack_ns, or to use a real-time priority.
+        # Sleeping may have an adverse effect on performance for a short
+        # time after sleep.
+        powersave: {
+                # Time in nsec to poll before sleeping
+                #
+                # <1: Disabled. Never sleep. sleep_time_nsec is ignored.
+                poll_time_nsec = 0
+
+                # Time in nsec to sleep
+                #
+                # Must be less than one second. The actual sleep time may
+                # vary.
+                sleep_time_nsec = 0
+        }
+}
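+
+# As an illustrative sketch (the values are hypothetical, not shipped
+# defaults), an override file could let idle scheduler threads sleep after
+# busy-polling for 100 us:
+#
+#   sched_basic: {
+#       powersave: {
+#           poll_time_nsec = 100000
+#           sleep_time_nsec = 50000
+#       }
+#   }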
+
+stash: {
+        # Maximum number of stashes
+        max_num = 512
+
+        # Maximum number of objects in a stash
+        #
+        # The value may be rounded up by the implementation. For optimal
+        # memory usage, set the value to a power of two minus one.
+        max_num_obj = 4095
+}
+
+timer: {
+        # Use the inline timer implementation
+        #
+        # By default, timer processing is done in background threads (one
+        # thread per timer pool). With the inline implementation, timers
+        # are processed by ODP application threads instead. When using
+        # inline timers, the application has to call odp_schedule() or
+        # odp_queue_deq() regularly to actuate timer processing.
+        #
+        # 0: Use POSIX timers and background threads to process timers
+        # 1: Use the inline timer implementation and application threads
+        #    to process timers
+        inline = 0
+
+        # Inline timer poll interval
+        #
+        # When set to 1, inline timers are polled during every schedule
+        # round. Increasing the value reduces timer processing overhead
+        # while decreasing accuracy. Ignored when inline timers are not
+        # used.
+        inline_poll_interval = 10
+
+        # Inline timer poll interval in nanoseconds
+        #
+        # When inline_poll_interval is larger than 1, use this option to
+        # limit the inline timer polling rate in nanoseconds. By default,
+        # this defines the maximum rate at which a thread may poll timers.
+        # If a timer pool is created with a higher resolution than this,
+        # the polling rate is increased accordingly. Ignored when inline
+        # timers are not used.
+        inline_poll_interval_nsec = 500000
+
+        # Inline timer use of threads
+        #
+        # Select which thread types process non-private timer pools in the
+        # inline timer implementation. Thread type does not affect private
+        # timer pool processing; those are always processed by the thread
+        # which created the pool. Ignored when inline timers are not used.
+        #
+        # 0: Both control and worker threads process non-private timer
+        #    pools
+        # 1: Only worker threads process non-private timer pools
+        # 2: Only control threads process non-private timer pools
+        inline_thread_type = 0
+}
+
+ipsec: {
+        # Packet ordering method for asynchronous IPsec processing
+        #
+        # Asynchronous IPsec processing maintains the original packet order
+        # when started within an ordered or atomic scheduling context. In
+        # addition to that, the ODP API specifies that the order of IPsec
+        # processing (i.e. anti-replay window update and sequence number
+        # generation) is the same as the original packet order.
+        #
+        # The following settings control how the order is maintained in
+        # asynchronous IPsec operations. They have no effect on synchronous
+        # operations, where the ODP application is responsible for the
+        # ordering.
+        #
+        # Values:
+        #
+        # 0: Ordering is not attempted.
+        #
+        #    This has the lowest overhead and the greatest parallelism but
+        #    is not fully compliant with the API specification.
+        #
+        #    Lack of ordering means that outbound IPsec packets, although
+        #    remaining in the correct order, may have their sequence
+        #    numbers assigned out of order. This can cause unexpected
+        #    packet loss if the anti-replay window of the receiving end is
+        #    not large enough to cover the possible misordering.
+        #
+        #    Similarly, since the anti-replay check is not done in the
+        #    reception order, the anti-replay check sees additional packet
+        #    misordering on top of the true misordering of the received
+        #    packets. This means that a larger anti-replay window may be
+        #    required to avoid packet loss.
+        #
+        # 1: Ordering by waiting
+        #
+        #    Correct processing order is maintained by a simple mechanism
+        #    that makes a thread wait until its scheduling context has
+        #    reached the head of its input queue.
+        #
+        #    This limits parallelism when a single input queue is used,
+        #    even when packets get distributed to multiple SAs.
+        ordering: {
+                # Ordering method for asynchronous inbound operations.
+                async_inbound = 0
+
+                # Ordering method for asynchronous outbound operations.
+                async_outbound = 0
+        }
+}
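+
+# As an illustrative sketch, an override file enforcing ordering by waiting
+# for asynchronous outbound IPsec (trading some parallelism for strict API
+# compliance) could set:
+#
+#   ipsec: {
+#       ordering: {
+#           async_outbound = 1
+#       }
+#   }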
+
+ml: {
+        # Enable onnxruntime profiling. When enabled, a JSON file is
+        # generated after inference; chrome://tracing/ can be used to view
+        # the profile. Use 0 to disable and 1 to enable profiling.
+        enable_profiling = 0
+
+        # Choose the onnxruntime execution mode, which can be "SEQUENTIAL"
+        # or "PARALLEL"
+        execution_mode = "SEQUENTIAL"
+
+        # Set the number of threads used to parallelize the execution of
+        # the graph across nodes. A value of 0 means onnxruntime will pick
+        # a default.
+        inter_op_num_threads = 0
+
+        # Set the number of threads used to parallelize the execution
+        # within a node. A value of 0 means onnxruntime will pick a
+        # default.
+        intra_op_num_threads = 0
+
+        # Set the graph optimization level. Valid values are:
+        # DISABLE_ALL: disables all optimizations
+        # ENABLE_BASIC: enables basic optimizations
+        # ENABLE_EXTENDED: enables basic and extended optimizations
+        # ENABLE_ALL: enables all available optimizations including layout
+        #             optimization
+        graph_optimization_level = "ENABLE_ALL"
+
+        # Serialize the optimized model to disk. When initializing a
+        # session with the same model, there is no need to apply the
+        # optimizations again, thus reducing model startup time.
+        optimized_model_filepath = ""
+}
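+
+# As an illustrative sketch, an override file enabling onnxruntime profiling
+# and model caching for a debug run (the file path is hypothetical) could
+# set:
+#
+#   ml: {
+#       enable_profiling = 1
+#       optimized_model_filepath = "/tmp/model-optimized.onnx"
+#   }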