Diffstat (limited to 'config/odp-linux-generic.conf')
-rw-r--r--  config/odp-linux-generic.conf  398
1 file changed, 398 insertions(+), 0 deletions(-)
diff --git a/config/odp-linux-generic.conf b/config/odp-linux-generic.conf
new file mode 100644
index 000000000..93997ecb3
--- /dev/null
+++ b/config/odp-linux-generic.conf
@@ -0,0 +1,398 @@
+# ODP runtime configuration options
+#
+# This template configuration file (odp-linux-generic.conf) is hardcoded
+# during the configure/build phase, and the values defined here are used
+# if the optional ODP_CONFIG_FILE is not set. This configuration file
+# MUST include all configuration options.
+#
+# ODP_CONFIG_FILE can be used to override default values and it doesn't
+# have to include all available options. The missing options are
+# replaced with hardcoded default values.
+#
+# The options defined here are implementation specific, and valid option
+# values should be checked in the implementation code.
+#
+# See libconfig syntax: https://hyperrealm.github.io/libconfig/libconfig_manual.html#Configuration-Files
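+#
+# As an illustrative example, a file passed via ODP_CONFIG_FILE could
+# override just a few values (a hypothetical override file; values are
+# for illustration only):
+#
+#   odp_implementation = "linux-generic"
+#   config_file_version = "0.1.28"
+#
+#   system: {
+#     cpu_mhz_max = 2000
+#   }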
+
+# Mandatory fields
+odp_implementation = "linux-generic"
+config_file_version = "0.1.28"
+
+# System options
+system: {
+ # CPU frequency value returned by odp_cpu_hz() and odp_cpu_hz_id()
+ # calls on platforms where frequency isn't available using standard
+ # Linux methods.
+ cpu_mhz = 0
+
+ # CPU max frequency value returned by odp_cpu_hz_max() and
+ # odp_cpu_hz_max_id() calls on platforms where max frequency isn't
+ # available using standard Linux methods.
+ cpu_mhz_max = 1400
+
+ # When enabled (1), the implementation reads the CPU frequency values
+ # from the OS only once during ODP initialization. Enabling this
+ # option removes system calls from the odp_cpu_hz() and
+ # odp_cpu_hz_id() implementations.
+ #
+ # NOTE: This option should only be used on systems where CPU frequency
+ # scaling is disabled.
+ cpu_hz_static = 0
+
+ # Maximum number of ODP threads that can be created.
+ # odp_thread_count_max() returns this value or the build time
+ # maximum ODP_THREAD_COUNT_MAX, whichever is lower. This setting
+ # can be used to reduce thread related resource usage.
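+ #
+ # For example, if the build time maximum ODP_THREAD_COUNT_MAX were 128
+ # (a hypothetical build value), odp_thread_count_max() would return
+ # 128 rather than the larger value configured below.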
+ thread_count_max = 256
+}
+
+# Shared memory options
+shm: {
+ # Number of cached default size huge pages. These pages are allocated
+ # during odp_init_global() and freed back to the kernel in
+ # odp_term_global(). A value of zero means no pages are cached.
+ # Do not use negative values here; they are reserved for future
+ # implementations.
+ #
+ # ODP will reserve as many huge pages as possible, which may be less
+ # than requested here if the system does not have enough huge pages
+ # available.
+ #
+ # When using process mode threads, this value should be set to 0
+ # because the current implementation won't work properly otherwise.
+ num_cached_hp = 0
+
+ # Huge page usage limit in kilobytes. Memory reservations larger than
+ # this value are done using huge pages (if available). Smaller
+ # reservations are done using normal pages to conserve memory.
+ huge_page_limit_kb = 64
+
+ # Amount of memory pre-reserved for ODP_SHM_SINGLE_VA usage in kilobytes
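+ # (the default below, 262144 kB, equals 256 MB)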
+ single_va_size_kb = 262144
+}
+
+# Pool options
+pool: {
+ # Default thread local cache size. Cache size in pool parameters is
+ # initialized to this value. Value must be a multiple of burst_size
+ # (min 2 x burst_size).
+ #
+ # The total maximum number of cached events is the number of threads
+ # using the pool multiplied by local_cache_size.
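+ #
+ # For example, with the defaults below (local_cache_size = 256,
+ # burst_size = 32), 256 is a valid cache size (256 = 8 x 32, which is
+ # at least 2 x 32), and a pool used by e.g. 16 threads may hold up to
+ # 16 x 256 = 4096 events in thread local caches.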
+ local_cache_size = 256
+
+ # Transfer size between local cache and global pool. Must be larger
+ # than zero.
+ burst_size = 32
+
+ # Packet pool options
+ pkt: {
+ # Maximum packet data length in bytes
+ max_len = 65536
+
+ # Maximum number of packets per pool. A power of two minus one
+ # results in optimal memory usage (e.g. (256 * 1024) - 1).
+ max_num = 262143
+
+ # Base alignment for segment data. When set to zero,
+ # cache line size is used. Use power of two values. This is
+ # also the maximum value for the packet pool alignment param.
+ base_align = 0
+ }
+
+ buf: {
+ # Minimum data alignment. The alignment request in pool
+ # parameters is rounded up to this value. When set to zero,
+ # cache line size is used. Use power of two values.
+ min_align = 0
+ }
+}
+
+# General pktio options
+pktio: {
+ # Frame start offset from packet base pointer at packet input. This can
+ # be used (together with pool.pkt.base_align option) to tune packet data
+ # alignment for received frames. Currently, packet IO drivers
+ # (zero-copy DPDK, loop and ipc) that do not copy data ignore this
+ # option.
+ pktin_frame_offset = 0
+
+ # Pool size allocated for potential completion events for transmitted
+ # and dropped packets. A separate pool is created for each packet IO
+ # instance.
+ tx_compl_pool_size = 1024
+}
+
+# DPDK pktio options
+pktio_dpdk: {
+ # Default options
+ num_rx_desc = 128
+ num_tx_desc = 512
+ rx_drop_en = 0
+
+ # Store RX RSS hash result as ODP flow hash
+ set_flow_hash = 0
+
+ # Enable reception of Ethernet frames sent to any multicast group
+ multicast_en = 1
+
+ # Driver specific options (use PMD names from DPDK)
+ net_ixgbe: {
+ rx_drop_en = 1
+ }
+}
+
+# XDP pktio options
+pktio_xdp: {
+ # Number of RX and TX descriptors to be reserved for AF_XDP socket
+ # memory. Adjusting these may improve performance depending on NIC ring
+ # configuration. In zero-copy mode, packet pools used as pktio pools
+ # need to be large enough to accommodate RX and TX descriptors of every
+ # pktio queue. Values must be powers of two.
+ num_rx_desc = 1024
+ num_tx_desc = 1024
+}
+
+queue_basic: {
+ # Maximum queue size. Value must be a power of two.
+ max_queue_size = 8192
+
+ # Default queue size. Value must be a power of two.
+ default_queue_size = 4096
+}
+
+sched_basic: {
+ # Priority level spread
+ #
+ # Each priority level is spread into multiple scheduler internal queues.
+ # This value defines the number of those queues. Minimum value is 1.
+ # Each thread prefers one of the queues over the others. A higher
+ # spread value typically improves parallelism and is thus better for
+ # high thread counts, but causes an uneven service level for low
+ # thread counts. Typically, the optimal value is the number of
+ # threads using the scheduler.
+ prio_spread = 4
+
+ # Weight of the preferred scheduler internal queue
+ #
+ # Each thread prefers one of the internal queues over the others.
+ # This value controls how many times the preferred queue is polled
+ # for each poll of another internal queue. Minimum value is 1. A
+ # higher value typically improves parallelism as threads work mostly
+ # on their preferred queues, but causes an uneven service level for
+ # low thread counts as non-preferred queues are served less often.
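+ #
+ # For example, with prio_spread = 4 and the default weight of 63
+ # below, a thread polls its preferred internal queue 63 times for
+ # each poll of one of the other three queues of the spread.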
+ prio_spread_weight = 63
+
+ # Dynamic load balance of scheduler internal queues
+ #
+ # When enabled (1), the scheduler periodically checks internal queue
+ # load levels and moves event queues from one spread to another in
+ # order to even out the loads. The load level of an internal queue
+ # (group/prio/spread) is measured as the number of event queues
+ # allocated to it, divided by the number of threads serving it.
+ load_balance = 1
+
+ # Burst size configuration per priority. The first array element
+ # represents the highest queue priority. The scheduler tries to get
+ # burst_size_default[prio] events from a queue and stashes those that
+ # cannot be passed to the application immediately. More events than
+ # the default burst size may be returned if the application requests
+ # them, but no more than burst_size_max[prio].
+ #
+ # Large burst sizes improve throughput, but decrease application
+ # responsiveness to higher priority events due to head of line blocking
+ # caused by a burst of lower priority events.
+ burst_size_default = [ 32, 32, 32, 32, 32, 16, 8, 4]
+ burst_size_max = [255, 255, 255, 255, 255, 16, 16, 8]
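+
+ # For example, with the values above, a queue at the highest priority
+ # is scheduled in bursts of up to 32 events by default, but a single
+ # odp_schedule_multi() call that requests more events may return up
+ # to 255 events from it.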
+
+ # Burst size configuration per priority for each scheduled queue type.
+ # Overrides default values set in 'burst_size_default' and
+ # 'burst_size_max' if != 0.
+ burst_size_parallel = [0, 0, 0, 0, 0, 0, 0, 0]
+ burst_size_max_parallel = [0, 0, 0, 0, 0, 0, 0, 0]
+ burst_size_atomic = [0, 0, 0, 0, 0, 0, 0, 0]
+ burst_size_max_atomic = [0, 0, 0, 0, 0, 0, 0, 0]
+ burst_size_ordered = [0, 0, 0, 0, 0, 0, 0, 0]
+ burst_size_max_ordered = [0, 0, 0, 0, 0, 0, 0, 0]
+
+ # Automatically updated schedule groups
+ #
+ # DEPRECATED: use odp_schedule_config() API instead
+ #
+ # API specification defines that ODP_SCHED_GROUP_ALL,
+ # _WORKER and _CONTROL are updated automatically. These options can be
+ # used to disable these groups when they are not used. Set the value
+ # to 0 to disable a group. Performance may improve when unused groups
+ # are disabled.
+ group_enable: {
+ all = 1
+ worker = 1
+ control = 1
+ }
+
+ # Ordered queue reorder stash size
+ #
+ # Number of events each thread can stash internally before having to
+ # wait for the right order context. Reorder stash can improve
+ # performance if threads process events in bursts. If 'order_stash_size'
+ # > 0, events may be dropped by the implementation if the target queue
+ # is full. To prevent this, set 'order_stash_size' to 0.
+ order_stash_size = 512
+
+ # Power saving options for schedule with wait
+ #
+ # When waiting for events during a schedule call, save power by
+ # sleeping in the poll loop. First, run the schedule loop normally for
+ # poll_time_nsec nanoseconds. If there are no events to schedule in that
+ # time, continue polling, but sleep on each round. Sleep time is
+ # sleep_time_nsec nanoseconds, or the time to the next timer expiration,
+ # whichever is smaller. Timer pools are scanned just before sleep.
+ #
+ # During sleep, the thread is not polling for packet input or timers.
+ # Each thread measures time and sleeps independently of other threads.
+ #
+ # When using this feature, it may be necessary to decrease
+ # /proc/<pid>/timerslack_ns, or use a real-time priority. Sleeping may
+ # have an adverse effect on performance for a short time after sleep.
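+ #
+ # As an illustrative example (values chosen for illustration only),
+ # the following would poll normally for 100 ms and then sleep 100 us
+ # per round until an event arrives:
+ #
+ #   poll_time_nsec = 100000000
+ #   sleep_time_nsec = 100000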
+ powersave: {
+ # Time in nsec to poll before sleeping
+ #
+ # <1: Disabled. Never sleep. sleep_time_nsec is ignored.
+ poll_time_nsec = 0
+
+ # Time in nsec to sleep
+ #
+ # Must be less than one second. Actual sleep time may vary.
+ sleep_time_nsec = 0
+ }
+}
+
+stash: {
+ # Maximum number of stashes
+ max_num = 512
+
+ # Maximum number of objects in a stash
+ #
+ # The value may be rounded up by the implementation. For optimal
+ # memory usage, set the value to a power of two minus one.
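+ # (The default below, 4095 = 2^12 - 1, follows this recommendation.)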
+ max_num_obj = 4095
+}
+
+timer: {
+ # Use inline timer implementation
+ #
+ # By default, timer processing is done in background threads (thread per
+ # timer pool). With the inline implementation, timers are processed
+ # by ODP application threads instead. When using inline timers, the
+ # application has to call odp_schedule() or odp_queue_deq() regularly
+ # to actuate timer processing.
+ #
+ # 0: Use POSIX timer and background threads to process timers
+ # 1: Use inline timer implementation and application threads to process
+ # timers
+ inline = 0
+
+ # Inline timer poll interval
+ #
+ # When set to 1, inline timers are polled during every schedule round.
+ # Increasing the value reduces timer processing overhead while
+ # decreasing accuracy. Ignored when inline timer is not used.
+ inline_poll_interval = 10
+
+ # Inline timer poll interval in nanoseconds
+ #
+ # When inline_poll_interval is larger than 1, use this option to limit
+ # the inline timer polling rate in nanoseconds. By default, this
+ # defines the maximum rate at which a thread may poll timers. If a
+ # timer pool is created with a higher resolution than this, the
+ # polling rate is increased accordingly. Ignored when inline timer is
+ # not used.
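+ #
+ # For example, with inline_poll_interval = 10 and the default of
+ # 500000 nsec below, a thread polls timers at most once per 10
+ # schedule rounds and not more often than every 500 us, unless a
+ # timer pool with a finer resolution raises the polling rate.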
+ inline_poll_interval_nsec = 500000
+
+ # Inline timer use of threads
+ #
+ # Select which thread types process non-private timer pools in the
+ # inline timer implementation. Thread type does not affect private
+ # timer pool processing; those pools are always processed by the
+ # thread which created them. Ignored when inline timer is not used.
+ #
+ # 0: Both control and worker threads process non-private timer pools
+ # 1: Only worker threads process non-private timer pools
+ # 2: Only control threads process non-private timer pools
+ inline_thread_type = 0
+}
+
+ipsec: {
+ # Packet ordering method for asynchronous IPsec processing
+ #
+ # Asynchronous IPsec processing maintains the original packet order
+ # when started within an ordered or atomic scheduling context. In
+ # addition, the ODP API specifies that the order of IPsec processing
+ # (i.e. anti-replay window update and sequence number generation)
+ # is the same as the original packet order.
+ #
+ # The following settings control how the order is maintained in
+ # asynchronous IPsec operations. They have no effect on synchronous
+ # operations, where the ODP application is responsible for the
+ # ordering.
+ #
+ # Values:
+ #
+ # 0: Ordering is not attempted.
+ #
+ # This has the lowest overhead and the greatest parallelism but
+ # is not fully compliant with the API specification.
+ #
+ # Lack of ordering means that outbound IPsec packets, although
+ # remaining in the correct order, may have their sequence numbers
+ # assigned out of order. This can cause unexpected packet loss if
+ # the anti-replay window of the receiving end is not large enough
+ # to cover the possible misordering.
+ #
+ # Similarly, since the anti-replay check is not done in reception
+ # order, it sees additional packet misordering on top of the true
+ # misordering of the received packets. This
+ # means that a larger anti-replay window may be required to avoid
+ # packet loss.
+ #
+ # 1: Ordering by waiting
+ #
+ # Correct processing order is maintained by a simple mechanism
+ # that makes a thread wait until its scheduling context has
+ # reached the head of its input queue.
+ #
+ # This limits parallelism when a single input queue is used, even
+ # when packets get distributed to multiple SAs.
+ ordering: {
+ # Ordering method for asynchronous inbound operations.
+ async_inbound = 0
+
+ # Ordering method for asynchronous outbound operations.
+ async_outbound = 0
+ }
+}
+
+ml: {
+ # Enable onnxruntime profiling. When enabled, a JSON file is
+ # generated after inference; chrome://tracing/ can be used to view
+ # the profile. Use 0 to disable and 1 to enable profiling.
+ enable_profiling = 0
+
+ # Choose onnxruntime execution mode, which can be "SEQUENTIAL" or
+ # "PARALLEL"
+ execution_mode = "SEQUENTIAL"
+
+ # Set the number of threads used to parallelize the execution of the
+ # graph across nodes. A value of 0 means onnxruntime will pick a default.
+ inter_op_num_threads = 0
+
+ # Set the number of threads used to parallelize the execution within
+ # a node. A value of 0 means onnxruntime will pick a default.
+ intra_op_num_threads = 0
+
+ # Set graph optimization level. Valid values are:
+ # DISABLE_ALL: disables all optimizations
+ # ENABLE_BASIC: enables basic optimizations
+ # ENABLE_EXTENDED: enables basic and extended optimizations
+ # ENABLE_ALL: enables all available optimizations including layout optimization
+ graph_optimization_level = "ENABLE_ALL"
+
+ # Serialize the optimized model to disk. When a session is later
+ # initialized with the same model, the optimizations do not need to
+ # be applied again, which reduces model startup time.
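+ #
+ # For example (a hypothetical path):
+ #   optimized_model_filepath = "/tmp/odp_model_opt.onnx"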
+ optimized_model_filepath = ""
+}