aboutsummaryrefslogtreecommitdiff
path: root/platform/linux-generic
diff options
context:
space:
mode:
Diffstat (limited to 'platform/linux-generic')
-rw-r--r--platform/linux-generic/Makefile.am39
-rw-r--r--platform/linux-generic/arch/aarch64/odp_cpu.h166
-rw-r--r--platform/linux-generic/arch/aarch64/odp_cpu_idling.h39
-rw-r--r--platform/linux-generic/arch/aarch64/odp_llsc.h170
-rw-r--r--platform/linux-generic/arch/aarch64/odp_wait_until.h100
-rw-r--r--platform/linux-generic/arch/arm/odp_atomic.h109
-rw-r--r--platform/linux-generic/arch/arm/odp_cpu.h56
-rw-r--r--platform/linux-generic/arch/arm/odp_cpu_idling.h39
-rw-r--r--platform/linux-generic/arch/arm/odp_llsc.h96
-rw-r--r--platform/linux-generic/arch/default/odp/api/abi/atomic_generic.h10
-rw-r--r--platform/linux-generic/arch/default/odp_cpu.h2
-rw-r--r--platform/linux-generic/arch/default/odp_cpu_idling.h31
-rw-r--r--platform/linux-generic/arch/default/odp_wait_until.h53
-rw-r--r--platform/linux-generic/arch/x86/odp_time_cpu.c96
-rw-r--r--platform/linux-generic/example/Makefile.am5
-rw-r--r--platform/linux-generic/example/ml/.gitignore5
-rw-r--r--platform/linux-generic/example/ml/Makefile.am46
-rw-r--r--platform/linux-generic/example/ml/README.md94
-rw-r--r--platform/linux-generic/example/ml/example_digit.csv1
-rw-r--r--platform/linux-generic/example/ml/mnist-12.onnxbin0 -> 26143 bytes
-rw-r--r--platform/linux-generic/example/ml/mnist.c300
-rw-r--r--platform/linux-generic/example/ml/model_explorer.c88
-rw-r--r--platform/linux-generic/example/ml/model_read.c47
-rw-r--r--platform/linux-generic/example/ml/model_read.h29
-rwxr-xr-xplatform/linux-generic/example/ml/odp_ml_run_mnist.sh9
-rwxr-xr-xplatform/linux-generic/example/ml/odp_ml_run_model_explorer.sh8
-rwxr-xr-xplatform/linux-generic/example/ml/odp_ml_run_simple_linear.sh8
-rw-r--r--platform/linux-generic/example/ml/simple_linear.c281
-rw-r--r--platform/linux-generic/example/ml/simple_linear.onnxbin0 -> 214 bytes
-rw-r--r--platform/linux-generic/include-abi/odp/api/abi/atomic.h2
-rw-r--r--platform/linux-generic/include-abi/odp/api/abi/buffer_types.h2
-rw-r--r--platform/linux-generic/include-abi/odp/api/abi/classification.h2
-rw-r--r--platform/linux-generic/include-abi/odp/api/abi/comp.h2
-rw-r--r--platform/linux-generic/include-abi/odp/api/abi/crypto_types.h2
-rw-r--r--platform/linux-generic/include-abi/odp/api/abi/dma_types.h2
-rw-r--r--platform/linux-generic/include-abi/odp/api/abi/event_types.h9
-rw-r--r--platform/linux-generic/include-abi/odp/api/abi/ipsec_types.h2
-rw-r--r--platform/linux-generic/include-abi/odp/api/abi/ml_types.h45
-rw-r--r--platform/linux-generic/include-abi/odp/api/abi/packet_io_types.h2
-rw-r--r--platform/linux-generic/include-abi/odp/api/abi/packet_types.h2
-rw-r--r--platform/linux-generic/include-abi/odp/api/abi/pool_types.h2
-rw-r--r--platform/linux-generic/include-abi/odp/api/abi/proto_stats_types.h2
-rw-r--r--platform/linux-generic/include-abi/odp/api/abi/queue_types.h2
-rw-r--r--platform/linux-generic/include-abi/odp/api/abi/shared_memory.h2
-rw-r--r--platform/linux-generic/include-abi/odp/api/abi/stash_types.h2
-rw-r--r--platform/linux-generic/include-abi/odp/api/abi/sync.h2
-rw-r--r--platform/linux-generic/include-abi/odp/api/abi/ticketlock.h2
-rw-r--r--platform/linux-generic/include/odp/api/plat/event_inline_types.h1
-rw-r--r--platform/linux-generic/include/odp/api/plat/event_inlines.h27
-rw-r--r--platform/linux-generic/include/odp/api/plat/packet_inline_types.h1
-rw-r--r--platform/linux-generic/include/odp/api/plat/packet_inlines.h4
-rw-r--r--platform/linux-generic/include/odp_bitset.h18
-rw-r--r--platform/linux-generic/include/odp_buffer_internal.h7
-rw-r--r--platform/linux-generic/include/odp_config_internal.h9
-rw-r--r--platform/linux-generic/include/odp_event_internal.h3
-rw-r--r--platform/linux-generic/include/odp_global_data.h2
-rw-r--r--platform/linux-generic/include/odp_init_internal.h3
-rw-r--r--platform/linux-generic/include/odp_ipsec_internal.h2
-rw-r--r--platform/linux-generic/include/odp_macros_internal.h32
-rw-r--r--platform/linux-generic/include/odp_ml_fp16.h23
-rw-r--r--platform/linux-generic/include/odp_packet_internal.h21
-rw-r--r--platform/linux-generic/include/odp_timer_internal.h14
-rw-r--r--platform/linux-generic/libodp-linux.pc.in2
-rw-r--r--platform/linux-generic/m4/configure.m47
-rw-r--r--platform/linux-generic/m4/odp_libconfig.m42
-rw-r--r--platform/linux-generic/m4/odp_ml.m446
-rw-r--r--platform/linux-generic/odp_classification.c7
-rw-r--r--platform/linux-generic/odp_cpumask.c1
-rw-r--r--platform/linux-generic/odp_cpumask_task.c1
-rw-r--r--platform/linux-generic/odp_event.c7
-rw-r--r--platform/linux-generic/odp_init.c15
-rw-r--r--platform/linux-generic/odp_ipsec.c4
-rw-r--r--platform/linux-generic/odp_ishmpool.c4
-rw-r--r--platform/linux-generic/odp_ml.c2646
-rw-r--r--platform/linux-generic/odp_ml_fp16.c425
-rw-r--r--platform/linux-generic/odp_ml_null.c232
-rw-r--r--platform/linux-generic/odp_ml_quantize.c79
-rw-r--r--platform/linux-generic/odp_packet.c5
-rw-r--r--platform/linux-generic/odp_packet_io.c9
-rw-r--r--platform/linux-generic/odp_pool.c19
-rw-r--r--platform/linux-generic/odp_queue_scalable.c27
-rw-r--r--platform/linux-generic/odp_schedule_basic.c35
-rw-r--r--platform/linux-generic/odp_schedule_scalable.c61
-rw-r--r--platform/linux-generic/odp_schedule_scalable_ordered.c2
-rw-r--r--platform/linux-generic/odp_system_info.c9
-rw-r--r--platform/linux-generic/odp_timer.c40
-rw-r--r--platform/linux-generic/test/Makefile.am5
-rw-r--r--platform/linux-generic/test/example/ipsec_api/Makefile.am2
-rw-r--r--platform/linux-generic/test/example/ipsec_crypto/Makefile.am2
-rw-r--r--platform/linux-generic/test/inline-timer.conf2
-rw-r--r--platform/linux-generic/test/packet_align.conf2
-rw-r--r--platform/linux-generic/test/pktio_ipc/ipc_common.c27
-rw-r--r--platform/linux-generic/test/pktio_ipc/ipc_common.h10
-rw-r--r--platform/linux-generic/test/pktio_ipc/pktio_ipc1.c23
-rw-r--r--platform/linux-generic/test/pktio_ipc/pktio_ipc2.c23
-rwxr-xr-xplatform/linux-generic/test/pktio_ipc/pktio_ipc_run.sh36
-rw-r--r--platform/linux-generic/test/process-mode.conf2
-rw-r--r--platform/linux-generic/test/sched-basic.conf2
-rw-r--r--platform/linux-generic/test/stash-custom.conf2
-rw-r--r--platform/linux-generic/test/validation/api/ml/.gitignore1
-rw-r--r--platform/linux-generic/test/validation/api/ml/Makefile.am34
-rw-r--r--platform/linux-generic/test/validation/api/ml/README.md23
-rw-r--r--platform/linux-generic/test/validation/api/ml/batch_add.onnxbin0 -> 144 bytes
-rw-r--r--platform/linux-generic/test/validation/api/ml/batch_add_gen.py32
-rwxr-xr-xplatform/linux-generic/test/validation/api/ml/gen_models.sh14
-rw-r--r--platform/linux-generic/test/validation/api/ml/ml_linux.c1167
-rw-r--r--platform/linux-generic/test/validation/api/ml/requirements.txt2
-rw-r--r--platform/linux-generic/test/validation/api/ml/simple_linear.onnxbin0 -> 214 bytes
-rw-r--r--platform/linux-generic/test/validation/api/ml/simple_linear_gen.py34
-rw-r--r--platform/linux-generic/test/validation/api/shmem/shmem_odp1.c2
110 files changed, 6484 insertions, 828 deletions
diff --git a/platform/linux-generic/Makefile.am b/platform/linux-generic/Makefile.am
index f3707ab3a..11cdb4c64 100644
--- a/platform/linux-generic/Makefile.am
+++ b/platform/linux-generic/Makefile.am
@@ -13,6 +13,7 @@ AM_CPPFLAGS += -I$(top_srcdir)/platform/$(with_platform)/arch/default
AM_CPPFLAGS += -I$(top_srcdir)/platform/$(with_platform)/arch/common
AM_CPPFLAGS += $(OPENSSL_CPPFLAGS)
+AM_CPPFLAGS += $(ORT_CPPFLAGS)
AM_CFLAGS += $(AARCH64CRYPTO_CFLAGS)
AM_CFLAGS += $(DPDK_CFLAGS)
@@ -90,6 +91,7 @@ odpapiabiarchinclude_HEADERS += \
include-abi/odp/api/abi/init.h \
include-abi/odp/api/abi/ipsec.h \
include-abi/odp/api/abi/ipsec_types.h \
+ include-abi/odp/api/abi/ml_types.h \
include-abi/odp/api/abi/packet.h \
include-abi/odp/api/abi/packet_types.h \
include-abi/odp/api/abi/packet_flags.h \
@@ -140,6 +142,7 @@ noinst_HEADERS = \
include/odp_event_validation_internal.h \
include/odp_fdserver_internal.h \
include/odp_forward_typedefs_internal.h \
+ include/odp_ml_fp16.h \
include/odp_global_data.h \
include/odp_init_internal.h \
include/odp_ipsec_internal.h \
@@ -228,6 +231,8 @@ __LIB__libodp_linux_la_SOURCES = \
odp_ishmphy.c \
odp_ishmpool.c \
odp_libconfig.c \
+ odp_ml_fp16.c \
+ odp_ml_quantize.c \
odp_name_table.c \
odp_packet.c \
odp_packet_vector.c \
@@ -297,6 +302,15 @@ __LIB__libodp_linux_la_SOURCES += \
endif
endif
endif
+
+if WITH_ML
+__LIB__libodp_linux_la_SOURCES += \
+ odp_ml.c
+else
+__LIB__libodp_linux_la_SOURCES += \
+ odp_ml_null.c
+endif
+
if ODP_ABI_COMPAT
__LIB__libodp_linux_la_SOURCES += \
odp_atomic_api.c \
@@ -345,14 +359,11 @@ odpapiabiarchinclude_HEADERS += arch/default/odp/api/abi/atomic_generic.h \
arch/default/odp/api/abi/wait_until_generic.h \
arch/default/odp/api/abi/wait_until.h
endif
-noinst_HEADERS += arch/arm/odp_atomic.h \
- arch/arm/odp_cpu.h \
- arch/arm/odp_cpu_idling.h \
- arch/arm/odp_llsc.h \
+noinst_HEADERS += arch/arm/odp_cpu.h \
arch/default/odp_atomic.h \
arch/default/odp_cpu.h \
- arch/default/odp_cpu_idling.h \
- arch/default/odp_random.h
+ arch/default/odp_random.h \
+ arch/default/odp_wait_until.h
endif
if ARCH_IS_AARCH64
__LIB__libodp_linux_la_SOURCES += arch/aarch64/odp_atomic.c \
@@ -380,9 +391,8 @@ endif
noinst_HEADERS += arch/aarch64/odp_atomic.h \
arch/aarch64/odp_cpu.h \
arch/aarch64/cpu_flags.h \
- arch/aarch64/odp_cpu_idling.h \
- arch/aarch64/odp_llsc.h \
- arch/aarch64/odp_random.h
+ arch/aarch64/odp_random.h \
+ arch/aarch64/odp_wait_until.h
endif
if ARCH_IS_DEFAULT
__LIB__libodp_linux_la_SOURCES += arch/default/odp_atomic.c \
@@ -405,8 +415,8 @@ odpapiabiarchinclude_HEADERS += arch/default/odp/api/abi/atomic_generic.h \
endif
noinst_HEADERS += arch/default/odp_atomic.h \
arch/default/odp_cpu.h \
- arch/default/odp_cpu_idling.h \
- arch/default/odp_random.h
+ arch/default/odp_random.h \
+ arch/default/odp_wait_until.h
endif
if ARCH_IS_POWERPC
__LIB__libodp_linux_la_SOURCES += arch/default/odp_atomic.c \
@@ -429,8 +439,8 @@ odpapiabiarchinclude_HEADERS += arch/default/odp/api/abi/atomic_generic.h \
endif
noinst_HEADERS += arch/default/odp_atomic.h \
arch/default/odp_cpu.h \
- arch/default/odp_cpu_idling.h \
- arch/default/odp_random.h
+ arch/default/odp_random.h \
+ arch/default/odp_wait_until.h
endif
if ARCH_IS_X86
__LIB__libodp_linux_la_SOURCES += arch/default/odp_atomic.c \
@@ -460,7 +470,7 @@ noinst_HEADERS += arch/x86/cpu_flags.h \
arch/x86/odp_random.h \
arch/default/odp_atomic.h \
arch/default/odp_cpu.h \
- arch/default/odp_cpu_idling.h
+ arch/default/odp_wait_until.h
endif
if ODP_PKTIO_PCAP
@@ -476,6 +486,7 @@ __LIB__libodp_linux_la_LIBADD += $(PTHREAD_LIBS)
__LIB__libodp_linux_la_LIBADD += $(TIMER_LIBS)
__LIB__libodp_linux_la_LIBADD += $(LIBXDP_LIBS)
__LIB__libodp_linux_la_LIBADD += $(IPSEC_MB_LIBS)
+__LIB__libodp_linux_la_LIBADD += $(ORT_LIBS)
if ODP_PKTIO_PCAP
__LIB__libodp_linux_la_LIBADD += $(PCAP_LIBS)
diff --git a/platform/linux-generic/arch/aarch64/odp_cpu.h b/platform/linux-generic/arch/aarch64/odp_cpu.h
index 84bc4dffd..ad8b36d87 100644
--- a/platform/linux-generic/arch/aarch64/odp_cpu.h
+++ b/platform/linux-generic/arch/aarch64/odp_cpu.h
@@ -14,6 +14,7 @@
#endif
#include <odp_debug_internal.h>
+#include <odp_types_internal.h>
/*
* Use LLD/SCD atomic primitives instead of lock-based code path in llqueue
@@ -31,20 +32,6 @@
*/
#define CONFIG_DMBSTR
-/*
- * Use ARM event signalling mechanism
- * Event signalling minimises spinning (busy waiting) which decreases
- * cache coherency traffic when spinning on shared locations (thus faster and
- * more scalable) and enables the CPU to enter a sleep state (lower power
- * consumption).
- */
-#define CONFIG_WFE
-
-static inline void _odp_dmb(void)
-{
- __asm__ volatile("dmb" : : : "memory");
-}
-
/* Only ARMv8 supports DMB ISHLD */
/* A load only barrier is much cheaper than full barrier */
#define _odp_release_barrier(ro) \
@@ -55,9 +42,156 @@ do { \
__asm__ volatile("dmb ish" ::: "memory"); \
} while (0)
-#include "odp_llsc.h"
+static inline uint16_t ll8(uint8_t *var, int mm)
+{
+ uint16_t old;
+
+ _ODP_ASSERT(mm == __ATOMIC_ACQUIRE || mm == __ATOMIC_RELAXED);
+
+ if (mm == __ATOMIC_ACQUIRE)
+ __asm__ volatile("ldaxrb %w0, [%1]"
+ : "=&r" (old)
+ : "r" (var)
+ : "memory");
+ else
+ __asm__ volatile("ldxrb %w0, [%1]"
+ : "=&r" (old)
+ : "r" (var)
+ : );
+ return old;
+}
+
+static inline uint32_t ll32(uint32_t *var, int mm)
+{
+ uint32_t old;
+
+ _ODP_ASSERT(mm == __ATOMIC_ACQUIRE || mm == __ATOMIC_RELAXED);
+
+ if (mm == __ATOMIC_ACQUIRE)
+ __asm__ volatile("ldaxr %w0, [%1]"
+ : "=&r" (old)
+ : "r" (var)
+ : "memory");
+ else
+ __asm__ volatile("ldxr %w0, [%1]"
+ : "=&r" (old)
+ : "r" (var)
+ : );
+ return old;
+}
+
+/* Return 0 on success, 1 on failure */
+static inline uint32_t sc32(uint32_t *var, uint32_t neu, int mm)
+{
+ uint32_t ret;
+
+ _ODP_ASSERT(mm == __ATOMIC_RELEASE || mm == __ATOMIC_RELAXED);
+
+ if (mm == __ATOMIC_RELEASE)
+ __asm__ volatile("stlxr %w0, %w1, [%2]"
+ : "=&r" (ret)
+ : "r" (neu), "r" (var)
+ : "memory");
+ else
+ __asm__ volatile("stxr %w0, %w1, [%2]"
+ : "=&r" (ret)
+ : "r" (neu), "r" (var)
+ : );
+ return ret;
+}
+
+static inline uint64_t ll64(uint64_t *var, int mm)
+{
+ uint64_t old;
+
+ _ODP_ASSERT(mm == __ATOMIC_ACQUIRE || mm == __ATOMIC_RELAXED);
+
+ if (mm == __ATOMIC_ACQUIRE)
+ __asm__ volatile("ldaxr %0, [%1]"
+ : "=&r" (old)
+ : "r" (var)
+ : "memory");
+ else
+ __asm__ volatile("ldxr %0, [%1]"
+ : "=&r" (old)
+ : "r" (var)
+ : );
+ return old;
+}
+
+/* Return 0 on success, 1 on failure */
+static inline uint32_t sc64(uint64_t *var, uint64_t neu, int mm)
+{
+ uint32_t ret;
+
+ _ODP_ASSERT(mm == __ATOMIC_RELEASE || mm == __ATOMIC_RELAXED);
+
+ if (mm == __ATOMIC_RELEASE)
+ __asm__ volatile("stlxr %w0, %1, [%2]"
+ : "=&r" (ret)
+ : "r" (neu), "r" (var)
+ : "memory");
+ else
+ __asm__ volatile("stxr %w0, %1, [%2]"
+ : "=&r" (ret)
+ : "r" (neu), "r" (var)
+ : );
+ return ret;
+}
+
+union i128 {
+ _odp_u128_t i128;
+ int64_t i64[2];
+};
+
+static inline _odp_u128_t lld(_odp_u128_t *var, int mm)
+{
+ union i128 old;
+
+ _ODP_ASSERT(mm == __ATOMIC_ACQUIRE || mm == __ATOMIC_RELAXED);
+
+ if (mm == __ATOMIC_ACQUIRE)
+ __asm__ volatile("ldaxp %0, %1, [%2]"
+ : "=&r" (old.i64[0]), "=&r" (old.i64[1])
+ : "r" (var)
+ : "memory");
+ else
+ __asm__ volatile("ldxp %0, %1, [%2]"
+ : "=&r" (old.i64[0]), "=&r" (old.i64[1])
+ : "r" (var)
+ : );
+ return old.i128;
+}
+
+/* Return 0 on success, 1 on failure */
+static inline uint32_t scd(_odp_u128_t *var, _odp_u128_t neu, int mm)
+{
+ uint32_t ret;
+
+ _ODP_ASSERT(mm == __ATOMIC_RELEASE || mm == __ATOMIC_RELAXED);
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpedantic"
+ if (mm == __ATOMIC_RELEASE)
+ __asm__ volatile("stlxp %w0, %1, %2, [%3]"
+ : "=&r" (ret)
+ : "r" (((*(union i128 *)&neu)).i64[0]),
+ "r" (((*(union i128 *)&neu)).i64[1]),
+ "r" (var)
+ : "memory");
+ else
+ __asm__ volatile("stxp %w0, %1, %2, [%3]"
+ : "=&r" (ret)
+ : "r" (((*(union i128 *)&neu)).i64[0]),
+ "r" (((*(union i128 *)&neu)).i64[1]),
+ "r" (var)
+ : );
+#pragma GCC diagnostic pop
+ return ret;
+}
+
#include "odp_atomic.h"
-#include "odp_cpu_idling.h"
+#include "odp_wait_until.h"
#ifdef __ARM_FEATURE_UNALIGNED
#define _ODP_UNALIGNED 1
diff --git a/platform/linux-generic/arch/aarch64/odp_cpu_idling.h b/platform/linux-generic/arch/aarch64/odp_cpu_idling.h
deleted file mode 100644
index a6cea8c63..000000000
--- a/platform/linux-generic/arch/aarch64/odp_cpu_idling.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Copyright (c) 2017, ARM Limited. All rights reserved.
- *
- * Copyright (c) 2017-2018, Linaro Limited
- * All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_CPU_IDLING_H
-#define PLATFORM_LINUXGENERIC_ARCH_ARM_CPU_IDLING_H
-
-#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H
-#error This file should not be included directly, please include odp_cpu.h
-#endif
-
-#ifndef CONFIG_WFE
-
-#include "../default/odp_cpu_idling.h"
-
-#else /* CONFIG_WFE */
-
-static inline void sevl(void)
-{
- __asm__ volatile("sevl" : : : );
-}
-
-static inline int wfe(void)
-{
- __asm__ volatile("wfe" : : : "memory");
- return 1;
-}
-
-#define monitor128(addr, mo) lld((addr), (mo))
-#define monitor64(addr, mo) ll64((addr), (mo))
-#define monitor32(addr, mo) ll32((addr), (mo))
-#define monitor8(addr, mo) ll8((addr), (mo))
-#endif /* CONFIG_WFE */
-
-#endif /* PLATFORM_LINUXGENERIC_ARCH_ARM_CPU_IDLING_H */
diff --git a/platform/linux-generic/arch/aarch64/odp_llsc.h b/platform/linux-generic/arch/aarch64/odp_llsc.h
deleted file mode 100644
index 498785bd4..000000000
--- a/platform/linux-generic/arch/aarch64/odp_llsc.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/* Copyright (c) 2017, ARM Limited. All rights reserved.
- *
- * Copyright (c) 2017-2018, Linaro Limited
- * All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_LLSC_H
-#define PLATFORM_LINUXGENERIC_ARCH_ARM_LLSC_H
-
-#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H
-#error This file should not be included directly, please include odp_cpu.h
-#endif
-
-#include <odp_types_internal.h>
-
-static inline uint16_t ll8(uint8_t *var, int mm)
-{
- uint16_t old;
-
- if (mm == __ATOMIC_ACQUIRE)
- __asm__ volatile("ldaxrb %w0, [%1]"
- : "=&r" (old)
- : "r" (var)
- : "memory");
- else if (mm == __ATOMIC_RELAXED)
- __asm__ volatile("ldxrb %w0, [%1]"
- : "=&r" (old)
- : "r" (var)
- : );
- else
- _ODP_ABORT();
- return old;
-}
-
-static inline uint32_t ll32(uint32_t *var, int mm)
-{
- uint32_t old;
-
- if (mm == __ATOMIC_ACQUIRE)
- __asm__ volatile("ldaxr %w0, [%1]"
- : "=&r" (old)
- : "r" (var)
- : "memory");
- else if (mm == __ATOMIC_RELAXED)
- __asm__ volatile("ldxr %w0, [%1]"
- : "=&r" (old)
- : "r" (var)
- : );
- else
- _ODP_ABORT();
- return old;
-}
-
-/* Return 0 on success, 1 on failure */
-static inline uint32_t sc32(uint32_t *var, uint32_t neu, int mm)
-{
- uint32_t ret;
-
- if (mm == __ATOMIC_RELEASE)
- __asm__ volatile("stlxr %w0, %w1, [%2]"
- : "=&r" (ret)
- : "r" (neu), "r" (var)
- : "memory");
- else if (mm == __ATOMIC_RELAXED)
- __asm__ volatile("stxr %w0, %w1, [%2]"
- : "=&r" (ret)
- : "r" (neu), "r" (var)
- : );
- else
- _ODP_ABORT();
- return ret;
-}
-
-static inline uint64_t ll(uint64_t *var, int mm)
-{
- uint64_t old;
-
- if (mm == __ATOMIC_ACQUIRE)
- __asm__ volatile("ldaxr %0, [%1]"
- : "=&r" (old)
- : "r" (var)
- : "memory");
- else if (mm == __ATOMIC_RELAXED)
- __asm__ volatile("ldxr %0, [%1]"
- : "=&r" (old)
- : "r" (var)
- : );
- else
- _ODP_ABORT();
- return old;
-}
-
-#define ll64(a, b) ll((a), (b))
-
-/* Return 0 on success, 1 on failure */
-static inline uint32_t sc(uint64_t *var, uint64_t neu, int mm)
-{
- uint32_t ret;
-
- if (mm == __ATOMIC_RELEASE)
- __asm__ volatile("stlxr %w0, %1, [%2]"
- : "=&r" (ret)
- : "r" (neu), "r" (var)
- : "memory");
- else if (mm == __ATOMIC_RELAXED)
- __asm__ volatile("stxr %w0, %1, [%2]"
- : "=&r" (ret)
- : "r" (neu), "r" (var)
- : );
- else
- _ODP_ABORT();
- return ret;
-}
-
-#define sc64(a, b, c) sc((a), (b), (c))
-
-union i128 {
- _odp_u128_t i128;
- int64_t i64[2];
-};
-
-static inline _odp_u128_t lld(_odp_u128_t *var, int mm)
-{
- union i128 old;
-
- if (mm == __ATOMIC_ACQUIRE)
- __asm__ volatile("ldaxp %0, %1, [%2]"
- : "=&r" (old.i64[0]), "=&r" (old.i64[1])
- : "r" (var)
- : "memory");
- else if (mm == __ATOMIC_RELAXED)
- __asm__ volatile("ldxp %0, %1, [%2]"
- : "=&r" (old.i64[0]), "=&r" (old.i64[1])
- : "r" (var)
- : );
- else
- _ODP_ABORT();
- return old.i128;
-}
-
-/* Return 0 on success, 1 on failure */
-static inline uint32_t scd(_odp_u128_t *var, _odp_u128_t neu, int mm)
-{
- uint32_t ret;
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wpedantic"
- if (mm == __ATOMIC_RELEASE)
- __asm__ volatile("stlxp %w0, %1, %2, [%3]"
- : "=&r" (ret)
- : "r" (((*(union i128 *)&neu)).i64[0]),
- "r" (((*(union i128 *)&neu)).i64[1]),
- "r" (var)
- : "memory");
- else if (mm == __ATOMIC_RELAXED)
- __asm__ volatile("stxp %w0, %1, %2, [%3]"
- : "=&r" (ret)
- : "r" (((*(union i128 *)&neu)).i64[0]),
- "r" (((*(union i128 *)&neu)).i64[1]),
- "r" (var)
- : );
- else
- _ODP_ABORT();
-#pragma GCC diagnostic pop
- return ret;
-}
-
-#endif /* PLATFORM_LINUXGENERIC_ARCH_ARM_LLSC_H */
diff --git a/platform/linux-generic/arch/aarch64/odp_wait_until.h b/platform/linux-generic/arch/aarch64/odp_wait_until.h
new file mode 100644
index 000000000..eca3f9ce5
--- /dev/null
+++ b/platform/linux-generic/arch/aarch64/odp_wait_until.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2017 ARM Limited
+ * Copyright (c) 2017-2018 Linaro Limited
+ * Copyright (c) 2024 Nokia
+ */
+
+#ifndef ODP_AARCH64_WAIT_UNTIL_H_
+#define ODP_AARCH64_WAIT_UNTIL_H_
+
+#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H
+#error This file should not be included directly, please include odp_cpu.h
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <odp/api/cpu.h>
+
+#include <odp_cpu.h>
+
+#include <stdint.h>
+
+static inline void _odp_sevl(void)
+{
+ __asm__ volatile("sevl" : : : );
+}
+
+static inline int _odp_wfe(void)
+{
+ __asm__ volatile("wfe" : : : "memory");
+ return 1;
+}
+
+#define _odp_monitor_u8(addr, mo) ll8((addr), (mo))
+#define _odp_monitor_u32(addr, mo) ll32((addr), (mo))
+#define _odp_monitor_u64(addr, mo) ll64((addr), (mo))
+#define _odp_monitor_u128(addr, mo) lld((addr), (mo))
+
+#if ATOM_BITSET_SIZE <= 32
+static inline bitset_t _odp_bitset_monitor(bitset_t *bs, int mo)
+{
+ return _odp_monitor_u32(bs, mo);
+}
+#elif ATOM_BITSET_SIZE <= 64
+static inline bitset_t _odp_bitset_monitor(bitset_t *bs, int mo)
+{
+ return _odp_monitor_u64(bs, mo);
+}
+#elif ATOM_BITSET_SIZE <= 128
+static inline bitset_t _odp_bitset_monitor(bitset_t *bs, int mo)
+{
+ return _odp_monitor_u128(bs, mo);
+}
+#else
+#error Unsupported size of bit sets (ATOM_BITSET_SIZE)
+#endif
+
+/**
+ * The _odp_wait_until_eq_*() functions defined in this header are intended to
+ * be used only with the scalable scheduler and queue implementations. Even
+ * though these functions use standard non-atomic parameter types, the
+ * parameters must only be operated using atomic operations. If new functions
+ * are added to this file, they should use _odp_wait_until_equal_*() prefix and
+ * atomic parameter types.
+ */
+
+static inline void _odp_wait_until_eq_u32(uint32_t *val, uint32_t expected)
+{
+ _odp_sevl();
+ while (_odp_wfe() && _odp_monitor_u32(val, __ATOMIC_RELAXED) != expected)
+ odp_cpu_pause();
+}
+
+static inline void _odp_wait_until_eq_bitset(bitset_t *val, bitset_t expected)
+{
+ _odp_sevl();
+	while (_odp_wfe() && _odp_bitset_monitor(val, __ATOMIC_RELAXED) != expected)
+ odp_cpu_pause();
+}
+
+static inline void _odp_wait_until_eq_acq_u8(uint8_t *val, uint8_t expected)
+{
+ _odp_sevl();
+ while (_odp_wfe() && _odp_monitor_u8(val, __ATOMIC_ACQUIRE) != expected)
+ odp_cpu_pause();
+}
+
+static inline void _odp_wait_until_eq_acq_u32(uint32_t *val, uint32_t expected)
+{
+ _odp_sevl();
+ while (_odp_wfe() && _odp_monitor_u32(val, __ATOMIC_ACQUIRE) != expected)
+ odp_cpu_pause();
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/platform/linux-generic/arch/arm/odp_atomic.h b/platform/linux-generic/arch/arm/odp_atomic.h
deleted file mode 100644
index e400f52d4..000000000
--- a/platform/linux-generic/arch/arm/odp_atomic.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/* Copyright (c) 2017-2021, ARM Limited. All rights reserved.
- * Copyright (c) 2017-2018, Linaro Limited
- * All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_ATOMIC_H
-#define PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_ATOMIC_H
-
-#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H
-#error This file should not be included directly, please include odp_cpu.h
-#endif
-
-#include <odp_types_internal.h>
-#include <limits.h>
-
-#ifdef CONFIG_DMBSTR
-
-#define atomic_store_release(loc, val, ro) \
-do { \
- _odp_release_barrier(ro); \
- __atomic_store_n(loc, val, __ATOMIC_RELAXED); \
-} while (0)
-
-#else
-
-#define atomic_store_release(loc, val, ro) \
- __atomic_store_n(loc, val, __ATOMIC_RELEASE)
-
-#endif /* CONFIG_DMBSTR */
-
-/** Atomic bit set operations with memory ordering */
-#if __GCC_ATOMIC_LLONG_LOCK_FREE == 2 && \
- __SIZEOF_LONG_LONG__ != __SIZEOF_LONG__
-typedef unsigned long long bitset_t;
-#define ATOM_BITSET_SIZE (CHAR_BIT * __SIZEOF_LONG_LONG__)
-
-#elif __GCC_ATOMIC_LONG_LOCK_FREE == 2 && __SIZEOF_LONG__ != __SIZEOF_INT__
-typedef unsigned long bitset_t;
-#define ATOM_BITSET_SIZE (CHAR_BIT * __SIZEOF_LONG__)
-
-#elif __GCC_ATOMIC_INT_LOCK_FREE == 2
-typedef unsigned int bitset_t;
-#define ATOM_BITSET_SIZE (CHAR_BIT * __SIZEOF_INT__)
-
-#else
-/* Target does not support lock-free atomic operations */
-typedef unsigned int bitset_t;
-#define ATOM_BITSET_SIZE (CHAR_BIT * __SIZEOF_INT__)
-#endif
-
-#if ATOM_BITSET_SIZE <= 32
-
-static inline bitset_t bitset_mask(uint32_t bit)
-{
- return 1UL << bit;
-}
-
-#elif ATOM_BITSET_SIZE <= 64
-
-static inline bitset_t bitset_mask(uint32_t bit)
-{
- return 1ULL << bit;
-}
-
-#elif ATOM_BITSET_SIZE <= 128
-
-static inline bitset_t bitset_mask(uint32_t bit)
-{
- if (bit < 64)
- return 1ULL << bit;
- else
- return (_odp_u128_t)(1ULL << (bit - 64)) << 64;
-}
-
-#else
-#error Unsupported size of bit sets (ATOM_BITSET_SIZE)
-#endif
-
-static inline bitset_t atom_bitset_load(bitset_t *bs, int mo)
-{
- return __atomic_load_n(bs, mo);
-}
-
-static inline void atom_bitset_set(bitset_t *bs, uint32_t bit, int mo)
-{
- (void)__atomic_fetch_or(bs, bitset_mask(bit), mo);
-}
-
-static inline void atom_bitset_clr(bitset_t *bs, uint32_t bit, int mo)
-{
- (void)__atomic_fetch_and(bs, ~bitset_mask(bit), mo);
-}
-
-static inline bitset_t atom_bitset_xchg(bitset_t *bs, bitset_t neu, int mo)
-{
- return __atomic_exchange_n(bs, neu, mo);
-}
-
-static inline bitset_t atom_bitset_cmpxchg(bitset_t *bs, bitset_t *old,
- bitset_t neu, bool weak,
- int mo_success, int mo_failure)
-{
- return __atomic_compare_exchange_n(bs, old, neu, weak, mo_success,
- mo_failure);
-}
-
-#endif /* PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_ATOMIC_H */
diff --git a/platform/linux-generic/arch/arm/odp_cpu.h b/platform/linux-generic/arch/arm/odp_cpu.h
index 82d47325f..6b2674736 100644
--- a/platform/linux-generic/arch/arm/odp_cpu.h
+++ b/platform/linux-generic/arch/arm/odp_cpu.h
@@ -31,26 +31,52 @@
*/
#define CONFIG_DMBSTR
-/*
- * Use ARM event signalling mechanism
- * Event signalling minimises spinning (busy waiting) which decreases
- * cache coherency traffic when spinning on shared locations (thus faster and
- * more scalable) and enables the CPU to enter a sleep state (lower power
- * consumption).
- */
-/* #define CONFIG_WFE */
+static inline uint64_t lld(uint64_t *var, int mm)
+{
+ uint64_t old;
-static inline void _odp_dmb(void)
+ __asm__ volatile("ldrexd %0, %H0, [%1]"
+ : "=&r" (old)
+ : "r" (var)
+ : );
+ /* Barrier after an acquiring load */
+ if (mm == __ATOMIC_ACQUIRE)
+ __asm__ volatile("dmb" : : : "memory");
+ return old;
+}
+
+/* Return 0 on success, 1 on failure */
+static inline uint32_t scd(uint64_t *var, uint64_t neu, int mm)
{
- __asm__ volatile("dmb" : : : "memory");
+ uint32_t ret;
+
+ /* Barrier before a releasing store */
+ if (mm == __ATOMIC_RELEASE)
+ __asm__ volatile("dmb" : : : "memory");
+ __asm__ volatile("strexd %0, %1, %H1, [%2]"
+ : "=&r" (ret)
+ : "r" (neu), "r" (var)
+ : );
+ return ret;
}
-#define _odp_release_barrier(ro) \
- __atomic_thread_fence(__ATOMIC_RELEASE)
+#ifdef CONFIG_DMBSTR
+
+#define atomic_store_release(loc, val, ro) \
+do { \
+ __atomic_thread_fence(__ATOMIC_RELEASE); \
+ __atomic_store_n(loc, val, __ATOMIC_RELAXED); \
+} while (0)
+
+#else
+
+#define atomic_store_release(loc, val, ro) \
+ __atomic_store_n(loc, val, __ATOMIC_RELEASE)
+
+#endif /* CONFIG_DMBSTR */
-#include "odp_llsc.h"
-#include "odp_atomic.h"
-#include "odp_cpu_idling.h"
+#include "../default/odp_atomic.h"
+#include "../default/odp_wait_until.h"
#ifdef __ARM_FEATURE_UNALIGNED
#define _ODP_UNALIGNED 1
diff --git a/platform/linux-generic/arch/arm/odp_cpu_idling.h b/platform/linux-generic/arch/arm/odp_cpu_idling.h
deleted file mode 100644
index a6cea8c63..000000000
--- a/platform/linux-generic/arch/arm/odp_cpu_idling.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Copyright (c) 2017, ARM Limited. All rights reserved.
- *
- * Copyright (c) 2017-2018, Linaro Limited
- * All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_CPU_IDLING_H
-#define PLATFORM_LINUXGENERIC_ARCH_ARM_CPU_IDLING_H
-
-#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H
-#error This file should not be included directly, please include odp_cpu.h
-#endif
-
-#ifndef CONFIG_WFE
-
-#include "../default/odp_cpu_idling.h"
-
-#else /* CONFIG_WFE */
-
-static inline void sevl(void)
-{
- __asm__ volatile("sevl" : : : );
-}
-
-static inline int wfe(void)
-{
- __asm__ volatile("wfe" : : : "memory");
- return 1;
-}
-
-#define monitor128(addr, mo) lld((addr), (mo))
-#define monitor64(addr, mo) ll64((addr), (mo))
-#define monitor32(addr, mo) ll32((addr), (mo))
-#define monitor8(addr, mo) ll8((addr), (mo))
-#endif /* CONFIG_WFE */
-
-#endif /* PLATFORM_LINUXGENERIC_ARCH_ARM_CPU_IDLING_H */
diff --git a/platform/linux-generic/arch/arm/odp_llsc.h b/platform/linux-generic/arch/arm/odp_llsc.h
deleted file mode 100644
index 2fea6a0dc..000000000
--- a/platform/linux-generic/arch/arm/odp_llsc.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/* Copyright (c) 2017, ARM Limited. All rights reserved.
- *
- * Copyright (c) 2017-2018, Linaro Limited
- * All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_LLSC_H
-#define PLATFORM_LINUXGENERIC_ARCH_ARM_LLSC_H
-
-#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H
-#error This file should not be included directly, please include odp_cpu.h
-#endif
-
-static inline uint32_t ll8(uint8_t *var, int mm)
-{
- uint8_t old;
-
- __asm__ volatile("ldrexb %0, [%1]"
- : "=&r" (old)
- : "r" (var)
- : );
- /* Barrier after an acquiring load */
- if (mm == __ATOMIC_ACQUIRE)
- _odp_dmb();
- return old;
-}
-
-static inline uint32_t ll(uint32_t *var, int mm)
-{
- uint32_t old;
-
- __asm__ volatile("ldrex %0, [%1]"
- : "=&r" (old)
- : "r" (var)
- : );
- /* Barrier after an acquiring load */
- if (mm == __ATOMIC_ACQUIRE)
- _odp_dmb();
- return old;
-}
-
-#define ll32(a, b) ll((a), (b))
-
-/* Return 0 on success, 1 on failure */
-static inline uint32_t sc(uint32_t *var, uint32_t neu, int mm)
-{
- uint32_t ret;
-
- /* Barrier before a releasing store */
- if (mm == __ATOMIC_RELEASE)
- _odp_dmb();
- __asm__ volatile("strex %0, %1, [%2]"
- : "=&r" (ret)
- : "r" (neu), "r" (var)
- : );
- return ret;
-}
-
-#define sc32(a, b, c) sc((a), (b), (c))
-
-static inline uint64_t lld(uint64_t *var, int mm)
-{
- uint64_t old;
-
- __asm__ volatile("ldrexd %0, %H0, [%1]"
- : "=&r" (old)
- : "r" (var)
- : );
- /* Barrier after an acquiring load */
- if (mm == __ATOMIC_ACQUIRE)
- _odp_dmb();
- return old;
-}
-
-#define ll64(a, b) lld((a), (b))
-
-/* Return 0 on success, 1 on failure */
-static inline uint32_t scd(uint64_t *var, uint64_t neu, int mm)
-{
- uint32_t ret;
-
- /* Barrier before a releasing store */
- if (mm == __ATOMIC_RELEASE)
- _odp_dmb();
- __asm__ volatile("strexd %0, %1, %H1, [%2]"
- : "=&r" (ret)
- : "r" (neu), "r" (var)
- : );
- return ret;
-}
-
-#define sc64(a, b, c) scd((a), (b), (c))
-
-#endif /* PLATFORM_LINUXGENERIC_ARCH_ARM_LLSC_H */
diff --git a/platform/linux-generic/arch/default/odp/api/abi/atomic_generic.h b/platform/linux-generic/arch/default/odp/api/abi/atomic_generic.h
index af435e495..c6ed86363 100644
--- a/platform/linux-generic/arch/default/odp/api/abi/atomic_generic.h
+++ b/platform/linux-generic/arch/default/odp/api/abi/atomic_generic.h
@@ -197,11 +197,11 @@ static inline int _odp_atomic_cas_acq_rel_u128(odp_atomic_u128_t *atom, odp_u128
#define ATOMIC_CAS_OP_128(ret_ptr, old_val, new_val) \
__extension__ ({ \
int *_ret_ptr = ret_ptr; \
- odp_u128_t *_old_val = old_val; \
- odp_u128_t _new_val = new_val; \
- if (((_atom)->v.u64[0] == (_old_val)->u64[0]) && \
- ((_atom)->v.u64[1] == (_old_val)->u64[1])) { \
- (_atom)->v = (_new_val); \
+ odp_u128_t *_cas_old = old_val; \
+ odp_u128_t _cas_new = new_val; \
+ if (((_atom)->v.u64[0] == (_cas_old)->u64[0]) && \
+ ((_atom)->v.u64[1] == (_cas_old)->u64[1])) { \
+ (_atom)->v = (_cas_new); \
*(_ret_ptr) = 1; \
} else { \
*(_ret_ptr) = 0; \
diff --git a/platform/linux-generic/arch/default/odp_cpu.h b/platform/linux-generic/arch/default/odp_cpu.h
index 821956819..6b10966c6 100644
--- a/platform/linux-generic/arch/default/odp_cpu.h
+++ b/platform/linux-generic/arch/default/odp_cpu.h
@@ -21,6 +21,6 @@
__atomic_store_n(loc, val, __ATOMIC_RELEASE)
#include "odp_atomic.h"
-#include "odp_cpu_idling.h"
+#include "odp_wait_until.h"
#endif
diff --git a/platform/linux-generic/arch/default/odp_cpu_idling.h b/platform/linux-generic/arch/default/odp_cpu_idling.h
deleted file mode 100644
index 9d23ad20d..000000000
--- a/platform/linux-generic/arch/default/odp_cpu_idling.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* Copyright (c) 2017, ARM Limited. All rights reserved.
- *
- * Copyright (c) 2017-2018, Linaro Limited
- * All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#ifndef ODP_DEFAULT_CPU_IDLING_H_
-#define ODP_DEFAULT_CPU_IDLING_H_
-
-/******************************************************************************
- * Idle mgmt
- *****************************************************************************/
-
-static inline void sevl(void)
-{
- /* empty */
-}
-
-static inline int wfe(void)
-{
- return 1;
-}
-
-#define monitor128(addr, mo) __atomic_load_n((addr), (mo))
-#define monitor64(addr, mo) __atomic_load_n((addr), (mo))
-#define monitor32(addr, mo) __atomic_load_n((addr), (mo))
-#define monitor8(addr, mo) __atomic_load_n((addr), (mo))
-
-#endif
diff --git a/platform/linux-generic/arch/default/odp_wait_until.h b/platform/linux-generic/arch/default/odp_wait_until.h
new file mode 100644
index 000000000..c51f4355e
--- /dev/null
+++ b/platform/linux-generic/arch/default/odp_wait_until.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2024 Nokia
+ */
+
+#ifndef ODP_DEFAULT_WAIT_UNTIL_H_
+#define ODP_DEFAULT_WAIT_UNTIL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <odp/api/plat/cpu_inlines.h>
+
+#include <stdint.h>
+
+/**
+ * The _odp_wait_until_eq_*() functions defined in this header are intended to
+ * be used only with the scalable scheduler and queue implementations. Even
+ * though these functions use standard non-atomic parameter types, the
+ * parameters must only be operated using atomic operations. If new functions
+ * are added to this file, they should use _odp_wait_until_equal_*() prefix and
+ * atomic parameter types.
+ */
+
+static inline void _odp_wait_until_eq_u32(uint32_t *val, uint32_t expected)
+{
+ while (__atomic_load_n(val, __ATOMIC_RELAXED) != expected)
+ odp_cpu_pause();
+}
+
+static inline void _odp_wait_until_eq_bitset(bitset_t *val, bitset_t expected)
+{
+ while (__atomic_load_n(val, __ATOMIC_RELAXED) != expected)
+ odp_cpu_pause();
+}
+
+static inline void _odp_wait_until_eq_acq_u8(uint8_t *val, uint8_t expected)
+{
+ while (__atomic_load_n(val, __ATOMIC_ACQUIRE) != expected)
+ odp_cpu_pause();
+}
+
+static inline void _odp_wait_until_eq_acq_u32(uint32_t *val, uint32_t expected)
+{
+ while (__atomic_load_n(val, __ATOMIC_ACQUIRE) != expected)
+ odp_cpu_pause();
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/platform/linux-generic/arch/x86/odp_time_cpu.c b/platform/linux-generic/arch/x86/odp_time_cpu.c
index aa00ac04e..ab897296d 100644
--- a/platform/linux-generic/arch/x86/odp_time_cpu.c
+++ b/platform/linux-generic/arch/x86/odp_time_cpu.c
@@ -1,7 +1,6 @@
-/* Copyright (c) 2015-2018, Linaro Limited
- * All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2015-2018 Linaro Limited
+ * Copyright (c) 2024 Nokia
*/
#include <odp_posix_extensions.h>
@@ -14,42 +13,77 @@
#include <odp_debug_internal.h>
#include <time.h>
+#include <errno.h>
+#include <string.h>
-/* Measure TSC frequency. Frequency information registers are defined for x86,
- * but those are often not enumerated. */
+static int nwait(uint64_t nsec)
+{
+ struct timespec ts1, ts2;
+ uint64_t diff;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts1))
+ return 1;
+
+ do {
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts2))
+ return 1;
+
+ diff = (ts2.tv_sec - ts1.tv_sec) * ODP_TIME_SEC_IN_NS +
+ ts2.tv_nsec - ts1.tv_nsec;
+ } while (diff < nsec);
+
+ return 0;
+}
+
+static void sort(uint64_t values[], int num)
+{
+ for (int n = 0; n < num; n++) {
+ for (int i = n + 1; i < num; i++) {
+ if (values[i] < values[n]) {
+ uint64_t tmp = values[i];
+
+ values[i] = values[n];
+ values[n] = tmp;
+ }
+ }
+ }
+}
+
+static uint64_t median(uint64_t values[], int num)
+{
+ sort(values, num);
+ if (num % 2 == 0)
+ return (values[num / 2 - 1] + values[num / 2]) / 2;
+ else
+ return values[num / 2];
+}
+
+/* Measure TSC frequency. */
uint64_t _odp_time_cpu_global_freq(void)
{
- struct timespec sleep, ts1, ts2;
- uint64_t t1, t2, ts_nsec, cycles, hz;
+ struct timespec ts1, ts2;
+ uint64_t t1, t2, ts_nsec, cycles;
int i;
- uint64_t avg = 0;
- int rounds = 3;
+ const int rounds = 6; /* first round is warmup */
int warm_up = 1;
+ uint64_t hz[rounds];
for (i = 0; i < rounds; i++) {
- sleep.tv_sec = 0;
+ uint64_t wait_nsec = ODP_TIME_SEC_IN_NS / 50;
if (warm_up)
- sleep.tv_nsec = ODP_TIME_SEC_IN_NS / 1000;
- else
- sleep.tv_nsec = ODP_TIME_SEC_IN_NS / 4;
+ wait_nsec = ODP_TIME_SEC_IN_NS / 1000;
- if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts1)) {
- _ODP_ERR("clock_gettime() failed\n");
- return 0;
- }
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts1))
+ goto err_out;
t1 = _odp_time_cpu_global();
- if (nanosleep(&sleep, NULL) < 0) {
- _ODP_ERR("nanosleep() failed\n");
- return 0;
- }
+ if (nwait(wait_nsec))
+ goto err_out;
- if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts2)) {
- _ODP_ERR("clock_gettime() failed\n");
- return 0;
- }
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts2))
+ goto err_out;
t2 = _odp_time_cpu_global();
@@ -58,13 +92,15 @@ uint64_t _odp_time_cpu_global_freq(void)
cycles = t2 - t1;
- hz = (cycles * ODP_TIME_SEC_IN_NS) / ts_nsec;
+ hz[i] = (cycles * ODP_TIME_SEC_IN_NS) / ts_nsec;
if (warm_up)
warm_up = 0;
- else
- avg += hz;
}
- return avg / (rounds - 1);
+ return median(&hz[1], rounds - 1);
+
+err_out:
+ _ODP_ERR("clock_gettime() failed (%s)\n", strerror(errno));
+ return 0;
}
diff --git a/platform/linux-generic/example/Makefile.am b/platform/linux-generic/example/Makefile.am
new file mode 100644
index 000000000..84f337387
--- /dev/null
+++ b/platform/linux-generic/example/Makefile.am
@@ -0,0 +1,5 @@
+SUBDIRS =
+
+if WITH_ML
+SUBDIRS += ml
+endif
diff --git a/platform/linux-generic/example/ml/.gitignore b/platform/linux-generic/example/ml/.gitignore
new file mode 100644
index 000000000..d845f6bb5
--- /dev/null
+++ b/platform/linux-generic/example/ml/.gitignore
@@ -0,0 +1,5 @@
+model_explorer
+simple_linear
+mnist
+*.log
+*.trs
diff --git a/platform/linux-generic/example/ml/Makefile.am b/platform/linux-generic/example/ml/Makefile.am
new file mode 100644
index 000000000..3692b704e
--- /dev/null
+++ b/platform/linux-generic/example/ml/Makefile.am
@@ -0,0 +1,46 @@
+include $(top_srcdir)/example/Makefile.inc
+
+LDADD += -lm
+
+bin_PROGRAMS = model_explorer simple_linear mnist
+
+simple_linear_SOURCES = simple_linear.c model_read.c model_read.h
+model_explorer_SOURCES = model_explorer.c model_read.c model_read.h
+mnist_SOURCES = mnist.c model_read.c model_read.h
+
+EXTRA_DIST = \
+ odp_ml_run_mnist.sh \
+ example_digit.csv \
+ mnist-12.onnx \
+ odp_ml_run_model_explorer.sh \
+ odp_ml_run_simple_linear.sh \
+ simple_linear.onnx \
+ README.md
+
+if test_example
+TESTS = \
+ odp_ml_run_mnist.sh \
+ odp_ml_run_model_explorer.sh \
+ odp_ml_run_simple_linear.sh
+endif
+
+# If building out-of-tree, make check will not copy the scripts and data to the
+# $(builddir) assuming that all commands are run locally. However this prevents
+# running tests on a remote target using LOG_COMPILER.
+# So copy all script and data files explicitly here.
+all-local:
+ if [ "x$(srcdir)" != "x$(builddir)" ]; then \
+ for f in $(EXTRA_DIST); do \
+ if [ -e $(srcdir)/$$f ]; then \
+ mkdir -p $(builddir)/$$(dirname $$f); \
+ cp -f $(srcdir)/$$f $(builddir)/$$f; \
+ fi \
+ done \
+ fi
+
+clean-local:
+ if [ "x$(srcdir)" != "x$(builddir)" ]; then \
+ for f in $(EXTRA_DIST); do \
+ rm -f $(builddir)/$$f; \
+ done \
+ fi
diff --git a/platform/linux-generic/example/ml/README.md b/platform/linux-generic/example/ml/README.md
new file mode 100644
index 000000000..fc6a57c0a
--- /dev/null
+++ b/platform/linux-generic/example/ml/README.md
@@ -0,0 +1,94 @@
+# ML examples
+
+Machine Learning API examples demonstrate how to use ODP ML API in different tasks:
+for example simple linear computation and predicting a handwritten digit in
+a given image.
+
+## Simple Linear
+
+This example runs on a very simple model of form y = 3 * x + 4 where x is given
+as the second argument.
+
+### Generate model
+
+```bash
+python3 <odp_directory>/platform/linux-generic/test/validation/api/ml/simple_linear_gen.py
+```
+
+### Run simple linear
+
+```bash
+$ ./simple_linear 3
+.
+.
+.
+y = 3 * 3 + 4: 13
+.
+```
+
+Or run the program with multiple threads, where each thread runs inference on one
+of the x values given in the input. Thus, the number of threads equals the count
+of numbers in the second argument.
+
+```bash
+$ ./simple_linear [2,4,5]
+.
+.
+.
+y = 3 * 2 + 4: 10
+y = 3 * 5 + 4: 19
+y = 3 * 4 + 4: 16
+.
+```
+
+## MNIST
+
+This example predicts a handwritten digit in a given image. Refer to
+https://github.com/onnx/models/tree/main/validated/vision/classification/mnist
+for more information. The model file is from
+https://github.com/onnx/models/raw/main/validated/vision/classification/mnist/model/mnist-12.onnx
+(SPDX-License-Identifier: MIT).
+
+### Prepare input data
+
+The input image is stored in a csv file which contains, comma separated, the
+digit label (a number from 0 to 9) and the 784 pixel values (a number from 0 to
+255). Pixel order is left to right and then top down. The MNIST dataset is
+available in this format at https://www.kaggle.com/oddrationale/mnist-in-csv.
+
+### Run mnist
+
+```bash
+$ ./mnist mnist-12.onnx example_digit.csv
+.
+.
+.
+predicted_digit: 4, expected_digit: 4
+.
+```
+
+## Model Explorer
+
+The example prints basic model information.
+
+### Run model_explorer
+
+```bash
+$ ./model_explorer simple_linear.onnx
+.
+.
+.
+Model info
+----------
+ Model handle: 0x7fe8426ce1d8
+ Name: model-explorer
+ Model version: 1
+ Model interface version: 0
+ Index: 0
+ Number of inputs: 1
+ Input[0]: Name: x, Data_type: int32, Shape: static [1], Size: 4
+ Number of outputs: 1
+ Output[0]: Name: y, Data_type: int32, Shape: static [1], Size: 4
+.
+.
+.
+```
diff --git a/platform/linux-generic/example/ml/example_digit.csv b/platform/linux-generic/example/ml/example_digit.csv
new file mode 100644
index 000000000..2ab0f4a0c
--- /dev/null
+++ b/platform/linux-generic/example/ml/example_digit.csv
@@ -0,0 +1 @@
+4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,55,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36,215,98,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36,249,144,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,34,246,148,0,0,0,0,0,0,0,7,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,39,255,139,0,0,0,0,0,0,2,95,117,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,51,255,97,0,0,0,0,0,0,8,203,211,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,255,58,0,0,0,0,0,0,13,238,167,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,111,255,23,0,0,0,0,0,0,24,255,110,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,209,222,1,0,0,0,0,0,0,62,255,51,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,55,255,125,0,0,0,0,0,0,0,117,255,19,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,164,255,60,0,0,0,0,0,0,0,171,230,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24,235,255,178,120,89,74,72,72,72,74,246,241,121,141,153,148,83,1,0,0,0,0,0,0,0,0,0,6,121,231,255,255,255,255,255,255,255,255,255,255,255,255,255,253,173,14,0,0,0,0,0,0,0,0,0,0,1,19,44,63,76,83,83,83,83,100,255,192,66,52,45,46,34,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,39,255,138,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,255,113,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,104,255,84,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,147,255,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,190,255,23,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25,229,210,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,50,255,117,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,91,255,34,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,49,120,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/platform/linux-generic/example/ml/mnist-12.onnx b/platform/linux-generic/example/ml/mnist-12.onnx
new file mode 100644
index 000000000..6661bfe3c
--- /dev/null
+++ b/platform/linux-generic/example/ml/mnist-12.onnx
Binary files differ
diff --git a/platform/linux-generic/example/ml/mnist.c b/platform/linux-generic/example/ml/mnist.c
new file mode 100644
index 000000000..4c1066302
--- /dev/null
+++ b/platform/linux-generic/example/ml/mnist.c
@@ -0,0 +1,300 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 Nokia
+ */
+
+#include <odp_api.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <inttypes.h>
+
+#include "model_read.h"
+
+/**
+ * About MNIST model used in this example.
+ *
+ * The model predicts handwritten digits. It has one input and one output whose
+ * detailed information is as follows:
+ *
+ * Input:
+ * Name: Input3, type: float32, shape: [1, 1, 28, 28]
+ *
+ * Output:
+ * Name: Plus214_Output_0, type: float32, shape: [1, 10]
+ *
+ * Refer https://github.com/onnx/models/tree/main/validated/vision/classification/mnist
+ * for more information about the model.
+ *
+ * The model outputs the likelihood of each number before softmax, so we need to
+ * map the output to probabilities across the 10 classes with softmax function.
+ *
+ * In this example, the input image is stored in example_digit.csv file, which
+ * contains, comma separated, the digit label (a number from 0 to 9) and the 784
+ * pixel values (a number from 0 to 255). Pixel order is first left to right and
+ * then top down. The MNIST dataset is available in this format at
+ * https://www.kaggle.com/oddrationale/mnist-in-csv.
+ */
+
+#define MAX_MODEL_SIZE 30000
+#define INPUT_NUM_ELEMS 784 /* Total shape for input: 1 * 1 * 28 * 28 */
+#define OUTPUT_NUM_ELEMS 10 /* Total shape for output: 1 * 10 */
+
+static int read_digit_csv(const char *file_name, uint8_t *expected_digit, float *pixels)
+{
+ char *tmp;
+ char *token;
+ char *end;
+ FILE *digit_file;
+ size_t size, num_elem;
+ const char *delim = ","; /* Delimiter */
+ size_t num_pixel = 0;
+
+	/* Get the digit file size in bytes */
+ digit_file = fopen(file_name, "rb");
+ fseek(digit_file, 0, SEEK_END);
+ size = ftell(digit_file);
+ rewind(digit_file);
+
+ tmp = malloc(size);
+ memset(tmp, 0, size);
+ num_elem = fread(tmp, size, 1, digit_file);
+
+ fclose(digit_file);
+ if (num_elem != 1) {
+ printf("Read digit file failed\n");
+ free(tmp);
+ return -1;
+ }
+
+ /* Get the first token which is the expected digit */
+ token = strtok(tmp, delim);
+ *expected_digit = (uint8_t)strtol(token, &end, 10);
+ if ((*expected_digit > 9) || (end == token)/*No numeric character*/) {
+ printf("Invalid digit %u or no numeric character available\n",
+ *expected_digit);
+ free(tmp);
+ return -1;
+ }
+
+	/* The remaining 784 numbers are pixel values */
+ token = strtok(NULL, delim);
+ while (token != NULL) {
+ pixels[num_pixel] = strtof(token, NULL);
+ num_pixel++;
+ token = strtok(NULL, delim);
+ }
+
+ if (num_pixel != INPUT_NUM_ELEMS) {
+ printf("Wrong number of pixels: %zu (expected:784)\n", num_pixel);
+ free(tmp);
+ return -1;
+ }
+
+ free(tmp);
+ return 0;
+}
+
+static int prepare_run_params(const char *file_name, uint8_t *expected_digit,
+ odp_ml_data_seg_t *input, odp_ml_data_seg_t *output)
+{
+ input->size = INPUT_NUM_ELEMS * sizeof(float);
+ input->addr = malloc(input->size);
+ memset(input->addr, 0, input->size);
+
+ if (read_digit_csv(file_name, expected_digit, input->addr)) {
+ free(input->addr);
+ return -1;
+ }
+
+ output->size = OUTPUT_NUM_ELEMS * sizeof(float);
+ output->addr = malloc(output->size);
+ memset(output->addr, 0, output->size);
+
+ return 0;
+}
+
+static float array_max(float *arr, uint8_t arr_len)
+{
+ float max = arr[0];
+
+ for (size_t i = 1; i < arr_len; i++) {
+ if (arr[i] > max)
+ max = arr[i];
+ }
+
+ return max;
+}
+
+static void softmax(float *input, uint8_t input_len)
+{
+ float rowmax = array_max(input, input_len);
+
+ float input_exp[input_len];
+ float sum = 0.0f;
+
+ for (size_t i = 0; i != input_len; ++i) {
+ input_exp[i] = exp(input[i] - rowmax);
+ sum += input_exp[i];
+ }
+
+ for (size_t i = 0; i != input_len; ++i)
+ input[i] = input_exp[i] / sum;
+}
+
+static uint8_t index_of_max(float *arr, uint8_t arr_len)
+{
+ uint8_t i = 0;
+ uint8_t max_index = 0;
+ float max = arr[0];
+
+ for (i = 1; i < arr_len; i++) {
+ if (arr[i] > max) {
+ max = arr[i];
+ max_index = i;
+ }
+ }
+
+ return max_index;
+}
+
+int main(int argc, char *argv[])
+{
+ const char *model_file;
+ const char *input_file;
+ float *probabilities;
+ uint8_t expected_digit;
+ uint8_t predicted_digit;
+ odp_instance_t inst;
+ odp_ml_data_t data;
+ odp_ml_model_t ml_model;
+ odp_ml_data_seg_t input;
+ odp_ml_data_seg_t output;
+ odp_ml_capability_t capa;
+ odp_ml_config_t ml_config;
+ odp_ml_model_param_t model_param;
+ int ret = 0;
+
+ if (argc != 3) {
+ printf("Please provide an input image file for classification.\n"
+ "\nUsage:\n"
+ " %s model_file input_image\n"
+ "\nThis example classifies digit written on the input image.\n\n",
+ argv[0]);
+ return -1;
+ }
+
+ model_file = argv[1];
+ input_file = argv[2];
+
+ if (odp_init_global(&inst, NULL, NULL)) {
+ printf("Global init failed.\n");
+ return -1;
+ }
+
+ if (odp_init_local(inst, ODP_THREAD_CONTROL)) {
+ printf("Local init failed.\n");
+ return -1;
+ }
+
+ if (odp_ml_capability(&capa)) {
+ printf("odp_ml_capability() failed\n");
+ ret = -1;
+ goto odp_term;
+ }
+
+ if (MAX_MODEL_SIZE > capa.max_model_size) {
+ printf("Configured max model size %d exceeds max mode size %" PRIu64 " in capa\n",
+ MAX_MODEL_SIZE, capa.max_model_size);
+ ret = -1;
+ goto odp_term;
+ }
+
+ odp_ml_config_init(&ml_config);
+ ml_config.max_model_size = MAX_MODEL_SIZE;
+ ml_config.load_mode_mask = ODP_ML_COMPL_MODE_SYNC;
+ ml_config.run_mode_mask = ODP_ML_COMPL_MODE_SYNC;
+
+ if (odp_ml_config(&ml_config)) {
+ printf("odp_ml_config() failed\n");
+ ret = -1;
+ goto odp_term;
+ }
+
+ odp_ml_model_param_init(&model_param);
+ if (read_model_from_file(model_file, &model_param)) {
+ printf("Read model file failed\n");
+ ret = -1;
+ goto odp_term;
+ }
+
+ ml_model = odp_ml_model_create("mnist", &model_param);
+ free(model_param.model);
+ if (ml_model == ODP_ML_MODEL_INVALID) {
+ printf("odp_ml_model_create() failed\n");
+ ret = -1;
+ goto odp_term;
+ }
+
+ odp_ml_model_print(ml_model);
+
+ if (odp_ml_model_load(ml_model, NULL)) {
+ printf("odp_ml_model_load() failed\n");
+ ret = -1;
+ goto destroy_model;
+ }
+
+ data.num_input_seg = 1;
+ data.num_output_seg = 1;
+ data.input_seg = &input;
+ data.output_seg = &output;
+ if (prepare_run_params(input_file, &expected_digit, &input, &output)) {
+ printf("prepare_run_params() failed\n");
+ ret = -1;
+ goto unload;
+ }
+
+ if (odp_ml_run(ml_model, &data, NULL) != 1) {
+ printf("odp_ml_model_run() failed\n");
+ ret = -1;
+ goto free_model_io;
+ }
+
+ probabilities = output.addr;
+
+ /* Post-process the model output */
+ softmax(probabilities, OUTPUT_NUM_ELEMS);
+ predicted_digit = index_of_max(probabilities, OUTPUT_NUM_ELEMS);
+ printf("predicted_digit: %u, expected_digit: %u\n", predicted_digit, expected_digit);
+
+free_model_io:
+ free(input.addr);
+ free(output.addr);
+
+unload:
+ if (odp_ml_model_unload(ml_model, NULL)) {
+ printf("odp_ml_model_unload() failed\n");
+ ret = -1;
+ goto odp_term;
+ }
+
+destroy_model:
+ /* Destroy the model */
+ if (odp_ml_model_destroy(ml_model)) {
+ printf("odp_ml_model_destroy() failed\n");
+ ret = -1;
+ }
+
+odp_term:
+ if (odp_term_local()) {
+ printf("Local term failed.\n");
+ return -1;
+ }
+
+ if (odp_term_global(inst)) {
+ printf("Global term failed.\n");
+ return -1;
+ }
+
+ return ret;
+}
diff --git a/platform/linux-generic/example/ml/model_explorer.c b/platform/linux-generic/example/ml/model_explorer.c
new file mode 100644
index 000000000..bd449b032
--- /dev/null
+++ b/platform/linux-generic/example/ml/model_explorer.c
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 Nokia
+ */
+
+#include <odp_api.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "model_read.h"
+
+/**
+ * Read basic model information, e.g. inputs/outputs.
+ */
+
+int main(int argc, char *argv[])
+{
+ odp_instance_t inst;
+ odp_ml_model_t ml_model;
+ odp_ml_capability_t capa;
+ odp_ml_config_t ml_config;
+ odp_ml_model_param_t model_param;
+ int ret = 0;
+
+ if (argc != 2) {
+ printf("Please specify model path\n"
+ "\nUsage:\n"
+ " %s model_path\n"
+ "\nThis example prints model information\n\n",
+ argv[0]);
+ return -1;
+ }
+
+ if (odp_init_global(&inst, NULL, NULL)) {
+ printf("Global init failed.\n");
+ return -1;
+ }
+
+ if (odp_init_local(inst, ODP_THREAD_CONTROL)) {
+ printf("Local init failed.\n");
+ return -1;
+ }
+
+ if (odp_ml_capability(&capa)) {
+ printf("odp_ml_capability() failed\n");
+ ret = -1;
+ goto odp_term;
+ }
+
+ odp_ml_config_init(&ml_config);
+ ml_config.max_model_size = capa.max_model_size;
+ ml_config.load_mode_mask = ODP_ML_COMPL_MODE_SYNC;
+ ml_config.run_mode_mask = ODP_ML_COMPL_MODE_SYNC;
+
+ if (odp_ml_config(&ml_config)) {
+ printf("odp_ml_config() failed\n");
+ ret = -1;
+ goto odp_term;
+ }
+
+ odp_ml_model_param_init(&model_param);
+ if (read_model_from_file(argv[1], &model_param)) {
+ ret = -1;
+ goto odp_term;
+ }
+
+ ml_model = odp_ml_model_create("model-explorer", &model_param);
+ free(model_param.model);
+ if (ml_model == ODP_ML_MODEL_INVALID) {
+ printf("odp_ml_model_create failed.\n");
+ ret = -1;
+ goto odp_term;
+ }
+
+ odp_ml_model_print(ml_model);
+
+odp_term:
+ if (odp_term_local()) {
+ printf("Local term failed.\n");
+ return -1;
+ }
+
+ if (odp_term_global(inst)) {
+ printf("Global term failed.\n");
+ return -1;
+ }
+
+ return ret;
+}
diff --git a/platform/linux-generic/example/ml/model_read.c b/platform/linux-generic/example/ml/model_read.c
new file mode 100644
index 000000000..7aa20bf35
--- /dev/null
+++ b/platform/linux-generic/example/ml/model_read.c
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 Nokia
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <odp_api.h>
+
+#include "model_read.h"
+
+int read_model_from_file(const char *file_name, odp_ml_model_param_t *model_param)
+{
+ FILE *model_file;
+ /* Number of elements successfully read */
+ size_t num_elem;
+
+ /* Get the model file size in bytes */
+ model_file = fopen(file_name, "rb");
+ if (model_file == NULL) {
+ perror("Failed to open model file");
+ return -1;
+ }
+
+ fseek(model_file, 0, SEEK_END);
+ model_param->size = ftell(model_file);
+ rewind(model_file);
+
+ /* Allocate memory for model buffer */
+ model_param->model = malloc(model_param->size);
+ memset(model_param->model, 0, model_param->size);
+ if (!model_param->model) {
+ printf("Allocating memory for model buffer failed\n");
+ return -1;
+ }
+
+ /* Read the model file */
+ num_elem = fread(model_param->model, model_param->size, 1, model_file);
+ fclose(model_file);
+ if (num_elem != 1) {
+ printf("Read model file failed\n");
+ free(model_param->model);
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/platform/linux-generic/example/ml/model_read.h b/platform/linux-generic/example/ml/model_read.h
new file mode 100644
index 000000000..df2062d5f
--- /dev/null
+++ b/platform/linux-generic/example/ml/model_read.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 Nokia
+ */
+
+#ifndef ODP_MODEL_READ_H_
+#define ODP_MODEL_READ_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <odp_api.h>
+
+/**
+ * Read model binaries from model file
+ *
+ * @param file_name The name of model file
+ * @param model_param Model parameter where model content and size are read to
+ *
+ * @retval 0 on success
+ * @retval < 0 on failure
+ */
+int read_model_from_file(const char *file_name, odp_ml_model_param_t *model_param);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/platform/linux-generic/example/ml/odp_ml_run_mnist.sh b/platform/linux-generic/example/ml/odp_ml_run_mnist.sh
new file mode 100755
index 000000000..f83d6f60d
--- /dev/null
+++ b/platform/linux-generic/example/ml/odp_ml_run_mnist.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+#
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2023 Nokia
+#
+set -e
+
+# wget https://github.com/onnx/models/raw/main/validated/vision/classification/mnist/model/mnist-12.onnx
+./mnist${EXEEXT} mnist-12.onnx example_digit.csv
diff --git a/platform/linux-generic/example/ml/odp_ml_run_model_explorer.sh b/platform/linux-generic/example/ml/odp_ml_run_model_explorer.sh
new file mode 100755
index 000000000..7f9fed5a6
--- /dev/null
+++ b/platform/linux-generic/example/ml/odp_ml_run_model_explorer.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+#
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2023 Nokia
+#
+set -e
+
+./model_explorer${EXEEXT} simple_linear.onnx
diff --git a/platform/linux-generic/example/ml/odp_ml_run_simple_linear.sh b/platform/linux-generic/example/ml/odp_ml_run_simple_linear.sh
new file mode 100755
index 000000000..b394b61a8
--- /dev/null
+++ b/platform/linux-generic/example/ml/odp_ml_run_simple_linear.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+#
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2023 Nokia
+#
+set -e
+
+./simple_linear${EXEEXT} [2,4,5]
diff --git a/platform/linux-generic/example/ml/simple_linear.c b/platform/linux-generic/example/ml/simple_linear.c
new file mode 100644
index 000000000..3417219c7
--- /dev/null
+++ b/platform/linux-generic/example/ml/simple_linear.c
@@ -0,0 +1,281 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 Nokia
+ */
+
+#include <odp_api.h>
+#include <odp/helper/odph_api.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include "model_read.h"
+
+/**
+ * About model simple_linear.onnx used in this example.
+ *
+ * Model info:
+ * Inputs: name: x, type: int32, shape: [1]
+ * Outputs: name: y, type: int32, shape: [1]
+ *
+ * The model is of form y = 3 * x + 4 where x is given as the second argument.
+ * Thus when x = 5, the output y should be 19.
+ */
+
+#define NUM_INPUTS 1
+#define NUM_OUTPUTS 1
+#define MAX_NUM_WORKERS 10
+#define MAX_MODEL_SIZE 500
+
+typedef struct infer_param_t {
+ int32_t x;
+ odp_ml_model_t ml_model;
+} infer_param_t;
+
+typedef struct {
+ odp_shm_t shm;
+ /* Thread specific arguments */
+ infer_param_t infer_param[MAX_NUM_WORKERS];
+} thread_args_t;
+
+/* Global pointer to thread_args */
+static thread_args_t *thread_args;
+
+static int run_inference(void *infer_param)
+{
+ int32_t y;
+ odp_ml_data_t data;
+ odp_ml_data_seg_t input;
+ odp_ml_data_seg_t output;
+ infer_param_t *param = (infer_param_t *)infer_param;
+
+ data.num_input_seg = NUM_INPUTS;
+ data.input_seg = &input;
+ input.addr = &param->x;
+ input.size = sizeof(int32_t);
+
+ data.num_output_seg = NUM_OUTPUTS;
+ data.output_seg = &output;
+ output.addr = &y;
+ output.size = sizeof(int32_t);
+
+ while (1) {
+ int ret = odp_ml_run(param->ml_model, &data, NULL);
+
+ if (ret == 1)
+ break;
+
+ if (ret < 0) {
+ ODPH_ERR("odp_ml_model_run() failed: %d\n", ret);
+ return -1;
+ }
+ }
+
+ printf("y = 3 * %d + 4: %d\n", param->x, y);
+
+ return 0;
+}
+
+static int parse_argv1(char *argv1, uint32_t *num, int32_t *x)
+{
+ char *token;
+ int i;
+
+ if (!strstr(argv1, "[")) {
+ *num = 1;
+ *x = strtol(argv1, NULL, 10);
+ return 0;
+ }
+
+ token = strtok(argv1, "[,]");
+ if (token == NULL) {
+ ODPH_ERR("Invalid argv[1]\n");
+ return -1;
+ }
+ x[0] = strtol(token, NULL, 10);
+
+ for (i = 0; i < MAX_NUM_WORKERS; i++) {
+ token = strtok(NULL, "[,]");
+ if (token == NULL)
+ break;
+
+ x[i + 1] = strtol(token, NULL, 10);
+ }
+
+ if (i == MAX_NUM_WORKERS) {
+ ODPH_ERR("Too much xs, maximum number is: %d\n", MAX_NUM_WORKERS);
+ return -1;
+ }
+
+ *num = i + 1;
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ odp_shm_t shm;
+ int num_workers;
+ odp_instance_t inst;
+ odp_cpumask_t cpumask;
+ odp_ml_model_t ml_model;
+ odp_ml_capability_t capa;
+ odp_ml_config_t ml_config;
+ int32_t x[MAX_NUM_WORKERS];
+ odp_ml_model_param_t model_param;
+ odph_thread_t thread_tbl[MAX_NUM_WORKERS];
+ odph_thread_common_param_t thr_common;
+ odph_thread_param_t thr_param[MAX_NUM_WORKERS];
+ char cpumaskstr[ODP_CPUMASK_STR_SIZE];
+ int ret = 0;
+ uint32_t num = 0;
+
+ if (argc != 2) {
+ ODPH_ERR("Please specify x\n"
+ "\nUsage:\n"
+ " %s x\n"
+ "\nThis example runs inference on model y = 3x + 4\n\n",
+ argv[0]);
+ return -1;
+ }
+
+ if (parse_argv1(argv[1], &num, x))
+ return -1;
+
+ if (odp_init_global(&inst, NULL, NULL)) {
+ ODPH_ERR("Global init failed.\n");
+ return -1;
+ }
+
+ if (odp_init_local(inst, ODP_THREAD_CONTROL)) {
+ ODPH_ERR("Local init failed.\n");
+ return -1;
+ }
+
+ if (odp_ml_capability(&capa)) {
+ ODPH_ERR("odp_ml_capability() failed\n");
+ ret = -1;
+ goto odp_term;
+ }
+
+ if (MAX_MODEL_SIZE > capa.max_model_size) {
+ ODPH_ERR("Configured max model size %d exceeds max mode size %" PRIu64 " in capa\n",
+ MAX_MODEL_SIZE, capa.max_model_size);
+ ret = -1;
+ goto odp_term;
+ }
+
+ /* Set ML configuration parameter */
+ odp_ml_config_init(&ml_config);
+ ml_config.max_model_size = MAX_MODEL_SIZE;
+ ml_config.load_mode_mask = ODP_ML_COMPL_MODE_SYNC;
+ ml_config.run_mode_mask = ODP_ML_COMPL_MODE_SYNC;
+
+ if (odp_ml_config(&ml_config)) {
+ ODPH_ERR("odp_ml_config() failed\n");
+ ret = -1;
+ goto odp_term;
+ }
+
+ odp_ml_model_param_init(&model_param);
+ if (read_model_from_file("simple_linear.onnx", &model_param)) {
+ ret = -1;
+ goto odp_term;
+ }
+
+ ml_model = odp_ml_model_create("simple linear", &model_param);
+ free(model_param.model);
+ if (ml_model == ODP_ML_MODEL_INVALID) {
+ ODPH_ERR("odp_ml_model_create() failed\n");
+ ret = -1;
+ goto odp_term;
+ }
+
+ odp_ml_model_print(ml_model);
+ odp_ml_print();
+
+ if (odp_ml_model_load(ml_model, NULL)) {
+ ODPH_ERR("odp_ml_model_load() failed\n");
+ ret = -1;
+ goto destroy_model;
+ }
+
+ /* Reserve memory for args from shared mem */
+ shm = odp_shm_reserve("_thread_args", sizeof(thread_args_t),
+ ODP_CACHE_LINE_SIZE, 0);
+ if (shm == ODP_SHM_INVALID) {
+ ODPH_ERR("Error: shared mem reserve failed.\n");
+ ret = -1;
+ goto unload;
+ }
+
+ thread_args = odp_shm_addr(shm);
+ if (thread_args == NULL) {
+ ODPH_ERR("Error: shared mem alloc failed.\n");
+ ret = -1;
+ goto free_shm;
+ }
+ thread_args->shm = shm;
+ memset(thread_args, 0, sizeof(thread_args_t));
+
+ /* Prepare inference parameter */
+ for (uint32_t i = 0; i < num; i++) {
+ thread_args->infer_param[i].x = x[i];
+ thread_args->infer_param[i].ml_model = ml_model;
+ }
+
+ num_workers = odp_cpumask_default_worker(&cpumask, num);
+ (void)odp_cpumask_to_str(&cpumask, cpumaskstr, sizeof(cpumaskstr));
+
+ printf("num worker threads: %i\n", num_workers);
+ printf("first CPU: %i\n", odp_cpumask_first(&cpumask));
+ printf("cpu mask: %s\n", cpumaskstr);
+
+ /* Create and init worker threads */
+ memset(thread_tbl, 0, sizeof(thread_tbl));
+ odph_thread_common_param_init(&thr_common);
+ thr_common.instance = inst;
+ thr_common.cpumask = &cpumask;
+
+ for (int i = 0; i < num_workers; ++i) {
+ odph_thread_param_init(&thr_param[i]);
+ thr_param[i].start = run_inference;
+ thr_param[i].arg = &thread_args->infer_param[i];
+ thr_param[i].thr_type = ODP_THREAD_WORKER;
+ }
+
+ odph_thread_create(thread_tbl, &thr_common, thr_param, num_workers);
+
+ odph_thread_join(thread_tbl, num_workers);
+
+free_shm:
+ if (odp_shm_free(shm)) {
+ ODPH_ERR("Error: shm free global data\n");
+ return -1;
+ }
+
+unload:
+ /* Unload a model */
+ if (odp_ml_model_unload(ml_model, NULL)) {
+ ODPH_ERR("odp_ml_model_load() failed\n");
+ ret = -1;
+ }
+
+destroy_model:
+ if (odp_ml_model_destroy(ml_model)) {
+ ODPH_ERR("odp_ml_model_destroy() failed\n");
+ ret = -1;
+ }
+
+odp_term:
+ if (odp_term_local()) {
+ ODPH_ERR("Local term failed.\n");
+ return -1;
+ }
+
+ if (odp_term_global(inst)) {
+ ODPH_ERR("Global term failed.\n");
+ return -1;
+ }
+
+ return ret;
+}
diff --git a/platform/linux-generic/example/ml/simple_linear.onnx b/platform/linux-generic/example/ml/simple_linear.onnx
new file mode 100644
index 000000000..45c4b95b9
--- /dev/null
+++ b/platform/linux-generic/example/ml/simple_linear.onnx
Binary files differ
diff --git a/platform/linux-generic/include-abi/odp/api/abi/atomic.h b/platform/linux-generic/include-abi/odp/api/abi/atomic.h
index 9c87f9cb8..4f481f913 100644
--- a/platform/linux-generic/include-abi/odp/api/abi/atomic.h
+++ b/platform/linux-generic/include-abi/odp/api/abi/atomic.h
@@ -80,7 +80,7 @@ typedef struct ODP_ALIGNED(sizeof(odp_u128_t)) odp_atomic_u128_s {
#endif
-/** @ingroup odp_atomic
+/** @addtogroup odp_atomic
* @{
*/
diff --git a/platform/linux-generic/include-abi/odp/api/abi/buffer_types.h b/platform/linux-generic/include-abi/odp/api/abi/buffer_types.h
index 1d54bab07..63067268c 100644
--- a/platform/linux-generic/include-abi/odp/api/abi/buffer_types.h
+++ b/platform/linux-generic/include-abi/odp/api/abi/buffer_types.h
@@ -21,7 +21,7 @@ extern "C" {
#include <odp/api/std_types.h>
#include <odp/api/plat/strong_types.h>
-/** @ingroup odp_buffer
+/** @addtogroup odp_buffer
* @{
*/
diff --git a/platform/linux-generic/include-abi/odp/api/abi/classification.h b/platform/linux-generic/include-abi/odp/api/abi/classification.h
index 342f4124c..d63763dbd 100644
--- a/platform/linux-generic/include-abi/odp/api/abi/classification.h
+++ b/platform/linux-generic/include-abi/odp/api/abi/classification.h
@@ -19,7 +19,7 @@ extern "C" {
#include <odp/api/plat/strong_types.h>
-/** @ingroup odp_classification
+/** @addtogroup odp_classification
* @{
*/
diff --git a/platform/linux-generic/include-abi/odp/api/abi/comp.h b/platform/linux-generic/include-abi/odp/api/abi/comp.h
index ac3d3a4a9..45681e961 100644
--- a/platform/linux-generic/include-abi/odp/api/abi/comp.h
+++ b/platform/linux-generic/include-abi/odp/api/abi/comp.h
@@ -15,7 +15,7 @@ extern "C" {
#include <stdint.h>
-/** @ingroup odp_compression
+/** @addtogroup odp_compression
* @{
*/
diff --git a/platform/linux-generic/include-abi/odp/api/abi/crypto_types.h b/platform/linux-generic/include-abi/odp/api/abi/crypto_types.h
index d49caf89a..b1e4aa5ae 100644
--- a/platform/linux-generic/include-abi/odp/api/abi/crypto_types.h
+++ b/platform/linux-generic/include-abi/odp/api/abi/crypto_types.h
@@ -22,7 +22,7 @@ extern "C" {
#include <odp/api/plat/strong_types.h>
-/** @ingroup odp_crypto
+/** @addtogroup odp_crypto
* @{
*/
diff --git a/platform/linux-generic/include-abi/odp/api/abi/dma_types.h b/platform/linux-generic/include-abi/odp/api/abi/dma_types.h
index 768591b10..d5bee0374 100644
--- a/platform/linux-generic/include-abi/odp/api/abi/dma_types.h
+++ b/platform/linux-generic/include-abi/odp/api/abi/dma_types.h
@@ -13,7 +13,7 @@ extern "C" {
#include <odp/api/plat/strong_types.h>
-/** @ingroup odp_dma
+/** @addtogroup odp_dma
* @{
*/
diff --git a/platform/linux-generic/include-abi/odp/api/abi/event_types.h b/platform/linux-generic/include-abi/odp/api/abi/event_types.h
index 8ff5acd6b..01ee66cd3 100644
--- a/platform/linux-generic/include-abi/odp/api/abi/event_types.h
+++ b/platform/linux-generic/include-abi/odp/api/abi/event_types.h
@@ -1,5 +1,5 @@
/* Copyright (c) 2015-2018, Linaro Limited
- * Copyright (c) 2022, Nokia
+ * Copyright (c) 2022-2023, Nokia
* All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
@@ -20,7 +20,7 @@ extern "C" {
#include <odp/api/plat/strong_types.h>
-/** @ingroup odp_event
+/** @addtogroup odp_event
* @{
*/
@@ -36,6 +36,7 @@ typedef enum odp_event_type_t {
ODP_EVENT_PACKET_VECTOR = 6,
ODP_EVENT_PACKET_TX_COMPL = 7,
ODP_EVENT_DMA_COMPL = 8,
+ ODP_EVENT_ML_COMPL = 9
} odp_event_type_t;
typedef enum odp_event_subtype_t {
@@ -43,7 +44,9 @@ typedef enum odp_event_subtype_t {
ODP_EVENT_PACKET_BASIC = 1,
ODP_EVENT_PACKET_CRYPTO = 2,
ODP_EVENT_PACKET_IPSEC = 3,
- ODP_EVENT_PACKET_COMP = 4
+ ODP_EVENT_PACKET_COMP = 4,
+ ODP_EVENT_ML_COMPL_LOAD = 5,
+ ODP_EVENT_ML_COMPL_RUN = 6
} odp_event_subtype_t;
/**
diff --git a/platform/linux-generic/include-abi/odp/api/abi/ipsec_types.h b/platform/linux-generic/include-abi/odp/api/abi/ipsec_types.h
index 376666ded..1c5501997 100644
--- a/platform/linux-generic/include-abi/odp/api/abi/ipsec_types.h
+++ b/platform/linux-generic/include-abi/odp/api/abi/ipsec_types.h
@@ -22,7 +22,7 @@ extern "C" {
#include <odp/api/plat/strong_types.h>
-/** @ingroup odp_ipsec
+/** @addtogroup odp_ipsec
* @{
*/
diff --git a/platform/linux-generic/include-abi/odp/api/abi/ml_types.h b/platform/linux-generic/include-abi/odp/api/abi/ml_types.h
new file mode 100644
index 000000000..0fdb7a8dc
--- /dev/null
+++ b/platform/linux-generic/include-abi/odp/api/abi/ml_types.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2021-2023 Nokia
+ */
+
+#ifndef ODP_API_ABI_ML_TYPES_H_
+#define ODP_API_ABI_ML_TYPES_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <odp/api/std_types.h>
+#include <odp/api/plat/strong_types.h>
+
+/** @internal Implementation specific ML parameters */
+struct _odp_ml_model_extra_param_t {
+ /** @internal Dummy field to avoid empty struct */
+ char dummy;
+};
+
+/** @addtogroup odp_ml
+ * @{
+ */
+
+typedef ODP_HANDLE_T(odp_ml_model_t);
+typedef ODP_HANDLE_T(odp_ml_compl_t);
+typedef struct _odp_ml_model_extra_param_t odp_ml_model_extra_param_t;
+
+#define ODP_ML_MODEL_INVALID _odp_cast_scalar(odp_ml_model_t, 0)
+#define ODP_ML_COMPL_INVALID _odp_cast_scalar(odp_ml_compl_t, 0)
+
+#define ODP_ML_MODEL_NAME_LEN 64
+#define ODP_ML_MODEL_IO_NAME_LEN 64
+#define ODP_ML_SHAPE_NAME_LEN 16
+#define ODP_ML_EXTRA_STAT_NAME_LEN 64
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/platform/linux-generic/include-abi/odp/api/abi/packet_io_types.h b/platform/linux-generic/include-abi/odp/api/abi/packet_io_types.h
index 87e297f1d..76b162020 100644
--- a/platform/linux-generic/include-abi/odp/api/abi/packet_io_types.h
+++ b/platform/linux-generic/include-abi/odp/api/abi/packet_io_types.h
@@ -21,7 +21,7 @@ extern "C" {
#include <odp/api/std_types.h>
#include <odp/api/plat/strong_types.h>
-/** @ingroup odp_packet_io
+/** @addtogroup odp_packet_io
* @{
*/
diff --git a/platform/linux-generic/include-abi/odp/api/abi/packet_types.h b/platform/linux-generic/include-abi/odp/api/abi/packet_types.h
index 4da9332ba..90b2af107 100644
--- a/platform/linux-generic/include-abi/odp/api/abi/packet_types.h
+++ b/platform/linux-generic/include-abi/odp/api/abi/packet_types.h
@@ -21,7 +21,7 @@ extern "C" {
#include <odp/api/std_types.h>
#include <odp/api/plat/strong_types.h>
-/** @ingroup odp_packet
+/** @addtogroup odp_packet
* @{
*/
diff --git a/platform/linux-generic/include-abi/odp/api/abi/pool_types.h b/platform/linux-generic/include-abi/odp/api/abi/pool_types.h
index 0c0dbc97f..77b0ff638 100644
--- a/platform/linux-generic/include-abi/odp/api/abi/pool_types.h
+++ b/platform/linux-generic/include-abi/odp/api/abi/pool_types.h
@@ -19,7 +19,7 @@ extern "C" {
#include <odp/api/plat/strong_types.h>
-/** @ingroup odp_pool
+/** @addtogroup odp_pool
* @{
*/
diff --git a/platform/linux-generic/include-abi/odp/api/abi/proto_stats_types.h b/platform/linux-generic/include-abi/odp/api/abi/proto_stats_types.h
index 2ebddce62..d9db29188 100644
--- a/platform/linux-generic/include-abi/odp/api/abi/proto_stats_types.h
+++ b/platform/linux-generic/include-abi/odp/api/abi/proto_stats_types.h
@@ -21,7 +21,7 @@ extern "C" {
#include <odp/api/std_types.h>
#include <odp/api/plat/strong_types.h>
-/** @ingroup odp_proto_stats
+/** @addtogroup odp_proto_stats
* @{
*/
diff --git a/platform/linux-generic/include-abi/odp/api/abi/queue_types.h b/platform/linux-generic/include-abi/odp/api/abi/queue_types.h
index 1a56c7682..4eff762bd 100644
--- a/platform/linux-generic/include-abi/odp/api/abi/queue_types.h
+++ b/platform/linux-generic/include-abi/odp/api/abi/queue_types.h
@@ -21,7 +21,7 @@ extern "C" {
#include <odp/api/std_types.h>
#include <odp/api/plat/strong_types.h>
-/** @ingroup odp_queue
+/** @addtogroup odp_queue
* @{
*/
diff --git a/platform/linux-generic/include-abi/odp/api/abi/shared_memory.h b/platform/linux-generic/include-abi/odp/api/abi/shared_memory.h
index 551d49e30..bfcb9ebe5 100644
--- a/platform/linux-generic/include-abi/odp/api/abi/shared_memory.h
+++ b/platform/linux-generic/include-abi/odp/api/abi/shared_memory.h
@@ -21,7 +21,7 @@ extern "C" {
#include <odp/api/std_types.h>
#include <odp/api/plat/strong_types.h>
-/** @ingroup odp_shared_memory
+/** @addtogroup odp_shared_memory
* @{
*/
diff --git a/platform/linux-generic/include-abi/odp/api/abi/stash_types.h b/platform/linux-generic/include-abi/odp/api/abi/stash_types.h
index 960f3ef17..2a4115886 100644
--- a/platform/linux-generic/include-abi/odp/api/abi/stash_types.h
+++ b/platform/linux-generic/include-abi/odp/api/abi/stash_types.h
@@ -17,7 +17,7 @@ extern "C" {
#include <odp/api/plat/strong_types.h>
-/** @ingroup odp_stash
+/** @addtogroup odp_stash
* @{
*/
diff --git a/platform/linux-generic/include-abi/odp/api/abi/sync.h b/platform/linux-generic/include-abi/odp/api/abi/sync.h
index 236e92c8c..276514b58 100644
--- a/platform/linux-generic/include-abi/odp/api/abi/sync.h
+++ b/platform/linux-generic/include-abi/odp/api/abi/sync.h
@@ -17,7 +17,7 @@
extern "C" {
#endif
-/** @ingroup odp_barrier
+/** @addtogroup odp_barrier
* @{
*/
diff --git a/platform/linux-generic/include-abi/odp/api/abi/ticketlock.h b/platform/linux-generic/include-abi/odp/api/abi/ticketlock.h
index 6543a1cf7..b621bea7e 100644
--- a/platform/linux-generic/include-abi/odp/api/abi/ticketlock.h
+++ b/platform/linux-generic/include-abi/odp/api/abi/ticketlock.h
@@ -19,7 +19,7 @@ extern "C" {
#include <odp/api/atomic.h>
-/** @ingroup odp_locks
+/** @addtogroup odp_locks
* @{
*/
diff --git a/platform/linux-generic/include/odp/api/plat/event_inline_types.h b/platform/linux-generic/include/odp/api/plat/event_inline_types.h
index caa075871..cbf01588f 100644
--- a/platform/linux-generic/include/odp/api/plat/event_inline_types.h
+++ b/platform/linux-generic/include/odp/api/plat/event_inline_types.h
@@ -28,6 +28,7 @@ extern "C" {
typedef struct _odp_event_inline_offset_t {
uint16_t event_type;
uint16_t base_data;
+ uint16_t subtype;
uint16_t flow_id;
uint16_t pool;
diff --git a/platform/linux-generic/include/odp/api/plat/event_inlines.h b/platform/linux-generic/include/odp/api/plat/event_inlines.h
index b68ced244..990575166 100644
--- a/platform/linux-generic/include/odp/api/plat/event_inlines.h
+++ b/platform/linux-generic/include/odp/api/plat/event_inlines.h
@@ -49,6 +49,15 @@ static inline odp_event_type_t __odp_event_type_get(odp_event_t event)
return (odp_event_type_t)type;
}
+static inline odp_event_subtype_t __odp_event_subtype_get(odp_event_t event)
+{
+ int8_t type;
+
+ type = _odp_event_hdr_field(event, int8_t, subtype);
+
+ return (odp_event_subtype_t)type;
+}
+
_ODP_INLINE odp_event_type_t odp_event_type(odp_event_t event)
{
return __odp_event_type_get(event);
@@ -90,6 +99,7 @@ _ODP_INLINE void *odp_event_user_area(odp_event_t event)
switch (type) {
case ODP_EVENT_BUFFER:
+ case ODP_EVENT_ML_COMPL:
case ODP_EVENT_DMA_COMPL:
return _odp_buffer_get((odp_buffer_t)event, void *, uarea_addr);
case ODP_EVENT_PACKET:
@@ -112,6 +122,7 @@ _ODP_INLINE void *odp_event_user_area_and_flag(odp_event_t event, int *flag)
switch (type) {
case ODP_EVENT_BUFFER:
case ODP_EVENT_DMA_COMPL:
+ case ODP_EVENT_ML_COMPL:
*flag = -1;
return _odp_buffer_get((odp_buffer_t)event, void *, uarea_addr);
case ODP_EVENT_PACKET:
@@ -145,10 +156,7 @@ _ODP_INLINE void *odp_event_user_area_and_flag(odp_event_t event, int *flag)
_ODP_INLINE odp_event_subtype_t odp_event_subtype(odp_event_t event)
{
- if (__odp_event_type_get(event) != ODP_EVENT_PACKET)
- return ODP_EVENT_NO_SUBTYPE;
-
- return (odp_event_subtype_t)_odp_pkt_get((odp_packet_t)event, int8_t, subtype);
+ return __odp_event_subtype_get(event);
}
_ODP_INLINE odp_event_type_t odp_event_types(odp_event_t event,
@@ -156,9 +164,7 @@ _ODP_INLINE odp_event_type_t odp_event_types(odp_event_t event,
{
odp_event_type_t event_type = __odp_event_type_get(event);
- *subtype = event_type == ODP_EVENT_PACKET ?
- (odp_event_subtype_t)_odp_pkt_get((odp_packet_t)event, int8_t, subtype) :
- ODP_EVENT_NO_SUBTYPE;
+ *subtype = __odp_event_subtype_get(event);
return event_type;
}
@@ -172,11 +178,8 @@ _ODP_INLINE void odp_event_types_multi(const odp_event_t event[], odp_event_type
if (subtype == NULL)
return;
- for (int i = 0; i < num; i++) {
- subtype[i] = (type[i] == ODP_EVENT_PACKET) ?
- (odp_event_subtype_t)_odp_pkt_get((odp_packet_t)event[i], int8_t,
- subtype) : ODP_EVENT_NO_SUBTYPE;
- }
+ for (int i = 0; i < num; i++)
+ subtype[i] = __odp_event_subtype_get(event[i]);
}
_ODP_INLINE uint32_t odp_event_flow_id(odp_event_t event)
diff --git a/platform/linux-generic/include/odp/api/plat/packet_inline_types.h b/platform/linux-generic/include/odp/api/plat/packet_inline_types.h
index eb20ca7d7..691965624 100644
--- a/platform/linux-generic/include/odp/api/plat/packet_inline_types.h
+++ b/platform/linux-generic/include/odp/api/plat/packet_inline_types.h
@@ -50,7 +50,6 @@ typedef struct _odp_packet_inline_offset_t {
uint16_t timestamp;
uint16_t input_flags;
uint16_t flags;
- uint16_t subtype;
uint16_t cls_mark;
uint16_t ipsec_ctx;
uint16_t crypto_op;
diff --git a/platform/linux-generic/include/odp/api/plat/packet_inlines.h b/platform/linux-generic/include/odp/api/plat/packet_inlines.h
index 960dbc5fc..2dd74fa29 100644
--- a/platform/linux-generic/include/odp/api/plat/packet_inlines.h
+++ b/platform/linux-generic/include/odp/api/plat/packet_inlines.h
@@ -24,6 +24,7 @@
#include <odp/api/plat/packet_io_inlines.h>
#include <odp/api/plat/packet_inline_types.h>
#include <odp/api/plat/pool_inline_types.h>
+#include <odp/api/plat/event_inline_types.h>
#include <stdint.h>
#include <string.h>
@@ -571,7 +572,8 @@ _ODP_INLINE void odp_packet_to_event_multi(const odp_packet_t pkt[],
_ODP_INLINE odp_event_subtype_t odp_packet_subtype(odp_packet_t pkt)
{
- return (odp_event_subtype_t)_odp_pkt_get(pkt, int8_t, subtype);
+ return (odp_event_subtype_t)_odp_event_hdr_field((odp_event_t)(uintptr_t)pkt,
+ int8_t, subtype);
}
_ODP_INLINE odp_packet_tx_compl_t odp_packet_tx_compl_from_event(odp_event_t ev)
diff --git a/platform/linux-generic/include/odp_bitset.h b/platform/linux-generic/include/odp_bitset.h
index 0931fb337..e55b9ef1a 100644
--- a/platform/linux-generic/include/odp_bitset.h
+++ b/platform/linux-generic/include/odp_bitset.h
@@ -32,12 +32,6 @@ static inline uint32_t bitset_ffs(bitset_t b)
return __builtin_ffsl(b);
}
-/* Load-exclusive with memory ordering */
-static inline bitset_t bitset_monitor(bitset_t *bs, int mo)
-{
- return monitor32(bs, mo);
-}
-
#elif ATOM_BITSET_SIZE <= 64
/* Return first-bit-set with StdC ffs() semantics */
@@ -46,12 +40,6 @@ static inline uint32_t bitset_ffs(bitset_t b)
return __builtin_ffsll(b);
}
-/* Load-exclusive with memory ordering */
-static inline bitset_t bitset_monitor(bitset_t *bs, int mo)
-{
- return monitor64(bs, mo);
-}
-
#elif ATOM_BITSET_SIZE <= 128
/* Return first-bit-set with StdC ffs() semantics */
@@ -65,12 +53,6 @@ static inline uint32_t bitset_ffs(bitset_t b)
return 0;
}
-/* Load-exclusive with memory ordering */
-static inline bitset_t bitset_monitor(bitset_t *bs, int mo)
-{
- return monitor128(bs, mo);
-}
-
#else
#error Unsupported size of bit sets (ATOM_BITSET_SIZE)
#endif
diff --git a/platform/linux-generic/include/odp_buffer_internal.h b/platform/linux-generic/include/odp_buffer_internal.h
index 1cececb99..676b9f116 100644
--- a/platform/linux-generic/include/odp_buffer_internal.h
+++ b/platform/linux-generic/include/odp_buffer_internal.h
@@ -53,6 +53,13 @@ static inline odp_buffer_hdr_t *_odp_buf_hdr(odp_buffer_t buf)
return (odp_buffer_hdr_t *)(uintptr_t)buf;
}
+static inline void _odp_buffer_subtype_set(odp_buffer_t buffer, int subtype)
+{
+ odp_buffer_hdr_t *buf_hdr = _odp_buf_hdr(buffer);
+
+ buf_hdr->event_hdr.subtype = subtype;
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/platform/linux-generic/include/odp_config_internal.h b/platform/linux-generic/include/odp_config_internal.h
index 8fd8c4be7..89d89936c 100644
--- a/platform/linux-generic/include/odp_config_internal.h
+++ b/platform/linux-generic/include/odp_config_internal.h
@@ -199,6 +199,15 @@ extern "C" {
/* Enable timer scan performance benchmark. This works with inline enabled. */
#define CONFIG_TIMER_PROFILE_INLINE 0
+/* Maximum number of ML models that can be created or loaded. */
+#define CONFIG_ML_MAX_MODELS 4
+
+/* Maximum number of inputs for a ML model. */
+#define CONFIG_ML_MAX_INPUTS 4
+
+/* Maximum number of outputs for a ML model. */
+#define CONFIG_ML_MAX_OUTPUTS 4
+
#ifdef __cplusplus
}
#endif
diff --git a/platform/linux-generic/include/odp_event_internal.h b/platform/linux-generic/include/odp_event_internal.h
index d9957e530..1b85d64fc 100644
--- a/platform/linux-generic/include/odp_event_internal.h
+++ b/platform/linux-generic/include/odp_event_internal.h
@@ -65,6 +65,9 @@ typedef struct _odp_event_hdr_t {
/* Event type. Maybe different than pool type (crypto compl event) */
int8_t event_type;
+ /* Event subtype */
+ int8_t subtype;
+
/* Event flow id */
uint8_t flow_id;
diff --git a/platform/linux-generic/include/odp_global_data.h b/platform/linux-generic/include/odp_global_data.h
index f883cefd9..2a87192df 100644
--- a/platform/linux-generic/include/odp_global_data.h
+++ b/platform/linux-generic/include/odp_global_data.h
@@ -21,7 +21,6 @@ extern "C" {
#include <odp_config_internal.h>
#include <libconfig.h>
-#include <pthread.h>
#include <stdint.h>
#include <sys/types.h>
@@ -81,6 +80,7 @@ typedef struct odp_global_data_ro_t {
uint8_t ipsec;
uint8_t stash;
uint8_t traffic_mngr;
+ uint8_t ml;
} disable;
diff --git a/platform/linux-generic/include/odp_init_internal.h b/platform/linux-generic/include/odp_init_internal.h
index 24e8346ad..ca5d68c87 100644
--- a/platform/linux-generic/include/odp_init_internal.h
+++ b/platform/linux-generic/include/odp_init_internal.h
@@ -105,6 +105,9 @@ int _odp_stash_term_global(void);
int _odp_dma_init_global(void);
int _odp_dma_term_global(void);
+int _odp_ml_init_global(void);
+int _odp_ml_term_global(void);
+
#ifdef __cplusplus
}
#endif
diff --git a/platform/linux-generic/include/odp_ipsec_internal.h b/platform/linux-generic/include/odp_ipsec_internal.h
index 571796691..b97aa7031 100644
--- a/platform/linux-generic/include/odp_ipsec_internal.h
+++ b/platform/linux-generic/include/odp_ipsec_internal.h
@@ -30,7 +30,7 @@ extern "C" {
#include <protocols/ip.h>
#include <stdint.h>
-/** @ingroup odp_ipsec
+/** @addtogroup odp_ipsec
* @{
*/
diff --git a/platform/linux-generic/include/odp_macros_internal.h b/platform/linux-generic/include/odp_macros_internal.h
index abf017aec..047e550f9 100644
--- a/platform/linux-generic/include/odp_macros_internal.h
+++ b/platform/linux-generic/include/odp_macros_internal.h
@@ -1,5 +1,5 @@
/* Copyright (c) 2014-2018, Linaro Limited
- * Copyright (c) 2022, Nokia
+ * Copyright (c) 2022-2024, Nokia
* All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
@@ -26,19 +26,35 @@ extern "C" {
#define _ODP_MIN(a, b) \
__extension__ ({ \
- __typeof__(a) tmp_a = (a); \
- __typeof__(b) tmp_b = (b); \
- tmp_a < tmp_b ? tmp_a : tmp_b; \
+ __typeof__(a) min_a = (a); \
+ __typeof__(b) min_b = (b); \
+ min_a < min_b ? min_a : min_b; \
})
#define _ODP_MAX(a, b) \
__extension__ ({ \
- __typeof__(a) tmp_a = (a); \
- __typeof__(b) tmp_b = (b); \
- tmp_a > tmp_b ? tmp_a : tmp_b; \
+ __typeof__(a) max_a = (a); \
+ __typeof__(b) max_b = (b); \
+ max_a > max_b ? max_a : max_b; \
})
-#define _ODP_MAX3(a, b, c) (_ODP_MAX(_ODP_MAX((a), (b)), (c)))
+#define _ODP_MIN3(a, b, c) \
+__extension__ ({ \
+ __typeof__(a) min3_a = (a); \
+ __typeof__(b) min3_b = (b); \
+ __typeof__(c) min3_c = (c); \
+ (min3_a < min3_b ? (min3_a < min3_c ? min3_a : min3_c) : \
+ (min3_b < min3_c ? min3_b : min3_c)); \
+})
+
+#define _ODP_MAX3(a, b, c) \
+__extension__ ({ \
+ __typeof__(a) max3_a = (a); \
+ __typeof__(b) max3_b = (b); \
+ __typeof__(c) max3_c = (c); \
+ (max3_a > max3_b ? (max3_a > max3_c ? max3_a : max3_c) : \
+ (max3_b > max3_c ? max3_b : max3_c)); \
+})
/* Macros to calculate ODP_ROUNDUP_POWER2_U32() in five rounds of shift
* and OR operations. */
diff --git a/platform/linux-generic/include/odp_ml_fp16.h b/platform/linux-generic/include/odp_ml_fp16.h
new file mode 100644
index 000000000..476028cb4
--- /dev/null
+++ b/platform/linux-generic/include/odp_ml_fp16.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 Nokia
+ */
+
+#ifndef ODP_ML_FP16_H_
+#define ODP_ML_FP16_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+uint16_t _odp_float32_to_float16(float x);
+float _odp_float16_to_float32(uint16_t f16);
+uint16_t _odp_float32_to_bfloat16(float x);
+float _odp_bfloat16_to_float32(uint16_t f16);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ODP_ML_FP16_H_ */
diff --git a/platform/linux-generic/include/odp_packet_internal.h b/platform/linux-generic/include/odp_packet_internal.h
index 41a44b83c..0b03aa211 100644
--- a/platform/linux-generic/include/odp_packet_internal.h
+++ b/platform/linux-generic/include/odp_packet_internal.h
@@ -107,8 +107,8 @@ typedef struct ODP_ALIGNED_CACHE odp_packet_hdr_t {
uint16_t tailroom;
- /* Event subtype */
- int8_t subtype;
+ /* Classifier handle index */
+ uint16_t cos;
/* Used as classifier destination queue, in IPsec inline input processing and as Tx
* completion event queue. */
@@ -134,9 +134,6 @@ typedef struct ODP_ALIGNED_CACHE odp_packet_hdr_t {
/* Classifier mark */
uint16_t cls_mark;
- /* Classifier handle index */
- uint16_t cos;
-
/* Offset to payload start */
uint16_t payload_offset;
@@ -214,9 +211,11 @@ static inline odp_packet_hdr_t *packet_last_seg(odp_packet_hdr_t *hdr)
return hdr;
}
-static inline void packet_subtype_set(odp_packet_t pkt, int ev)
+static inline void packet_subtype_set(odp_packet_t pkt, int subtype)
{
- packet_hdr(pkt)->subtype = ev;
+ odp_packet_hdr_t *pkt_hdr = packet_hdr(pkt);
+
+ pkt_hdr->event_hdr.subtype = subtype;
}
/**
@@ -258,8 +257,8 @@ static inline void packet_init(odp_packet_hdr_t *pkt_hdr, uint32_t len)
pkt_hdr->headroom = pool->headroom;
pkt_hdr->tailroom = pool->seg_len - seg_len + pool->tailroom;
- if (odp_unlikely(pkt_hdr->subtype != ODP_EVENT_PACKET_BASIC))
- pkt_hdr->subtype = ODP_EVENT_PACKET_BASIC;
+ if (odp_unlikely(pkt_hdr->event_hdr.subtype != ODP_EVENT_PACKET_BASIC))
+ pkt_hdr->event_hdr.subtype = ODP_EVENT_PACKET_BASIC;
pkt_hdr->input = ODP_PKTIO_INVALID;
}
@@ -304,7 +303,7 @@ static inline void _odp_packet_copy_md(odp_packet_hdr_t *dst_hdr,
odp_packet_hdr_t *src_hdr,
odp_bool_t uarea_copy)
{
- int8_t subtype = src_hdr->subtype;
+ int8_t subtype = src_hdr->event_hdr.subtype;
/* Lengths and segmentation data are not copied:
* .frame_len
@@ -316,7 +315,7 @@ static inline void _odp_packet_copy_md(odp_packet_hdr_t *dst_hdr,
* .seg_count
*/
dst_hdr->input = src_hdr->input;
- dst_hdr->subtype = subtype;
+ dst_hdr->event_hdr.subtype = subtype;
dst_hdr->dst_queue = src_hdr->dst_queue;
dst_hdr->cos = src_hdr->cos;
dst_hdr->cls_mark = src_hdr->cls_mark;
diff --git a/platform/linux-generic/include/odp_timer_internal.h b/platform/linux-generic/include/odp_timer_internal.h
index 01ee4a0f3..38192d917 100644
--- a/platform/linux-generic/include/odp_timer_internal.h
+++ b/platform/linux-generic/include/odp_timer_internal.h
@@ -22,6 +22,12 @@
#include <odp_global_data.h>
#include <odp_pool_internal.h>
+/*
+ * Use as the argument to timer_run() to force a scan and to ignore rate
+ * limit.
+ */
+#define TIMER_SCAN_FORCE INT32_MAX
+
/**
* Internal Timeout header
*/
@@ -48,13 +54,15 @@ ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) <= ODP_CACHE_LINE_SIZE,
/* A larger decrement value should be used after receiving events compared to
* an 'empty' call. */
-void _odp_timer_run_inline(int dec);
+uint64_t _odp_timer_run_inline(int dec);
/* Static inline wrapper to minimize modification of schedulers. */
-static inline void timer_run(int dec)
+static inline uint64_t timer_run(int dec)
{
if (odp_global_rw->inline_timers)
- _odp_timer_run_inline(dec);
+ return _odp_timer_run_inline(dec);
+
+ return UINT64_MAX;
}
#endif
diff --git a/platform/linux-generic/libodp-linux.pc.in b/platform/linux-generic/libodp-linux.pc.in
index 05ba5b9d6..62589c1a3 100644
--- a/platform/linux-generic/libodp-linux.pc.in
+++ b/platform/linux-generic/libodp-linux.pc.in
@@ -8,5 +8,5 @@ Description: The ODP packet processing engine
Version: @PKGCONFIG_VERSION@
Requires.private: libconfig@AARCH64CRYPTO_PKG@
Libs: -L${libdir} -l@ODP_LIB_NAME@ @ATOMIC_LIBS_NON_ABI_COMPAT@
-Libs.private: @OPENSSL_STATIC_LIBS@ @DPDK_LIBS@ @PCAP_LIBS@ @PTHREAD_LIBS@ @TIMER_LIBS@ @LIBXDP_LIBS@ -lpthread @ATOMIC_LIBS_ABI_COMPAT@ @IPSEC_MB_LIBS@
+Libs.private: @OPENSSL_STATIC_LIBS@ @DPDK_LIBS@ @PCAP_LIBS@ @PTHREAD_LIBS@ @TIMER_LIBS@ @LIBXDP_LIBS@ -lpthread @ATOMIC_LIBS_ABI_COMPAT@ @IPSEC_MB_LIBS@ @ORT_LIBS@
Cflags: -I${includedir}
diff --git a/platform/linux-generic/m4/configure.m4 b/platform/linux-generic/m4/configure.m4
index 61b65540f..3306849d2 100644
--- a/platform/linux-generic/m4/configure.m4
+++ b/platform/linux-generic/m4/configure.m4
@@ -31,10 +31,11 @@ m4_include([platform/linux-generic/m4/odp_pcapng.m4])
m4_include([platform/linux-generic/m4/odp_dpdk.m4])
m4_include([platform/linux-generic/m4/odp_wfe.m4])
m4_include([platform/linux-generic/m4/odp_xdp.m4])
+m4_include([platform/linux-generic/m4/odp_ml.m4])
ODP_EVENT_VALIDATION
ODP_SCHEDULER
-AS_VAR_APPEND([PLAT_DEP_LIBS], ["${ATOMIC_LIBS} ${AARCH64CRYPTO_LIBS} ${LIBCONFIG_LIBS} ${OPENSSL_LIBS} ${IPSEC_MB_LIBS} ${DPDK_LIBS_LT} ${LIBCLI_LIBS} ${LIBXDP_LIBS}"])
+AS_VAR_APPEND([PLAT_DEP_LIBS], ["${ATOMIC_LIBS} ${AARCH64CRYPTO_LIBS} ${LIBCONFIG_LIBS} ${OPENSSL_LIBS} ${IPSEC_MB_LIBS} ${DPDK_LIBS_LT} ${LIBCLI_LIBS} ${LIBXDP_LIBS} ${ORT_LIBS}"])
# Add text to the end of configure with platform specific settings.
# Make sure it's aligned same as other lines in configure.ac.
@@ -46,6 +47,7 @@ AS_VAR_APPEND([PLAT_CFG_TEXT], ["
pcap: ${have_pcap}
pcapng: ${have_pcapng}
wfe_locks: ${use_wfe_locks}
+ ml_support: ${ml_support}
default_config_path: ${default_config_path}"])
# Ignore Clang specific errors about fields with variable sized type not at the
@@ -59,6 +61,8 @@ AM_CONDITIONAL([PLATFORM_IS_LINUX_GENERIC],
AC_CONFIG_FILES([platform/linux-generic/Makefile
platform/linux-generic/libodp-linux.pc
platform/linux-generic/dumpconfig/Makefile
+ platform/linux-generic/example/Makefile
+ platform/linux-generic/example/ml/Makefile
platform/linux-generic/test/Makefile
platform/linux-generic/test/example/Makefile
platform/linux-generic/test/example/classifier/Makefile
@@ -73,6 +77,7 @@ AC_CONFIG_FILES([platform/linux-generic/Makefile
platform/linux-generic/test/example/switch/Makefile
platform/linux-generic/test/validation/api/shmem/Makefile
platform/linux-generic/test/validation/api/pktio/Makefile
+ platform/linux-generic/test/validation/api/ml/Makefile
platform/linux-generic/test/performance/Makefile
platform/linux-generic/test/performance/dmafwd/Makefile
platform/linux-generic/test/pktio_ipc/Makefile])
diff --git a/platform/linux-generic/m4/odp_libconfig.m4 b/platform/linux-generic/m4/odp_libconfig.m4
index a6d19f661..77095e0fe 100644
--- a/platform/linux-generic/m4/odp_libconfig.m4
+++ b/platform/linux-generic/m4/odp_libconfig.m4
@@ -3,7 +3,7 @@
##########################################################################
m4_define([_odp_config_version_generation], [0])
m4_define([_odp_config_version_major], [1])
-m4_define([_odp_config_version_minor], [27])
+m4_define([_odp_config_version_minor], [28])
m4_define([_odp_config_version],
[_odp_config_version_generation._odp_config_version_major._odp_config_version_minor])
diff --git a/platform/linux-generic/m4/odp_ml.m4 b/platform/linux-generic/m4/odp_ml.m4
new file mode 100644
index 000000000..a7b9a4fd6
--- /dev/null
+++ b/platform/linux-generic/m4/odp_ml.m4
@@ -0,0 +1,46 @@
+##########################################################################
+# Onnxruntime library path and name
+##########################################################################
+# Optional configure parameter for a non-standard install prefix of onnxruntime
+AC_ARG_WITH([ort-path],
+ [AS_HELP_STRING([--with-ort-path=DIR],
+ [path to onnxruntime libs and headers [default=system]])],
+ [ort_path_given=yes
+ ORT_CPPFLAGS="-I$withval/include"
+ ORT_LIBS="-L$withval/lib"
+ ORT_RPATH="-R$withval/lib"],
+ [])
+
+##########################################################################
+# Save and set temporary compilation flags
+##########################################################################
+OLD_CPPFLAGS=$CPPFLAGS
+OLD_LIBS=$LIBS
+CPPFLAGS="$ORT_CPPFLAGS $CPPFLAGS"
+LIBS="$ORT_LIBS $LIBS"
+
+#########################################################################
+# If ort is available, enable ML API
+#########################################################################
+ml_support=no
+AC_CHECK_HEADERS([onnxruntime_c_api.h],
+ [AC_CHECK_LIB(onnxruntime, OrtGetApiBase, [ml_support=yes], [], [])],
+ [AS_IF([test "x$ort_path_given" = "xyes"],
+ [AC_MSG_ERROR([ort not found at the specified path (--with-ort-path)])])])
+
+AS_IF([test "x$ml_support" != "xno"],
+ [ORT_LIBS="$ORT_RPATH $ORT_LIBS -lonnxruntime -lm"],
+ [ORT_CPPFLAGS="" ORT_LIBS="-lm"])
+
+AC_CONFIG_COMMANDS_PRE([dnl
+AM_CONDITIONAL([WITH_ML], [test x$ml_support = xyes ])
+])
+
+##########################################################################
+# Restore old saved variables
+##########################################################################
+LIBS=$OLD_LIBS
+CPPFLAGS=$OLD_CPPFLAGS
+
+AC_SUBST([ORT_CPPFLAGS])
+AC_SUBST([ORT_LIBS])
diff --git a/platform/linux-generic/odp_classification.c b/platform/linux-generic/odp_classification.c
index 0e6eea3ae..016a8f0c5 100644
--- a/platform/linux-generic/odp_classification.c
+++ b/platform/linux-generic/odp_classification.c
@@ -299,10 +299,11 @@ odp_cos_t odp_cls_cos_create(const char *name, const odp_cls_cos_param_t *param_
param.hash_proto);
tbl_index = i * CLS_COS_QUEUE_MAX;
for (j = 0; j < param.num_queue; j++) {
- char name[ODP_QUEUE_NAME_LEN];
+ char hq_name[ODP_QUEUE_NAME_LEN];
- snprintf(name, sizeof(name), "_odp_cos_hq_%u_%u", i, j);
- queue = odp_queue_create(name, &cos->queue_param);
+ snprintf(hq_name, sizeof(hq_name), "_odp_cos_hq_%u_%u",
+ i, j);
+ queue = odp_queue_create(hq_name, &cos->queue_param);
if (queue == ODP_QUEUE_INVALID) {
/* unwind the queues */
_cls_queue_unwind(tbl_index, j);
diff --git a/platform/linux-generic/odp_cpumask.c b/platform/linux-generic/odp_cpumask.c
index d0a9953f7..7d7575f51 100644
--- a/platform/linux-generic/odp_cpumask.c
+++ b/platform/linux-generic/odp_cpumask.c
@@ -7,7 +7,6 @@
#include <odp_posix_extensions.h>
#include <sched.h>
-#include <pthread.h>
#include <odp/api/cpumask.h>
#include <odp/api/init.h>
diff --git a/platform/linux-generic/odp_cpumask_task.c b/platform/linux-generic/odp_cpumask_task.c
index 0807e231e..a579b2e7e 100644
--- a/platform/linux-generic/odp_cpumask_task.c
+++ b/platform/linux-generic/odp_cpumask_task.c
@@ -12,7 +12,6 @@
#include <odp_debug_internal.h>
#include <odp_global_data.h>
-#include <pthread.h>
#include <sched.h>
int odp_cpumask_default_worker(odp_cpumask_t *mask, int max_num)
diff --git a/platform/linux-generic/odp_event.c b/platform/linux-generic/odp_event.c
index e15cb1c50..f3644f02b 100644
--- a/platform/linux-generic/odp_event.c
+++ b/platform/linux-generic/odp_event.c
@@ -12,6 +12,7 @@
#include <odp/api/packet.h>
#include <odp/api/timer.h>
#include <odp/api/pool.h>
+#include <odp/api/ml.h>
#include <odp_buffer_internal.h>
#include <odp_ipsec_internal.h>
@@ -36,6 +37,7 @@ const _odp_event_inline_offset_t
_odp_event_inline_offset ODP_ALIGNED_CACHE = {
.event_type = offsetof(_odp_event_hdr_t, event_type),
.base_data = offsetof(_odp_event_hdr_t, base_data),
+ .subtype = offsetof(_odp_event_hdr_t, subtype),
.flow_id = offsetof(_odp_event_hdr_t, flow_id),
.pool = offsetof(_odp_event_hdr_t, pool),
};
@@ -68,6 +70,9 @@ static inline void event_free(odp_event_t event, _odp_ev_id_t id)
case ODP_EVENT_DMA_COMPL:
odp_dma_compl_free(odp_dma_compl_from_event(event));
break;
+ case ODP_EVENT_ML_COMPL:
+ odp_ml_compl_free(odp_ml_compl_from_event(event));
+ break;
default:
_ODP_ABORT("Invalid event type: %d\n", odp_event_type(event));
}
@@ -116,6 +121,8 @@ int odp_event_is_valid(odp_event_t event)
/* Fall through */
case ODP_EVENT_DMA_COMPL:
/* Fall through */
+ case ODP_EVENT_ML_COMPL:
+ /* Fall through */
case ODP_EVENT_PACKET_TX_COMPL:
break;
default:
diff --git a/platform/linux-generic/odp_init.c b/platform/linux-generic/odp_init.c
index 05b693c94..795252df1 100644
--- a/platform/linux-generic/odp_init.c
+++ b/platform/linux-generic/odp_init.c
@@ -51,6 +51,7 @@ enum init_stage {
IPSEC_SAD_INIT,
IPSEC_INIT,
DMA_INIT,
+ ML_INIT,
ALL_INIT /* All init stages completed */
};
@@ -95,6 +96,7 @@ static void disable_features(odp_global_data_ro_t *global_ro,
global_ro->disable.traffic_mngr = init_param->not_used.feat.tm;
global_ro->disable.compress = init_param->not_used.feat.compress;
+ global_ro->disable.ml = init_param->not_used.feat.ml;
}
void odp_init_param_init(odp_init_t *param)
@@ -145,6 +147,13 @@ static int term_global(enum init_stage stage)
switch (stage) {
case ALL_INIT:
+ case ML_INIT:
+ if (_odp_ml_term_global()) {
+ _ODP_ERR("ODP ML term failed.\n");
+ rc = -1;
+ }
+ /* Fall through */
+
case DMA_INIT:
if (_odp_dma_term_global()) {
_ODP_ERR("ODP DMA term failed.\n");
@@ -509,6 +518,12 @@ int odp_init_global(odp_instance_t *instance,
}
stage = DMA_INIT;
+ if (_odp_ml_init_global()) {
+ _ODP_ERR("ODP ML init failed.\n");
+ goto init_failed;
+ }
+ stage = ML_INIT;
+
*instance = (odp_instance_t)odp_global_ro.main_pid;
return 0;
diff --git a/platform/linux-generic/odp_ipsec.c b/platform/linux-generic/odp_ipsec.c
index 8c97a0f55..ee402b935 100644
--- a/platform/linux-generic/odp_ipsec.c
+++ b/platform/linux-generic/odp_ipsec.c
@@ -2180,7 +2180,7 @@ finish:
int odp_ipsec_in(const odp_packet_t pkt_in[], int num_in, odp_packet_t pkt_out[], int *num_out,
const odp_ipsec_in_param_t *param)
{
- int max_out = _ODP_MIN(_ODP_MIN(num_in, *num_out), MAX_BURST), num_crypto;
+ int max_out = _ODP_MIN3(num_in, *num_out, MAX_BURST), num_crypto;
odp_packet_t crypto_pkts[MAX_BURST];
odp_crypto_packet_op_param_t crypto_param[MAX_BURST];
ipsec_op_t ops[MAX_BURST], *crypto_ops[MAX_BURST];
@@ -2288,7 +2288,7 @@ finish:
int odp_ipsec_out(const odp_packet_t pkt_in[], int num_in, odp_packet_t pkt_out[], int *num_out,
const odp_ipsec_out_param_t *param)
{
- int max_out = _ODP_MIN(_ODP_MIN(num_in, *num_out), MAX_BURST), num_crypto;
+ int max_out = _ODP_MIN3(num_in, *num_out, MAX_BURST), num_crypto;
odp_packet_t crypto_pkts[MAX_BURST];
odp_crypto_packet_op_param_t crypto_param[MAX_BURST];
ipsec_op_t ops[MAX_BURST], *crypto_ops[MAX_BURST];
diff --git a/platform/linux-generic/odp_ishmpool.c b/platform/linux-generic/odp_ishmpool.c
index 9b6340d7e..89ec10695 100644
--- a/platform/linux-generic/odp_ishmpool.c
+++ b/platform/linux-generic/odp_ishmpool.c
@@ -135,8 +135,8 @@ static inline uintptr_t get_bblock_nr(pool_t *bpool, void *addr)
static inline void remove_from_list(pool_t *bpool, uint8_t order,
bblock_t *bblock)
{
- bblock_t *curr; /* current bblock (when parsing list) */
- bblock_t *prev; /* previous bblock (when parsing list) */
+ bblock_t *curr;
+ bblock_t *prev = NULL;
curr = bpool->ctrl.free_heads[order];
if (!curr)
diff --git a/platform/linux-generic/odp_ml.c b/platform/linux-generic/odp_ml.c
new file mode 100644
index 000000000..6ab9e7177
--- /dev/null
+++ b/platform/linux-generic/odp_ml.c
@@ -0,0 +1,2646 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 Nokia
+ */
+
+#include <odp/autoheader_external.h>
+
+#include <odp/api/atomic.h>
+#include <odp/api/buffer.h>
+#include <odp/api/event.h>
+#include <odp/api/hints.h>
+#include <odp/api/ml.h>
+#include <odp/api/pool.h>
+#include <odp/api/queue.h>
+#include <odp/api/shared_memory.h>
+#include <odp/api/std_types.h>
+#include <odp/api/ticketlock.h>
+
+#include <odp/api/plat/event_inline_types.h>
+#include <odp/api/plat/strong_types.h>
+
+#include <odp_buffer_internal.h>
+#include <odp_config_internal.h>
+#include <odp_debug_internal.h>
+#include <odp_global_data.h>
+#include <odp_init_internal.h>
+#include <odp_libconfig_internal.h>
+#include <odp_macros_internal.h>
+#include <odp_pool_internal.h>
+
+#include <onnxruntime_c_api.h>
+
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#define ML_MAX_IO_SEGS UINT32_MAX
+#define ML_MAX_COMPL_ID 32
+#define ML_MAX_CONFIG_STR_LEN 65
+#define ML_MAX_MODEL_SIZE (1024 * 1024 * 1024)
+#define ML_MAX_MODELS_CREATED CONFIG_ML_MAX_MODELS
+#define ML_MAX_MODELS_LOADED CONFIG_ML_MAX_MODELS
+
+/* Error codes */
+enum {
+ /* Feature not supported */
+ ML_FEATURE_NOT_SUPPORTED = 1,
+
+ /* Model is not created */
+ ML_NOT_CREATED,
+
+ /* Model was not loaded */
+ ML_NOT_LOADED,
+
+ /* Model has already been loaded */
+ ML_LOADED,
+
+ /* Bad input */
+ ML_BAD_INPUT,
+
+ /* Failure from the underlying onnxruntime library */
+ ML_LIB_FAILED,
+
+ /* Bad output */
+ ML_BAD_OUTPUT,
+
+ /* Bad handle */
+ ML_BAD_HDL
+};
+
+typedef struct ort_run_opts_t {
+ int enable_profiling;
+
+ ExecutionMode execution_mode;
+
+ int inter_op_num_threads;
+
+ int intra_op_num_threads;
+
+ GraphOptimizationLevel graph_opt_level;
+
+ char opt_model_filepath[ML_MAX_CONFIG_STR_LEN];
+} ort_run_opts_t;
+
+typedef struct ml_input_t {
+ /* Combined input start address */
+ void *addr;
+ /* Data size in bytes */
+ uint64_t size;
+} ml_input_t;
+
+/* Onnxruntime model info */
+typedef struct ml_model_t {
+ /* Guards state, which must be accessed atomically */
+ odp_ticketlock_t lock;
+
+ enum {
+ ML_STATE_FREE = 0, /* Not allocated */
+ ML_STATE_CREATED, /* Model is created */
+ ML_STATE_LOADED, /* Model is loaded */
+ ML_STATE_INFERENCING, /* Model is inferencing */
+ } state;
+
+ OrtSession *session;
+ OrtSessionOptions *session_opts;
+ uint32_t max_compl_id;
+ odp_atomic_u32_t compl_status[ML_MAX_COMPL_ID];
+
+ odp_ml_model_info_t info;
+ odp_ml_input_info_t input_info[CONFIG_ML_MAX_INPUTS];
+ uint64_t input_sizes[CONFIG_ML_MAX_INPUTS];
+ odp_ml_output_info_t output_info[CONFIG_ML_MAX_OUTPUTS];
+ uint64_t output_sizes[CONFIG_ML_MAX_OUTPUTS];
+
+ struct {
+ void *user_ptr;
+ } result[ML_MAX_COMPL_ID];
+} ml_model_t;
+
+typedef struct ml_global_t {
+ odp_shm_t shm;
+
+ odp_ml_capability_t capa;
+ odp_ml_config_t ml_config;
+
+ odp_pool_param_t pool_param;
+
+ const OrtApi *ort_api;
+ OrtEnv *env;
+ ort_run_opts_t ort_run_opts;
+
+ ml_model_t models[ML_MAX_MODELS_CREATED];
+
+} ml_global_t;
+
+static ml_global_t *_odp_ml_glb;
+
+static inline ml_model_t *ml_model_from_handle(odp_ml_model_t model)
+{
+ return (ml_model_t *)(uintptr_t)model;
+}
+
+int odp_ml_capability(odp_ml_capability_t *capa)
+{
+ odp_pool_capability_t pool_capa;
+
+ memset(capa, 0, sizeof(odp_ml_capability_t));
+
+ if (odp_global_ro.disable.ml) {
+ _ODP_PRINT("ML is disabled\n");
+ return 0;
+ }
+
+ capa->max_model_size = ML_MAX_MODEL_SIZE;
+ capa->max_models = ML_MAX_MODELS_CREATED;
+ capa->max_models_loaded = ML_MAX_MODELS_LOADED;
+ capa->max_compl_id = ML_MAX_COMPL_ID;
+ capa->max_inputs = CONFIG_ML_MAX_INPUTS;
+ capa->max_outputs = CONFIG_ML_MAX_OUTPUTS;
+ capa->max_segs_per_input = ML_MAX_IO_SEGS;
+ capa->max_segs_per_output = ML_MAX_IO_SEGS;
+ capa->min_input_align = 1;
+ capa->min_output_align = 1;
+
+ capa->load.compl_mode_mask = ODP_ML_COMPL_MODE_SYNC |
+ ODP_ML_COMPL_MODE_POLL |
+ ODP_ML_COMPL_MODE_EVENT;
+ capa->load.compl_queue_plain = 1;
+ capa->load.compl_queue_sched = 1;
+
+ capa->run.compl_mode_mask = ODP_ML_COMPL_MODE_SYNC |
+ ODP_ML_COMPL_MODE_POLL |
+ ODP_ML_COMPL_MODE_EVENT;
+ capa->run.compl_queue_plain = 1;
+ capa->run.compl_queue_sched = 1;
+
+ if (odp_pool_capability(&pool_capa)) {
+ _ODP_ERR("Pool capability failed\n");
+ return -1;
+ }
+
+ capa->pool.max_pools = pool_capa.buf.max_pools;
+ capa->pool.max_num = pool_capa.buf.max_num;
+ capa->pool.max_uarea_size = pool_capa.buf.max_uarea_size;
+ capa->pool.uarea_persistence = pool_capa.buf.uarea_persistence;
+ capa->pool.max_cache_size = pool_capa.buf.max_cache_size;
+ capa->pool.min_cache_size = pool_capa.buf.min_cache_size;
+
+ return 0;
+}
+
+void odp_ml_config_init(odp_ml_config_t *config)
+{
+ memset(config, 0, sizeof(odp_ml_config_t));
+ config->max_models_created = 1;
+ config->max_models_loaded = 1;
+}
+
+int odp_ml_config(const odp_ml_config_t *config)
+{
+ if (!config) {
+ _ODP_ERR("Error: config must not be NULL\n");
+ return -1;
+ }
+
+ if (config->max_model_size == 0 || config->max_models_created == 0 ||
+ config->max_models_loaded == 0) {
+ _ODP_ERR("Error: max_model_size, max_models_created and max_models_loaded"
+ " must be bigger than 0\n");
+ return -1;
+ }
+
+ if (config->max_models_loaded > config->max_models_created) {
+ _ODP_ERR("Error: max_models_loaded %d exceeds max_models_created %d\n",
+ config->max_models_loaded, config->max_models_created);
+ return -1;
+ }
+
+ if (config->max_models_created > ML_MAX_MODELS_CREATED) {
+ _ODP_ERR("Error: max_models_created %d exceeds maximum number"
+ " of models that can be created in this driver %d\n",
+ config->max_models_created, ML_MAX_MODELS_CREATED);
+ return -1;
+ }
+
+ if (config->max_models_loaded > ML_MAX_MODELS_LOADED) {
+ _ODP_ERR("Error: max_models_loaded %d exceeds maximum number"
+ " of models that can be loaded in this driver %d\n",
+ config->max_models_loaded, ML_MAX_MODELS_LOADED);
+ return -1;
+ }
+
+ if (config->max_model_size > ML_MAX_MODEL_SIZE) {
+ _ODP_ERR("max_model_size %" PRIu64 " exceeds supported maximum model size %d\n",
+ config->max_model_size, ML_MAX_MODEL_SIZE);
+ return -1;
+ }
+
+ _odp_ml_glb->ml_config = *config;
+ return 0;
+}
+
+void odp_ml_model_param_init(odp_ml_model_param_t *param)
+{
+ memset(param, 0, sizeof(odp_ml_model_param_t));
+}
+
+static int check_ortstatus(OrtStatus * const status)
+{
+ if (status != NULL) {
+ const char *msg = _odp_ml_glb->ort_api->GetErrorMessage(status);
+
+ _ODP_ERR("%s\n", msg);
+ _odp_ml_glb->ort_api->ReleaseStatus(status);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Get model input and output count */
+static int get_model_io_count(OrtSession *model, uint32_t *num_inputs, uint32_t *num_outputs)
+{
+ size_t num = 0;
+ OrtStatus *status = NULL;
+ const OrtApi *ort_api = _odp_ml_glb->ort_api;
+
+ status = ort_api->SessionGetInputCount(model, &num);
+ if (check_ortstatus(status)) {
+ _ODP_ERR("Get model input count failed\n");
+ return -1;
+ }
+
+ *num_inputs = num;
+ _ODP_DBG("num_inputs: %u\n", *num_inputs);
+
+ status = ort_api->SessionGetOutputCount(model, &num);
+ if (check_ortstatus(status)) {
+ _ODP_ERR("Get model output count failed\n");
+ return -1;
+ }
+
+ *num_outputs = num;
+ _ODP_DBG("num_outputs: %u\n", *num_outputs);
+
+ return 0;
+}
+
+static odp_ml_data_type_t onnx_dtype_to_odp_dtype(ONNXTensorElementDataType onnx_dtype)
+{
+ switch (onnx_dtype) {
+ case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
+ return ODP_ML_DATA_TYPE_FP32;
+ case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:
+ return ODP_ML_DATA_TYPE_UINT8;
+ case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8:
+ return ODP_ML_DATA_TYPE_INT8;
+ case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16:
+ return ODP_ML_DATA_TYPE_UINT16;
+ case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16:
+ return ODP_ML_DATA_TYPE_INT16;
+ case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32:
+ return ODP_ML_DATA_TYPE_INT32;
+ case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32:
+ return ODP_ML_DATA_TYPE_UINT32;
+ case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64:
+ return ODP_ML_DATA_TYPE_INT64;
+ case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64:
+ return ODP_ML_DATA_TYPE_UINT64;
+ case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16:
+ return ODP_ML_DATA_TYPE_FP16;
+ case ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16:
+ return ODP_ML_DATA_TYPE_BFP16;
+ case ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE:
+ return ODP_ML_DATA_TYPE_FP64;
+ case ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL:
+ /* Fall through */
+ case ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64:
+ /* Fall through */
+ case ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128:
+ /* Fall through */
+ default:
+ _ODP_ERR("onnx_dtype %d not supported by odp_ml\n", onnx_dtype);
+ return ODP_ML_DATA_TYPE_NONE;
+ }
+}
+
+/* Get the size of given odp_ml_data_type_t in bytes */
+static uint32_t size_of_odp_ml_data_type(odp_ml_data_type_t data_type)
+{
+ switch (data_type) {
+ case ODP_ML_DATA_TYPE_NONE:
+ return 0;
+
+ case ODP_ML_DATA_TYPE_INT8:
+ /* Fall through */
+ case ODP_ML_DATA_TYPE_UINT8:
+ return 1;
+
+ case ODP_ML_DATA_TYPE_INT16:
+ /* Fall through */
+ case ODP_ML_DATA_TYPE_UINT16:
+ /* Fall through */
+ case ODP_ML_DATA_TYPE_FP16:
+ /* Fall through */
+ case ODP_ML_DATA_TYPE_BFP16:
+ return 2;
+
+ case ODP_ML_DATA_TYPE_INT24:
+ /* Fall through */
+ case ODP_ML_DATA_TYPE_UINT24:
+ return 3;
+
+ case ODP_ML_DATA_TYPE_INT32:
+ /* Fall through */
+ case ODP_ML_DATA_TYPE_UINT32:
+ /* Fall through */
+ case ODP_ML_DATA_TYPE_FP32:
+ return 4;
+
+ case ODP_ML_DATA_TYPE_INT64:
+ /* Fall through */
+ case ODP_ML_DATA_TYPE_UINT64:
+ /* Fall through */
+ case ODP_ML_DATA_TYPE_FP64:
+ return 8;
+
+ default:
+ return 0;
+ }
+}
+
+static int get_shape(int64_t dims[], odp_ml_shape_info_t *shape)
+{
+ uint32_t dyn_cnt = 0;
+
+ for (uint32_t i = 0; i < shape->num_dim; i++) {
+ if (dims[i] == 0) {
+ _ODP_ERR("Dimension value: %" PRId64 " must be at least 1\n", dims[i]);
+ return -1;
+ } else if (dims[i] == -1) { /* Symbolic dimension */
+ dyn_cnt++;
+ shape->dim[i] = ODP_ML_DIM_DYNAMIC;
+ shape->dim_min[i] = 0; /*unknown*/
+ shape->dim_max[i] = 0; /*unknown*/
+ } else if (dims[i] > 0 && dims[i] < UINT32_MAX) {
+ shape->dim[i] = dims[i];
+ shape->dim_min[i] = dims[i];
+ shape->dim_max[i] = dims[i];
+ } else {
+ _ODP_ERR("Dimension value: %" PRId64 " invalid\n", dims[i]);
+ return -1;
+ }
+ }
+
+ if (dyn_cnt == 0) {
+ shape->type = ODP_ML_SHAPE_STATIC;
+ } else if (dyn_cnt == 1) {
+ shape->type = ODP_ML_SHAPE_BATCH;
+ } else {
+ _ODP_ERR("Data shape type not supported by ODP\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static inline void calculate_model_io_size(const odp_ml_shape_info_t *shape, uint64_t *size)
+{
+ /* Calculate the data size in bytes of this tensor, 0 for tensors with
+ * dynamic batch sizes */
+ for (size_t i = 0; i < shape->num_dim; i++) {
+ /* Skip dynamic dimension size */
+ if (shape->dim[i] == ODP_ML_DIM_DYNAMIC) {
+ *size = 0;
+ break;
+ }
+ (*size) *= shape->dim[i];
+ }
+}
+
+static int get_model_io_type_shape_size(OrtTypeInfo *type_info, odp_ml_shape_info_t *shape,
+ odp_ml_data_type_t *data_type, uint32_t *data_type_size,
+ uint64_t *size)
+{
+ ONNXTensorElementDataType tensor_type;
+ const OrtTensorTypeAndShapeInfo *tensor_info;
+ size_t num_dim = 0;
+ OrtStatus *status = NULL;
+ int64_t dims[ODP_ML_MAX_DIMS] = {0};
+ const OrtApi *ort_api = _odp_ml_glb->ort_api;
+
+ status = ort_api->CastTypeInfoToTensorInfo(type_info, &tensor_info);
+ if (check_ortstatus(status)) {
+ _ODP_ERR("CastTypeInfoToTensorInfo failed\n");
+ return -1;
+ }
+
+ status = ort_api->GetTensorElementType(tensor_info, &tensor_type);
+ if (check_ortstatus(status)) {
+ _ODP_ERR("GetTensorElementType failed\n");
+ return -1;
+ }
+
+ *data_type = onnx_dtype_to_odp_dtype(tensor_type);
+ if (*data_type == ODP_ML_DATA_TYPE_NONE) /* Type not supported by odp */
+ return -1;
+
+ status = ort_api->GetDimensionsCount(tensor_info, &num_dim);
+ if (check_ortstatus(status)) {
+ _ODP_ERR("GetDimensionsCount failed\n");
+ return -1;
+ }
+
+ if (num_dim > ODP_ML_MAX_DIMS) {
+ _ODP_ERR("Number of dimensions: %zu exceeds supported maximum number"
+ " of dimensions: %d\n", num_dim, ODP_ML_MAX_DIMS);
+ return -1;
+ }
+ shape->num_dim = num_dim;
+
+ status = ort_api->GetDimensions(tensor_info, dims, num_dim);
+ if (check_ortstatus(status)) {
+ _ODP_ERR("GetDimensions failed\n");
+ return -1;
+ }
+
+ if (get_shape(dims, shape))
+ return -1;
+
+ *data_type_size = size_of_odp_ml_data_type(*data_type);
+
+ *size = *data_type_size;
+ calculate_model_io_size(shape, size);
+
+ return 0;
+}
+
+/* Get model input and output info */
+static int get_model_io_info(OrtSession *session, ml_model_t *mdl,
+ const odp_ml_model_param_t *param)
+{
+ char *name;
+ OrtTypeInfo *type_info;
+ const odp_ml_data_format_t *data_format;
+ OrtStatus *status = NULL;
+ OrtAllocator *allocator = NULL;
+ const OrtApi *ort_api = _odp_ml_glb->ort_api;
+ odp_ml_input_info_t *input_info = mdl->input_info;
+ odp_ml_output_info_t *output_info = mdl->output_info;
+
+ status = ort_api->GetAllocatorWithDefaultOptions(&allocator);
+ if (check_ortstatus(status)) {
+ _ODP_ERR("GetAllocatorWithDefaultOptions failed\n");
+ return -1;
+ }
+
+ /* Retrieve info about input array. */
+ memset(input_info, 0, sizeof(mdl->input_info));
+ for (uint32_t i = 0; i < mdl->info.num_inputs; i++) {
+ name = NULL;
+ status = ort_api->SessionGetInputName(session, i, allocator, &name);
+ if (check_ortstatus(status)) {
+ _ODP_ERR("Get %uth input name failed\n", i);
+ return -1;
+ }
+
+ strncpy(input_info[i].name, name, ODP_ML_MODEL_IO_NAME_LEN - 1);
+ input_info[i].name[ODP_ML_MODEL_IO_NAME_LEN - 1] = 0;
+
+ /* Free memory allocated by SessionGetInputName */
+ status = ort_api->AllocatorFree(allocator, name);
+ if (check_ortstatus(status)) {
+ _ODP_ERR("AllocatorFree %uth input_name failed\n", i);
+ return -1;
+ }
+
+ if (param->extra_info.num_inputs) {
+ data_format = &param->extra_info.input_format[i];
+
+ input_info[i].shape = data_format->shape;
+ input_info[i].data_type = data_format->data_type;
+ input_info[i].data_type_size = data_format->data_type_size;
+
+ mdl->input_sizes[i] = input_info[i].data_type_size;
+ calculate_model_io_size(&data_format->shape, &mdl->input_sizes[i]);
+ continue;
+ }
+
+ type_info = NULL;
+ status = ort_api->SessionGetInputTypeInfo(session, i, &type_info);
+ if (check_ortstatus(status)) {
+ _ODP_ERR("SessionGetInputTypeInfo failed\n");
+ return -1;
+ }
+
+ if (get_model_io_type_shape_size(type_info, &input_info[i].shape,
+ &input_info[i].data_type,
+ &input_info[i].data_type_size,
+ &mdl->input_sizes[i])) {
+ _ODP_ERR("get_model_io_type_shape_size() for input failed\n");
+ ort_api->ReleaseTypeInfo(type_info);
+ return -1;
+ }
+
+ ort_api->ReleaseTypeInfo(type_info);
+ }
+
+ /* Retrieve info about output array. */
+ memset(output_info, 0, sizeof(mdl->output_info));
+ for (uint32_t i = 0; i < mdl->info.num_outputs; i++) {
+ name = NULL;
+ status = ort_api->SessionGetOutputName(session, i, allocator, &name);
+ if (check_ortstatus(status)) {
+ _ODP_ERR("Get %uth output name failed\n", i);
+ return -1;
+ }
+
+ strncpy(output_info[i].name, name, ODP_ML_MODEL_IO_NAME_LEN - 1);
+ output_info[i].name[ODP_ML_MODEL_IO_NAME_LEN - 1] = 0;
+
+ /* Free memory allocated by SessionGetOutputName */
+ status = ort_api->AllocatorFree(allocator, name);
+ if (check_ortstatus(status)) {
+ _ODP_ERR("AllocatorFree %uth output_name failed\n", i);
+ return -1;
+ }
+
+ if (param->extra_info.num_outputs) {
+ data_format = &param->extra_info.output_format[i];
+
+ output_info[i].shape = data_format->shape;
+ output_info[i].data_type = data_format->data_type;
+ output_info[i].data_type_size = data_format->data_type_size;
+
+ mdl->output_sizes[i] = output_info[i].data_type_size;
+ calculate_model_io_size(&data_format->shape, &mdl->output_sizes[i]);
+ continue;
+ }
+
+ type_info = NULL;
+ status = ort_api->SessionGetOutputTypeInfo(session, i, &type_info);
+ if (check_ortstatus(status)) {
+ _ODP_ERR("SessionGetOutputTypeInfo failed\n");
+ return -1;
+ }
+
+ if (get_model_io_type_shape_size(type_info, &output_info[i].shape,
+ &output_info[i].data_type,
+ &output_info[i].data_type_size,
+ &mdl->output_sizes[i])) {
+ _ODP_ERR("get_model_io_type_shape_size() for output failed\n");
+ ort_api->ReleaseTypeInfo(type_info);
+ return -1;
+ }
+
+ ort_api->ReleaseTypeInfo(type_info);
+ }
+
+ return 0;
+}
+
+static inline int check_model_io_num(const odp_ml_model_param_t *param,
+ uint32_t num_inputs, uint32_t num_outputs)
+{
+ /* Make sure the number of inputs/outputs does not exceed the supported
+ * model max inputs/outputs */
+ if (num_inputs > CONFIG_ML_MAX_INPUTS) {
+ _ODP_ERR("The model's number of inputs %u exceeds the maximum "
+ "number of inputs supported in a model %u\n",
+ num_inputs, CONFIG_ML_MAX_INPUTS);
+ return -1;
+ }
+
+ if (num_outputs > CONFIG_ML_MAX_OUTPUTS) {
+ _ODP_ERR("The model's number of outputs %u exceeds the maximum "
+ "number of outputs supported in a model %u\n",
+ num_outputs, CONFIG_ML_MAX_OUTPUTS);
+
+ return -1;
+ }
+
+ /* Make sure the numbers of inputs/outputs provided in the extra_info of
+ * param match the numbers defined in model metadata. */
+ if (param->extra_info.num_inputs &&
+ param->extra_info.num_inputs != num_inputs) {
+ _ODP_ERR("Provided param->extra_info.num_inputs %u does not match the"
+ " number of inputs defined in model metadata: %u\n",
+ param->extra_info.num_inputs, num_inputs);
+ return -1;
+ }
+
+ if (param->extra_info.num_outputs && param->extra_info.num_outputs != num_outputs) {
+ _ODP_ERR("Provided param->extra_info.num_outputs %u does not match the"
+ " number of outputs defined in model metadata: %u\n",
+ param->extra_info.num_outputs, num_outputs);
+ return -1;
+ }
+
+ if (param->extra_info.num_inputs && !param->extra_info.input_format) {
+ _ODP_ERR("num_inputs is provided but not input_format in param->extra_info\n");
+ return -1;
+ }
+
+ if (param->extra_info.num_outputs && !param->extra_info.output_format) {
+ _ODP_ERR("num_outputs is provided but not output_format in param->extra_info\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int create_ort_model(const odp_ml_model_param_t *param, OrtSession **session,
+ ml_model_t *mdl, OrtSessionOptions *session_opts)
+{
+ OrtStatus *status;
+ int64_t model_version;
+ uint32_t num_inputs = 0;
+ uint32_t num_outputs = 0;
+ OrtModelMetadata *metadata = {0};
+ const OrtApi *ort_api = _odp_ml_glb->ort_api;
+
+ status = ort_api->CreateSessionFromArray(_odp_ml_glb->env,
+ param->model,
+ param->size,
+ session_opts,
+ session);
+ if (check_ortstatus(status) || !(*session)) {
+ _ODP_ERR("CreateSessionFromArray failed\n");
+ return -1;
+ }
+
+ if (get_model_io_count(*session, &num_inputs, &num_outputs)) {
+ _ODP_ERR("get_model_io_count() failed\n");
+ ort_api->ReleaseSession(*session);
+ return -1;
+ }
+
+ if (check_model_io_num(param, num_inputs, num_outputs)) {
+ ort_api->ReleaseSession(*session);
+ return -1;
+ }
+
+ mdl->max_compl_id = param->max_compl_id;
+ mdl->info.num_inputs = num_inputs;
+ mdl->info.num_outputs = num_outputs;
+
+ /* Get metadata */
+ status = ort_api->SessionGetModelMetadata(*session, &metadata);
+ if (check_ortstatus(status) || !metadata) {
+ _ODP_ERR("SessionGetModelMetadata failed\n");
+ ort_api->ReleaseSession(*session);
+ return -1;
+ }
+
+ /* Get model version */
+ status = ort_api->ModelMetadataGetVersion(metadata, &model_version);
+ if (check_ortstatus(status)) {
+ _ODP_ERR("ModelMetadataGetVersion failed\n");
+ ort_api->ReleaseModelMetadata(metadata);
+ ort_api->ReleaseSession(*session);
+ return -1;
+ }
+ mdl->info.model_version = model_version;
+ mdl->info.interface_version = 0;
+
+ if (get_model_io_info(*session, mdl, param)) {
+ _ODP_ERR("get_model_io_info() failed\n");
+ ort_api->ReleaseModelMetadata(metadata);
+ ort_api->ReleaseSession(*session);
+ return -1;
+ }
+
+ ort_api->ReleaseModelMetadata(metadata);
+ return 0;
+}
+
+static int set_ort_run_opts(const char *name, OrtSessionOptions *se_opts)
+{
+ OrtStatus *status;
+ ort_run_opts_t *opts = &_odp_ml_glb->ort_run_opts;
+ const OrtApi *ort_api = _odp_ml_glb->ort_api;
+
+ if (opts->enable_profiling) {
+ status = ort_api->EnableProfiling(se_opts, name);
+ if (check_ortstatus(status)) {
+ _ODP_ERR("Enable profiling failed\n");
+ return -1;
+ }
+ }
+
+ status = ort_api->SetSessionExecutionMode(se_opts, opts->execution_mode);
+ if (check_ortstatus(status)) {
+ _ODP_ERR("SetSessionExecutionMode failed\n");
+ return -1;
+ }
+
+ if (opts->intra_op_num_threads) {
+ status = ort_api->SetIntraOpNumThreads(se_opts, opts->intra_op_num_threads);
+ if (check_ortstatus(status)) {
+ _ODP_ERR("SetIntraOpNumThreads failed\n");
+ return -1;
+ }
+ }
+
+ if (opts->inter_op_num_threads) {
+ status = ort_api->SetInterOpNumThreads(se_opts, opts->inter_op_num_threads);
+ if (check_ortstatus(status)) {
+ _ODP_ERR("SetInterOpNumThreads failed\n");
+ return -1;
+ }
+ }
+
+ status = ort_api->SetSessionGraphOptimizationLevel(se_opts, opts->graph_opt_level);
+ if (check_ortstatus(status)) {
+ _ODP_ERR("SetSessionGraphOptimizationLevel failed\n");
+ return -1;
+ }
+
+ /* Optimized model file path is not provided */
+ if (opts->opt_model_filepath[0] == '\0')
+ return 0;
+
+ status = ort_api->SetOptimizedModelFilePath(se_opts, opts->opt_model_filepath);
+ if (check_ortstatus(status)) {
+ _ODP_ERR("SetOptimizedModelFilePath failed\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static inline void reset_mdl_info_sizes(ml_model_t *mdl)
+{
+ memset(&mdl->info, 0, sizeof(odp_ml_model_info_t));
+ memset(mdl->input_info, 0, sizeof(mdl->input_info));
+ memset(mdl->output_info, 0, sizeof(mdl->output_info));
+ memset(mdl->input_sizes, 0, sizeof(mdl->input_sizes));
+ memset(mdl->output_sizes, 0, sizeof(mdl->output_sizes));
+}
+
+static int check_io_shape(ml_model_t *mdl)
+{
+ odp_ml_shape_info_t *shape;
+
+ for (uint32_t i = 0; i < mdl->info.num_inputs; i++) {
+ shape = &mdl->input_info[i].shape;
+
+ if (shape->type == ODP_ML_SHAPE_NONE) {
+ _ODP_ERR("Undefined shape type for model input[%u]\n", i);
+ return -1;
+ }
+
+ if (shape->type == ODP_ML_SHAPE_STATIC)
+ continue;
+
+ /* shape->type == ODP_ML_SHAPE_BATCH */
+ for (uint32_t j = 0; j < shape->num_dim; j++) {
+ if (shape->dim[j] == ODP_ML_DIM_DYNAMIC && !shape->dim_max[j]) {
+ _ODP_ERR("Missing dim_max[%u] for dynamic sized input[%u], please"
+ " provide via the extra_info of model param\n", j, i);
+ return -1;
+ }
+ }
+ }
+
+ for (uint32_t i = 0; i < mdl->info.num_outputs; i++) {
+ if (mdl->output_info[i].shape.type == ODP_ML_SHAPE_NONE) {
+ _ODP_ERR("Undefined shape type for model output[%u]\n", i);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+odp_ml_model_t odp_ml_model_create(const char *name, const odp_ml_model_param_t *param)
+{
+ OrtStatus *status;
+ odp_ml_model_info_t *info;
+ OrtSessionOptions *session_opts;
+ uint32_t i = 0;
+ ml_model_t *mdl = NULL;
+ OrtSession *session = NULL;
+ const OrtApi *ort_api = _odp_ml_glb->ort_api;
+
+ if (odp_unlikely(odp_global_ro.disable.ml)) {
+ _ODP_ERR("ML is disabled\n");
+ return ODP_ML_MODEL_INVALID;
+ }
+
+ if (odp_unlikely(param->size > _odp_ml_glb->ml_config.max_model_size)) {
+ _ODP_ERR("Model size %" PRIu64 " exceeds maximum model size configured %" PRIu64 "\n",
+ param->size, _odp_ml_glb->ml_config.max_model_size);
+ return ODP_ML_MODEL_INVALID;
+ }
+
+ if (odp_unlikely(!param->size || !param->model)) {
+ _ODP_ERR("Invalid model param: param->model: %p, param->size: %" PRIu64 "\n",
+ param->model, param->size);
+ return ODP_ML_MODEL_INVALID;
+ }
+
+ if (odp_unlikely(param->max_compl_id > ML_MAX_COMPL_ID)) {
+ _ODP_ERR("param->max_compl_id: %u exceeds maximum completion id supported: %d\n",
+ param->max_compl_id, ML_MAX_COMPL_ID);
+ return ODP_ML_MODEL_INVALID;
+ }
+
+ /* Find an empty slot to store the new model */
+ for (i = 0; i < ML_MAX_MODELS_CREATED; i++) {
+ if (_odp_ml_glb->models[i].state)
+ continue;
+
+ odp_ticketlock_lock(&_odp_ml_glb->models[i].lock);
+
+ if (_odp_ml_glb->models[i].state) {
+ odp_ticketlock_unlock(&_odp_ml_glb->models[i].lock);
+ continue;
+ }
+
+ mdl = &_odp_ml_glb->models[i];
+ break;
+ }
+
+ if (i == ML_MAX_MODELS_CREATED) {
+ _ODP_ERR("Maximum number of models has already been created!\n");
+ return ODP_ML_MODEL_INVALID;
+ }
+
+ /* Free model entry was found and is now locked */
+ mdl->state = ML_STATE_CREATED;
+
+ status = ort_api->CreateSessionOptions(&session_opts);
+ if (check_ortstatus(status) || !session_opts) {
+ _ODP_ERR("Error: CreateSessionOptions failed.\n");
+ mdl->state = ML_STATE_FREE;
+ odp_ticketlock_unlock(&mdl->lock);
+ return ODP_ML_MODEL_INVALID;
+ }
+
+ if (set_ort_run_opts(name, session_opts)) {
+ _odp_ml_glb->ort_api->ReleaseSessionOptions(session_opts);
+ mdl->state = ML_STATE_FREE;
+ odp_ticketlock_unlock(&mdl->lock);
+ return ODP_ML_MODEL_INVALID;
+ }
+
+ /* Store model info */
+ info = &mdl->info;
+ memset(info, 0, sizeof(odp_ml_model_info_t));
+
+ if (create_ort_model(param, &session, mdl, session_opts)) {
+ mdl->state = ML_STATE_FREE;
+
+ /* Reset info back to 0 in case some fields were filled
+ * before the later failure */
+ reset_mdl_info_sizes(mdl);
+ odp_ticketlock_unlock(&mdl->lock);
+
+ _odp_ml_glb->ort_api->ReleaseSessionOptions(session_opts);
+ _ODP_ERR("create_ort_model() failed\n");
+ return ODP_ML_MODEL_INVALID;
+ }
+
+ if (check_io_shape(mdl)) {
+ mdl->state = ML_STATE_FREE;
+ reset_mdl_info_sizes(mdl);
+ odp_ticketlock_unlock(&mdl->lock);
+
+ ort_api->ReleaseSession(session);
+ _odp_ml_glb->ort_api->ReleaseSessionOptions(session_opts);
+ return ODP_ML_MODEL_INVALID;
+ }
+
+ mdl->session = session;
+ mdl->session_opts = session_opts;
+ info->index = i;
+
+ if (name) {
+ strncpy(info->name, name, ODP_ML_MODEL_NAME_LEN - 1);
+ info->name[ODP_ML_MODEL_NAME_LEN - 1] = 0;
+ }
+
+ mdl->max_compl_id = param->max_compl_id;
+ for (uint32_t j = 0; j < ML_MAX_COMPL_ID; j++)
+ odp_atomic_init_u32(&mdl->compl_status[j], 1);
+
+ odp_ticketlock_unlock(&mdl->lock);
+ return (odp_ml_model_t)mdl;
+}
+
+int odp_ml_model_destroy(odp_ml_model_t model)
+{
+ ml_model_t *mdl = ml_model_from_handle(model);
+
+ if (model == ODP_ML_MODEL_INVALID) {
+ _ODP_ERR("Bad ML model handle\n");
+ return -1;
+ }
+
+ odp_ticketlock_lock(&mdl->lock);
+
+ if (mdl->state != ML_STATE_CREATED) {
+ _ODP_ERR("Model not created\n");
+ odp_ticketlock_unlock(&mdl->lock);
+ return -1;
+ }
+
+ _odp_ml_glb->ort_api->ReleaseSessionOptions(mdl->session_opts);
+ _odp_ml_glb->ort_api->ReleaseSession(mdl->session);
+ mdl->state = ML_STATE_FREE;
+ mdl->session = NULL;
+ odp_ticketlock_unlock(&mdl->lock);
+
+ return 0;
+}
+
+int odp_ml_model_info(odp_ml_model_t model, odp_ml_model_info_t *info)
+{
+ ml_model_t *mdl = ml_model_from_handle(model);
+
+ if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) {
+ _ODP_ERR("Bad ML model handle\n");
+ return -1;
+ }
+
+ if (odp_unlikely(!info)) {
+ _ODP_ERR("info must not be NULL\n");
+ return -1;
+ }
+
+ odp_ticketlock_lock(&mdl->lock);
+ if (odp_unlikely(mdl->state == ML_STATE_FREE)) {
+ _ODP_ERR("Model not created\n");
+ odp_ticketlock_unlock(&mdl->lock);
+ return -1;
+ }
+
+ *info = mdl->info;
+
+ odp_ticketlock_unlock(&mdl->lock);
+ return 0;
+}
+
+uint32_t odp_ml_model_input_info(odp_ml_model_t model, odp_ml_input_info_t info[], uint32_t num)
+{
+ uint32_t num_model_inputs;
+ uint32_t num_written;
+ ml_model_t *mdl = ml_model_from_handle(model);
+
+ if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) {
+ _ODP_ERR("Bad ML model handle\n");
+ return 0;
+ }
+
+ odp_ticketlock_lock(&mdl->lock);
+ num_model_inputs = mdl->info.num_inputs;
+ num_written = num_model_inputs >= num ? num : num_model_inputs;
+
+ if (num == 0) {
+ odp_ticketlock_unlock(&mdl->lock);
+ return num_model_inputs;
+ }
+
+ for (uint32_t i = 0; i < num_written; i++)
+ info[i] = mdl->input_info[i];
+
+ odp_ticketlock_unlock(&mdl->lock);
+ return num_model_inputs;
+}
+
+uint32_t odp_ml_model_output_info(odp_ml_model_t model, odp_ml_output_info_t info[], uint32_t num)
+{
+ uint32_t num_model_outputs;
+ uint32_t num_written;
+ ml_model_t *mdl = ml_model_from_handle(model);
+
+ if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) {
+ _ODP_ERR("Bad ML model handle\n");
+ return 0;
+ }
+
+ odp_ticketlock_lock(&mdl->lock);
+ num_model_outputs = mdl->info.num_outputs;
+ num_written = num_model_outputs >= num ? num : num_model_outputs;
+
+ if (num == 0) {
+ odp_ticketlock_unlock(&mdl->lock);
+ return num_model_outputs;
+ }
+
+ for (uint32_t i = 0; i < num_written; i++)
+ info[i] = mdl->output_info[i];
+
+ odp_ticketlock_unlock(&mdl->lock);
+ return num_model_outputs;
+}
+
+/* Find a created model by name. Scans every model slot, taking each slot's
+ * lock only long enough to read its state and name. Returns the first match,
+ * or ODP_ML_MODEL_INVALID if no created model has the given name.
+ * NOTE(review): 'name' is passed straight to strcmp() — presumably callers
+ * guarantee it is non-NULL; confirm against the API spec. */
+odp_ml_model_t odp_ml_model_lookup(const char *name)
+{
+	uint32_t i;
+	ml_model_t *mdl;
+
+	for (i = 0; i < ML_MAX_MODELS_CREATED; i++) {
+		mdl = &_odp_ml_glb->models[i];
+
+		odp_ticketlock_lock(&mdl->lock);
+
+		/* Skip empty slots */
+		if (mdl->state == ML_STATE_FREE) {
+			odp_ticketlock_unlock(&mdl->lock);
+			continue;
+		}
+
+		if (!strcmp(mdl->info.name, name)) {
+			/* found it */
+			odp_ticketlock_unlock(&mdl->lock);
+			/* The handle is the slot pointer itself */
+			return (odp_ml_model_t)mdl;
+		}
+		odp_ticketlock_unlock(&mdl->lock);
+	}
+
+	return ODP_ML_MODEL_INVALID;
+}
+
+/* Convert a model handle to a printable 64-bit value. */
+uint64_t odp_ml_model_to_u64(odp_ml_model_t model)
+{
+	uint64_t val = _odp_pri(model);
+
+	return val;
+}
+
+/* Map an ODP ML data type enum to a short lowercase name for printing.
+ * Unrecognized values map to "unknown". */
+static const char *data_type_str(odp_ml_data_type_t data_type)
+{
+	switch (data_type) {
+	case ODP_ML_DATA_TYPE_INT8:
+		return "int8";
+	case ODP_ML_DATA_TYPE_UINT8:
+		return "uint8";
+	case ODP_ML_DATA_TYPE_UINT16:
+		return "uint16";
+	case ODP_ML_DATA_TYPE_INT16:
+		return "int16";
+	case ODP_ML_DATA_TYPE_INT32:
+		return "int32";
+	case ODP_ML_DATA_TYPE_UINT32:
+		return "uint32";
+	case ODP_ML_DATA_TYPE_INT64:
+		return "int64";
+	case ODP_ML_DATA_TYPE_UINT64:
+		return "uint64";
+	case ODP_ML_DATA_TYPE_FP16:
+		return "fp16";
+	case ODP_ML_DATA_TYPE_FP32:
+		return "fp32";
+	case ODP_ML_DATA_TYPE_BFP16:
+		return "bfp16";
+	default:
+		return "unknown";
+	}
+}
+
+/* Map a shape type enum to a short lowercase name for printing.
+ * Default case lowercased ("unknown") for consistency with data_type_str(). */
+static const char *shape_type_str(odp_ml_shape_type_t shape_type)
+{
+	switch (shape_type) {
+	case ODP_ML_SHAPE_NONE:
+		return "none";
+	case ODP_ML_SHAPE_STATIC:
+		return "static";
+	case ODP_ML_SHAPE_BATCH:
+		return "batch";
+	default:
+		return "unknown";
+	}
+}
+
+/* Print one shape as "Shape: <type> [d0, d1, ...]", with "Dyn" standing in
+ * for dynamic dimensions. A scalar (num_dim == 0) prints as "[]". */
+static void print_shape(const odp_ml_shape_info_t *shape)
+{
+	_ODP_PRINT("Shape: %s [", shape_type_str(shape->type));
+
+	for (uint32_t i = 0; i < shape->num_dim; i++) {
+		/* Separator goes before every element except the first */
+		if (i)
+			_ODP_PRINT(", ");
+
+		if (shape->dim[i] == ODP_ML_DIM_DYNAMIC)
+			_ODP_PRINT("Dyn");
+		else
+			_ODP_PRINT("%" PRIu32, shape->dim[i]);
+	}
+
+	/* Closing the bracket here also covers the zero-dimension case */
+	_ODP_PRINT("]\n");
+}
+
+/* Print model info plus per-input and per-output name/type/shape.
+ * Logs and returns without printing on a bad handle or a free (not created)
+ * model slot. The slot lock is held for the whole print. */
+void odp_ml_model_print(odp_ml_model_t model)
+{
+	ml_model_t *mdl = ml_model_from_handle(model);
+	/* Only addresses are taken here; nothing is dereferenced before the
+	 * handle check below. */
+	const odp_ml_model_info_t * const info = &mdl->info;
+	const odp_ml_input_info_t * const input_info = mdl->input_info;
+	const odp_ml_output_info_t * const output_info = mdl->output_info;
+
+	if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) {
+		_ODP_ERR("Bad ML model handle\n");
+		return;
+	}
+
+	odp_ticketlock_lock(&mdl->lock);
+	if (odp_unlikely(mdl->state == ML_STATE_FREE)) {
+		odp_ticketlock_unlock(&mdl->lock);
+		_ODP_ERR("Model not created\n");
+		return;
+	}
+
+	_ODP_PRINT("\nModel info\n");
+	_ODP_PRINT("----------\n");
+	_ODP_PRINT("  Model handle: 0x%" PRIx64 "\n", odp_ml_model_to_u64(model));
+	_ODP_PRINT("  Name: %s\n", info->name);
+	_ODP_PRINT("  Model version: %" PRIu64 "\n", info->model_version);
+	_ODP_PRINT("  Model interface version: %" PRIu64 "\n", info->interface_version);
+	_ODP_PRINT("  Index: %u\n", info->index);
+	_ODP_PRINT("  Number of inputs: %u\n", info->num_inputs);
+
+	for (uint32_t i = 0; i < info->num_inputs; i++) {
+		_ODP_PRINT("    Input[%u]: ", i);
+		_ODP_PRINT("Name: %s, ", input_info[i].name);
+		_ODP_PRINT("Data_type: %s, ", data_type_str(input_info[i].data_type));
+		print_shape(&input_info[i].shape);
+	}
+
+	_ODP_PRINT("  Number of outputs: %u\n", info->num_outputs);
+	for (uint32_t i = 0; i < info->num_outputs; i++) {
+		_ODP_PRINT("    Output[%u]: ", i);
+		_ODP_PRINT("Name: %s, ", output_info[i].name);
+		_ODP_PRINT("Data_type: %s, ", data_type_str(output_info[i].data_type));
+		print_shape(&output_info[i].shape);
+	}
+
+	odp_ticketlock_unlock(&mdl->lock);
+
+	_ODP_PRINT("\n");
+}
+
+/* Print the completion modes present in the mask, each prefixed with a
+ * space. Fix: ODP_ML_COMPL_MODE_SYNC printed " syn" instead of " sync". */
+static inline void mode_print(odp_ml_compl_mode_t compl_mode_mask)
+{
+	if (compl_mode_mask & ODP_ML_COMPL_MODE_SYNC)
+		_ODP_PRINT(" sync");
+
+	if (compl_mode_mask & ODP_ML_COMPL_MODE_POLL)
+		_ODP_PRINT(" poll");
+
+	if (compl_mode_mask & ODP_ML_COMPL_MODE_EVENT)
+		_ODP_PRINT(" event");
+}
+
+/* Print implementation limits and configured load/run capabilities.
+ * Fix: the load "completion mode:" label had a trailing space which,
+ * combined with the space mode_print() prepends, printed a double space;
+ * now matches the run label. */
+void odp_ml_print(void)
+{
+	_ODP_PRINT("\nML info\n");
+	_ODP_PRINT("-----------\n");
+	_ODP_PRINT("  max_model_size: %u\n", ML_MAX_MODEL_SIZE);
+	_ODP_PRINT("  max_compl_id: %u\n", ML_MAX_COMPL_ID);
+	_ODP_PRINT("  max_models_created: %u\n", ML_MAX_MODELS_CREATED);
+	_ODP_PRINT("  max_models_loaded: %u\n", ML_MAX_MODELS_LOADED);
+	_ODP_PRINT("  model_max_inputs: %u\n", CONFIG_ML_MAX_INPUTS);
+	_ODP_PRINT("  model_max_outputs: %u\n", CONFIG_ML_MAX_OUTPUTS);
+
+	_ODP_PRINT("  load:\n");
+	_ODP_PRINT("    completion mode:");
+	mode_print(_odp_ml_glb->capa.load.compl_mode_mask);
+	_ODP_PRINT(", plain queue: %c, schedule queue: %c\n",
+		   _odp_ml_glb->capa.load.compl_queue_plain ? 'Y' : 'N',
+		   _odp_ml_glb->capa.load.compl_queue_sched ? 'Y' : 'N');
+
+	_ODP_PRINT("  run:\n");
+	_ODP_PRINT("    completion mode:");
+	mode_print(_odp_ml_glb->capa.run.compl_mode_mask);
+	_ODP_PRINT(", plain queue: %c, schedule queue: %c\n",
+		   _odp_ml_glb->capa.run.compl_queue_plain ? 'Y' : 'N',
+		   _odp_ml_glb->capa.run.compl_queue_sched ? 'Y' : 'N');
+	_ODP_PRINT("\n");
+}
+
+/* No extra statistics are implemented: validate the handle and report zero
+ * entries. Returns 0 on success, -1 on a bad handle. */
+int odp_ml_model_extra_stat_info(odp_ml_model_t model,
+				 odp_ml_extra_stat_info_t info[] ODP_UNUSED,
+				 int num ODP_UNUSED)
+{
+	if (odp_likely(model != ODP_ML_MODEL_INVALID))
+		return 0;
+
+	_ODP_ERR("Bad ML model handle\n");
+	return -1;
+}
+
+/* No extra statistics are implemented: validate the handle and report zero
+ * values. Returns 0 on success, -1 on a bad handle. */
+int odp_ml_model_extra_stats(odp_ml_model_t model, uint64_t stats[] ODP_UNUSED, int num ODP_UNUSED)
+{
+	if (odp_likely(model != ODP_ML_MODEL_INVALID))
+		return 0;
+
+	_ODP_ERR("Bad ML model handle\n");
+	return -1;
+}
+
+/* Initialize a completion-event pool parameter struct to defaults: all
+ * fields zeroed, cache size taken from the global default buffer pool
+ * parameters. Fix: error message grammar ("must not NULL"). */
+void odp_ml_compl_pool_param_init(odp_ml_compl_pool_param_t *pool_param)
+{
+	if (odp_unlikely(!pool_param)) {
+		_ODP_ERR("Param 'pool_param' must not be NULL\n");
+		return;
+	}
+
+	memset(pool_param, 0, sizeof(odp_ml_compl_pool_param_t));
+
+	pool_param->cache_size = _odp_ml_glb->pool_param.buf.cache_size;
+}
+
+/* Create a buffer pool for ML completion events. Each buffer is sized to
+ * hold either a run or a load result. Returns the pool handle, or
+ * ODP_POOL_INVALID when a parameter exceeds pool capabilities.
+ * Fix: guard against a NULL pool_param before dereferencing it, consistent
+ * with odp_ml_compl_pool_param_init(). */
+odp_pool_t odp_ml_compl_pool_create(const char *name, const odp_ml_compl_pool_param_t *pool_param)
+{
+	odp_pool_t pool;
+	odp_pool_param_t ml_pool_param;
+	uint32_t num, uarea_size, cache_size;
+	/* One buffer must fit whichever result type is larger */
+	uint32_t buf_size = _ODP_MAX(sizeof(odp_ml_run_result_t),
+				     sizeof(odp_ml_load_result_t));
+
+	if (odp_unlikely(!pool_param)) {
+		_ODP_ERR("Param 'pool_param' must not be NULL\n");
+		return ODP_POOL_INVALID;
+	}
+
+	num = pool_param->num;
+	uarea_size = pool_param->uarea_size;
+	cache_size = pool_param->cache_size;
+
+	if (num > _odp_ml_glb->capa.pool.max_num) {
+		_ODP_ERR("Too many ML completion events: %u\n", num);
+		return ODP_POOL_INVALID;
+	}
+
+	if (uarea_size > _odp_ml_glb->capa.pool.max_uarea_size) {
+		_ODP_ERR("Bad uarea size: %u\n", uarea_size);
+		return ODP_POOL_INVALID;
+	}
+
+	if (cache_size < _odp_ml_glb->capa.pool.min_cache_size ||
+	    cache_size > _odp_ml_glb->capa.pool.max_cache_size) {
+		_ODP_ERR("Bad cache size: %u\n", cache_size);
+		return ODP_POOL_INVALID;
+	}
+
+	odp_pool_param_init(&ml_pool_param);
+	ml_pool_param.type                     = ODP_POOL_BUFFER;
+	ml_pool_param.uarea_init.init_fn       = pool_param->uarea_init.init_fn;
+	ml_pool_param.uarea_init.args          = pool_param->uarea_init.args;
+	ml_pool_param.buf.num                  = num;
+	ml_pool_param.buf.cache_size           = cache_size;
+	ml_pool_param.buf.size                 = buf_size;
+	ml_pool_param.buf.uarea_size           = uarea_size;
+
+	pool = _odp_pool_create(name, &ml_pool_param, ODP_POOL_ML_COMPL);
+
+	return pool;
+}
+
+/* Allocate a completion event from the given pool. The backing buffer's
+ * result area is zeroed and the event type is switched from buffer to
+ * ML completion. Returns ODP_ML_COMPL_INVALID if the pool is empty. */
+odp_ml_compl_t odp_ml_compl_alloc(odp_pool_t pool)
+{
+	odp_buffer_t buf;
+	odp_event_t ev;
+	odp_ml_run_result_t *result;
+	/* Same sizing as odp_ml_compl_pool_create(): the larger result type */
+	uint32_t buf_size = _ODP_MAX(sizeof(odp_ml_run_result_t),
+				     sizeof(odp_ml_load_result_t));
+
+	buf = odp_buffer_alloc(pool);
+
+	if (odp_unlikely(buf == ODP_BUFFER_INVALID))
+		return ODP_ML_COMPL_INVALID;
+
+	result = odp_buffer_addr(buf);
+	memset(result, 0, buf_size);
+
+	/* Re-tag the buffer event as an ML completion event */
+	ev = odp_buffer_to_event(buf);
+	_odp_event_type_set(ev, ODP_EVENT_ML_COMPL);
+
+	/* Completion handles are buffer handles in disguise */
+	return (odp_ml_compl_t)(uintptr_t)buf;
+}
+
+/* Return a completion event to its pool, restoring the plain buffer event
+ * type first. Logs and does nothing on an invalid handle. */
+void odp_ml_compl_free(odp_ml_compl_t ml_compl)
+{
+	odp_buffer_t buf;
+	odp_event_t ev;
+
+	if (odp_unlikely(ml_compl == ODP_ML_COMPL_INVALID)) {
+		_ODP_ERR("Bad ML job completion handle\n");
+		return;
+	}
+
+	buf = (odp_buffer_t)(uintptr_t)ml_compl;
+	ev = odp_buffer_to_event(buf);
+	_odp_event_type_set(ev, ODP_EVENT_BUFFER);
+
+	odp_buffer_free(buf);
+}
+
+/* Extract the run result carried by a completion event. Returns 0 when the
+ * run succeeded, -1 when the run itself failed (error_code set), and -2 for
+ * an invalid handle or wrong event type/subtype.
+ * Fix: convert the handle to an event only after validating it, instead of
+ * calling odp_buffer_to_event() on a possibly invalid handle. */
+int odp_ml_compl_run_result(odp_ml_compl_t ml_compl, odp_ml_run_result_t *result)
+{
+	odp_event_subtype_t subtype;
+	odp_ml_run_result_t *run_result;
+	odp_buffer_t buf = (odp_buffer_t)(uintptr_t)ml_compl;
+	odp_event_t ev;
+
+	if (odp_unlikely(ml_compl == ODP_ML_COMPL_INVALID)) {
+		_ODP_ERR("Given ML completion event is invalid\n");
+		return -2;
+	}
+
+	ev = odp_buffer_to_event(buf);
+
+	if (odp_event_types(ev, &subtype) != ODP_EVENT_ML_COMPL ||
+	    subtype != ODP_EVENT_ML_COMPL_RUN) {
+		_ODP_ERR("Given completion event has wrong event type or subtype\n");
+		return -2;
+	}
+
+	run_result = odp_buffer_addr(buf);
+	if (result)
+		*result = *run_result;
+
+	return run_result->error_code ? -1 : 0;
+}
+
+/* Extract the load/unload result carried by a completion event. Returns 0
+ * when the operation succeeded, -1 when it failed (error_code set), and -2
+ * for an invalid handle or wrong event type/subtype.
+ * Fix: convert the handle to an event only after validating it, instead of
+ * calling odp_buffer_to_event() on a possibly invalid handle. */
+int odp_ml_compl_load_result(odp_ml_compl_t ml_compl, odp_ml_load_result_t *result)
+{
+	odp_event_subtype_t subtype;
+	odp_ml_load_result_t *load_result;
+	odp_buffer_t buf = (odp_buffer_t)(uintptr_t)ml_compl;
+	odp_event_t ev;
+
+	if (odp_unlikely(ml_compl == ODP_ML_COMPL_INVALID)) {
+		_ODP_ERR("Given ML completion event is invalid\n");
+		return -2;
+	}
+
+	ev = odp_buffer_to_event(buf);
+
+	if (odp_event_types(ev, &subtype) != ODP_EVENT_ML_COMPL ||
+	    subtype != ODP_EVENT_ML_COMPL_LOAD) {
+		_ODP_ERR("Given completion event has wrong event type or subtype\n");
+		return -2;
+	}
+
+	load_result = odp_buffer_addr(buf);
+	if (result)
+		*result = *load_result;
+
+	return load_result->error_code ? -1 : 0;
+}
+
+/* Return the user area of the buffer backing a completion event. */
+void *odp_ml_compl_user_area(odp_ml_compl_t ml_compl)
+{
+	odp_buffer_t buf = (odp_buffer_t)(uintptr_t)ml_compl;
+
+	return odp_buffer_user_area(buf);
+}
+
+/* Convert an event to a completion handle. Debug builds assert the event is
+ * actually tagged ODP_EVENT_ML_COMPL; the conversion itself is a cast. */
+odp_ml_compl_t odp_ml_compl_from_event(odp_event_t event)
+{
+	_ODP_ASSERT(_odp_event_hdr_field(event, int8_t, event_type) == ODP_EVENT_ML_COMPL);
+
+	return (odp_ml_compl_t)(uintptr_t)event;
+}
+
+/* Convert a completion handle back to an event (plain cast). */
+odp_event_t odp_ml_compl_to_event(odp_ml_compl_t ml_compl)
+{
+	odp_event_t ev = (odp_event_t)(uintptr_t)ml_compl;
+
+	return ev;
+}
+
+/* Convert a completion handle to a printable 64-bit value. */
+uint64_t odp_ml_compl_to_u64(odp_ml_compl_t ml_compl)
+{
+	uint64_t val = (uint64_t)(uintptr_t)ml_compl;
+
+	return val;
+}
+
+/* Initialize completion parameters to defaults: all fields zero, queue and
+ * event handles invalid. Fix: guard against a NULL pointer, consistent with
+ * odp_ml_compl_pool_param_init(). */
+void odp_ml_compl_param_init(odp_ml_compl_param_t *compl_param)
+{
+	if (odp_unlikely(!compl_param)) {
+		_ODP_ERR("Param 'compl_param' must not be NULL\n");
+		return;
+	}
+
+	memset(compl_param, 0, sizeof(odp_ml_compl_param_t));
+
+	compl_param->queue = ODP_QUEUE_INVALID;
+	compl_param->event = ODP_EVENT_INVALID;
+}
+
+/* Synchronously "load" a model: transition CREATED -> LOADED under the slot
+ * lock. Returns 0 on success, -1 on a bad handle or wrong state. When
+ * 'result' is non-NULL it is always filled, carrying the error code. */
+int odp_ml_model_load(odp_ml_model_t model, odp_ml_load_result_t *result)
+{
+	odp_ml_load_result_t result_local;
+	int ret = -1;
+	ml_model_t *mdl = ml_model_from_handle(model);
+
+	memset(&result_local, 0, sizeof(result_local));
+
+	if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) {
+		_ODP_ERR("Bad ML model handle\n");
+		result_local.error_code = ML_BAD_HDL;
+		goto load_fail;
+	}
+
+	odp_ticketlock_lock(&mdl->lock);
+	/* Only a created (not yet loaded) model may be loaded */
+	if (odp_unlikely(mdl->state != ML_STATE_CREATED)) {
+		_ODP_ERR("Model has not been created yet or is already loaded\n");
+		odp_ticketlock_unlock(&mdl->lock);
+		result_local.error_code = ML_NOT_CREATED;
+		goto load_fail;
+	}
+
+	mdl->state = ML_STATE_LOADED;
+	odp_ticketlock_unlock(&mdl->lock);
+	ret = 0;
+
+load_fail:
+	if (result)
+		*result = result_local;
+
+	return ret;
+}
+
+/* Validate completion parameters for an asynchronous load/unload (is_load)
+ * or run (!is_load) operation against the configured mode masks.
+ * Poll mode: the mode must be configured and compl_id within range.
+ * Event mode: the mode must be configured and a valid ML completion event
+ * plus a valid queue must be given. Returns 0 if valid, -1 otherwise.
+ * NOTE(review): compl_param is dereferenced without a NULL check — callers
+ * presumably guarantee it; confirm against the API spec. */
+static inline int check_compl_param(const odp_ml_compl_param_t *compl_param,
+				    uint32_t max_compl_id, odp_bool_t is_load)
+{
+	odp_ml_config_t *config = &_odp_ml_glb->ml_config;
+
+	switch (compl_param->mode) {
+	case ODP_ML_COMPL_MODE_POLL:
+		if (is_load && !(config->load_mode_mask & ODP_ML_COMPL_MODE_POLL)) {
+			_ODP_ERR("Poll mode loading/unloading is not configured\n");
+			return -1;
+		}
+
+		if (!is_load && !(config->run_mode_mask & ODP_ML_COMPL_MODE_POLL)) {
+			_ODP_ERR("Poll mode run is not configured\n");
+			return -1;
+		}
+
+		if (compl_param->compl_id > max_compl_id) {
+			_ODP_ERR("Bad compl_id: %u, exceeding model max completion id %u\n",
+				 compl_param->compl_id, max_compl_id);
+			return -1;
+		}
+		break;
+	case ODP_ML_COMPL_MODE_EVENT:
+		if (is_load && !(config->load_mode_mask & ODP_ML_COMPL_MODE_EVENT)) {
+			_ODP_ERR("Event mode loading/unloading is not configured\n");
+			return -1;
+		}
+
+		if (!is_load && !(config->run_mode_mask & ODP_ML_COMPL_MODE_EVENT)) {
+			_ODP_ERR("Event mode run is not configured\n");
+			return -1;
+		}
+
+		if (compl_param->event == ODP_EVENT_INVALID ||
+		    compl_param->queue == ODP_QUEUE_INVALID) {
+			_ODP_ERR("Bad event or queue\n");
+			return -1;
+		}
+
+		if (odp_event_type(compl_param->event) != ODP_EVENT_ML_COMPL) {
+			_ODP_ERR("Bad completion event type\n");
+			return -1;
+		}
+		break;
+	default:
+		/* Including ODP_ML_COMPL_MODE_SYNC, which is not supported by
+		 * asynchronous functions (e.g. *_start()) either.
+		 */
+		_ODP_ERR("Invalid completion mode %u\n", compl_param->mode);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Start an asynchronous model load. The load itself completes synchronously
+ * via odp_ml_model_load(); completion is then signalled either by enqueueing
+ * the caller's completion event (event mode) or by setting the per-compl-id
+ * status flag that odp_ml_model_load_status() polls (poll mode).
+ * Returns 0 on success, -1 on failure. */
+int odp_ml_model_load_start(odp_ml_model_t model, const odp_ml_compl_param_t *compl_param)
+{
+	int ret;
+	ml_model_t *mdl = ml_model_from_handle(model);
+
+	if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) {
+		_ODP_ERR("Bad model handle\n");
+		return -1;
+	}
+
+	if (odp_unlikely(check_compl_param(compl_param, mdl->max_compl_id, true)))
+		return -1;
+
+	/* Mark the compl_id as "in progress" before doing the work */
+	if (compl_param->mode == ODP_ML_COMPL_MODE_POLL)
+		odp_atomic_store_rel_u32(&mdl->compl_status[compl_param->compl_id], 0);
+
+	ret = odp_ml_model_load(model, NULL);
+
+	if (odp_unlikely(ret))
+		return -1;
+
+	/* Send a completion event to the given queue */
+	if (compl_param->mode == ODP_ML_COMPL_MODE_EVENT) {
+		odp_ml_load_result_t *result;
+		odp_buffer_t buf = (odp_buffer_t)(uintptr_t)compl_param->event;
+
+		_odp_buffer_subtype_set(buf, ODP_EVENT_ML_COMPL_LOAD);
+
+		result = odp_buffer_addr(buf);
+		result->error_code = 0;
+		result->user_ptr = compl_param->user_ptr;
+
+		if (odp_unlikely(odp_queue_enq(compl_param->queue, compl_param->event))) {
+			_ODP_ERR("Completion event enqueue failed %" PRIu64 "\n",
+				 odp_queue_to_u64(compl_param->queue));
+			/* Roll back the load since completion cannot be signalled */
+			if (odp_ml_model_unload(model, NULL))
+				_ODP_ERR("Failed to unload model\n");
+			return -1;
+		}
+
+		return 0;
+	}
+
+	/* Poll mode: publish result, then release the status flag last */
+	mdl->result[compl_param->compl_id].user_ptr = compl_param->user_ptr;
+	odp_atomic_store_rel_u32(&mdl->compl_status[compl_param->compl_id], 1);
+	return 0;
+}
+
+/* Poll the status of an asynchronous load started in poll mode.
+ * Returns -2 on a bad handle/compl_id, 0 while still in progress, and the
+ * status value (1) once complete; on completion 'result' (if given) gets
+ * error code 0 and the user pointer recorded at start time. The '||'
+ * short-circuit guarantees mdl is not dereferenced for an invalid handle. */
+int odp_ml_model_load_status(odp_ml_model_t model, uint32_t compl_id, odp_ml_load_result_t *result)
+{
+	int ret;
+	ml_model_t *mdl = ml_model_from_handle(model);
+
+	if (odp_unlikely(model == ODP_ML_MODEL_INVALID || compl_id > mdl->max_compl_id)) {
+		_ODP_ERR("Invalid model or compl_id: %u\n", compl_id);
+		return -2;
+	}
+
+	/* Acquire pairs with the release store in *_load_start()/_unload_start() */
+	ret = odp_atomic_load_acq_u32(&mdl->compl_status[compl_id]);
+
+	if (ret && result) {
+		result->error_code = 0;
+		result->user_ptr = mdl->result[compl_id].user_ptr;
+	}
+
+	return ret;
+}
+
+/* Synchronously "unload" a model: transition LOADED -> CREATED under the
+ * slot lock. Fails (-1) on a bad handle or if the model is free, not loaded,
+ * or currently inferencing. When 'result' is non-NULL it is always filled. */
+int odp_ml_model_unload(odp_ml_model_t model, odp_ml_load_result_t *result)
+{
+	odp_ml_load_result_t result_local;
+	int ret = -1;
+	ml_model_t *mdl = ml_model_from_handle(model);
+
+	memset(&result_local, 0, sizeof(result_local));
+
+	if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) {
+		result_local.error_code = ML_BAD_HDL;
+		_ODP_ERR("Bad ML model handle\n");
+		goto unload_fail;
+	}
+
+	odp_ticketlock_lock(&mdl->lock);
+	/* mdl->state == ML_STATE_FREE, ML_STATE_CREATED, ML_STATE_INFERENCING */
+	if (odp_unlikely(mdl->state != ML_STATE_LOADED)) {
+		_ODP_ERR("Model has not been created/loaded or inferencing has not finished yet\n");
+		odp_ticketlock_unlock(&mdl->lock);
+		result_local.error_code = ML_NOT_LOADED;
+		goto unload_fail;
+	}
+
+	mdl->state = ML_STATE_CREATED;
+	odp_ticketlock_unlock(&mdl->lock);
+
+	ret = 0;
+
+unload_fail:
+	if (result)
+		*result = result_local;
+
+	return ret;
+}
+
+/* Start an asynchronous model unload; mirrors odp_ml_model_load_start().
+ * is_load is passed as true to check_compl_param() because unload shares
+ * the load completion-mode configuration (load_mode_mask).
+ * NOTE(review): unlike load_start, a failed completion-event enqueue is not
+ * rolled back (the model stays unloaded) — presumably intentional; verify. */
+int odp_ml_model_unload_start(odp_ml_model_t model, const odp_ml_compl_param_t *compl_param)
+{
+	int ret;
+	ml_model_t *mdl = ml_model_from_handle(model);
+
+	if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) {
+		_ODP_ERR("Bad model handle\n");
+		return -1;
+	}
+
+	if (odp_unlikely(check_compl_param(compl_param, mdl->max_compl_id, true)))
+		return -1;
+
+	/* Mark the compl_id as "in progress" before doing the work */
+	if (compl_param->mode == ODP_ML_COMPL_MODE_POLL)
+		odp_atomic_store_rel_u32(&mdl->compl_status[compl_param->compl_id], 0);
+
+	ret = odp_ml_model_unload(model, NULL);
+
+	if (odp_unlikely(ret))
+		return -1;
+
+	/* Upon successful unloading, send a completion event to the given queue */
+	if (compl_param->mode == ODP_ML_COMPL_MODE_EVENT) {
+		odp_ml_load_result_t *result;
+		odp_buffer_t buf = (odp_buffer_t)(uintptr_t)compl_param->event;
+
+		_odp_buffer_subtype_set(buf, ODP_EVENT_ML_COMPL_LOAD);
+
+		result = odp_buffer_addr(buf);
+		result->error_code = 0;
+		result->user_ptr = compl_param->user_ptr;
+
+		if (odp_unlikely(odp_queue_enq(compl_param->queue, compl_param->event))) {
+			_ODP_ERR("Completion event enqueue failed %" PRIu64 "\n",
+				 odp_queue_to_u64(compl_param->queue));
+			return -1;
+		}
+
+		return 0;
+	}
+
+	/* Poll mode: publish result, then release the status flag last */
+	mdl->result[compl_param->compl_id].user_ptr = compl_param->user_ptr;
+	odp_atomic_store_rel_u32(&mdl->compl_status[compl_param->compl_id], 1);
+	return 0;
+}
+
+/* Unload status polling uses the same per-compl-id flag as load status,
+ * so simply delegate. */
+int odp_ml_model_unload_status(odp_ml_model_t model, uint32_t compl_id,
+			       odp_ml_load_result_t *result)
+{
+	return odp_ml_model_load_status(model, compl_id, result);
+}
+
+/* Initialize run parameters to defaults (all zero). Fix: guard against a
+ * NULL pointer, consistent with odp_ml_compl_pool_param_init(). */
+void odp_ml_run_param_init(odp_ml_run_param_t *param)
+{
+	if (odp_unlikely(!param)) {
+		_ODP_ERR("Param 'param' must not be NULL\n");
+		return;
+	}
+
+	memset(param, 0, sizeof(odp_ml_run_param_t));
+}
+
+/* Expand a shape into an int64 dimension array, substituting 'batch_size'
+ * for every dynamic dimension. */
+static void ml_shape_to_int64(const odp_ml_shape_info_t *shape, uint32_t batch_size, int64_t *array)
+{
+	uint32_t i;
+
+	for (i = 0; i < shape->num_dim; i++)
+		array[i] = (shape->dim[i] == ODP_ML_DIM_DYNAMIC) ?
+				(int64_t)batch_size : (int64_t)shape->dim[i];
+}
+
+/* Get the number of elements in given shape */
+static inline uint64_t get_num_elem(uint32_t batch_size, const odp_ml_shape_info_t *shape)
+{
+	uint64_t count = 1;
+
+	for (uint32_t i = 0; i < shape->num_dim; i++) {
+		/* A dynamic dimension contributes the requested batch size */
+		if (shape->dim[i] == ODP_ML_DIM_DYNAMIC)
+			count *= batch_size;
+		else
+			count *= (uint64_t)shape->dim[i];
+	}
+
+	return count;
+}
+
+/* Byte size of one input/output with a dynamic (batch) dimension, using the
+ * batch size from 'param'. Returns 0 on error (missing param/batch_size, or
+ * size not representable in 32 bits) — callers already treat 0 as failure.
+ * Fix: get_num_elem() returns uint64_t; compute in 64 bits and reject
+ * overflow instead of silently truncating to uint32_t. */
+static inline uint32_t dyn_io_size(const odp_ml_shape_info_t *shape, uint32_t data_type_size,
+				   const odp_ml_run_param_t *param)
+{
+	uint64_t size;
+
+	if (!param || !param->batch_size) {
+		_ODP_ERR("Parameter 'param' must not be NULL and batch_size must be "
+			 "provided when an input/output has dynamic dimension size\n");
+		return 0;
+	}
+
+	size = get_num_elem(param->batch_size, shape) * data_type_size;
+
+	if (odp_unlikely(size > UINT32_MAX)) {
+		_ODP_ERR("Input/output size exceeds 32 bits\n");
+		return 0;
+	}
+
+	return (uint32_t)size;
+}
+
+/* Validate the model handle, data segment descriptors and run parameters
+ * before inference: every model input/output must be covered by segments
+ * summing to exactly its expected byte size, and per-input/output segment
+ * counts must stay within capabilities. Returns 0 if valid, -1 otherwise.
+ * Fixes: missing '\n' in one error message; stray ';' after an if block;
+ * output loop used '>=' where the input loop used '==' (equivalent, since
+ * '>' is rejected just above, but now consistent); message grammar. */
+static int verify_run_params(odp_ml_model_t model, const odp_ml_data_t *data,
+			     const odp_ml_run_param_t *param)
+{
+	const ml_model_t *mdl = ml_model_from_handle(model);
+
+	if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) {
+		_ODP_ERR("Bad ML model handle\n");
+		return -1;
+	}
+
+	if (odp_unlikely(!data)) {
+		_ODP_ERR("Parameter 'data' must not be NULL\n");
+		return -1;
+	}
+
+	/* Make sure that the number of input data segments equals or bigger than
+	 * the number of model inputs. */
+	if (mdl->info.num_inputs > data->num_input_seg) {
+		_ODP_ERR("The num of input data segments %u must not less than "
+			 "the number of model inputs %u\n", data->num_input_seg,
+			 mdl->info.num_inputs);
+		return -1;
+	}
+
+	if (mdl->info.num_outputs > data->num_output_seg) {
+		_ODP_ERR("The num of output data segments %u must not less than "
+			 "the number of model outputs %u\n", data->num_output_seg,
+			 mdl->info.num_outputs);
+		return -1;
+	}
+
+	if (data->num_input_seg > mdl->info.num_inputs &&
+	    (_odp_ml_glb->capa.max_segs_per_input == 1)) {
+		_ODP_ERR("Segmented input data is not supported\n");
+		return -1;
+	}
+
+	if (data->num_output_seg > mdl->info.num_outputs &&
+	    (_odp_ml_glb->capa.max_segs_per_output == 1)) {
+		_ODP_ERR("Segmented output data is not supported\n");
+		return -1;
+	}
+
+	uint32_t size = 0;
+	uint32_t input_index = 0;
+	uint32_t seg_size_sum = 0;
+	odp_bool_t index_new = true;
+	uint32_t segs_per_input = 1;
+
+	for (uint32_t i = 0; i < data->num_input_seg; i++) {
+		if (data->input_seg[i].addr == NULL) {
+			_ODP_ERR("data->input_seg[%u].addr must not be NULL\n", i);
+			return -1;
+		}
+
+		if (index_new) {
+			if (input_index > mdl->info.num_inputs - 1) {
+				_ODP_ERR("Too much number of input segments given\n");
+				return -1;
+			}
+
+			/* Input with dynamic batch size */
+			if (mdl->input_info[input_index].shape.type == ODP_ML_SHAPE_BATCH)
+				size = dyn_io_size(&mdl->input_info[input_index].shape,
+						   mdl->input_info[input_index].data_type_size,
+						   param);
+			else
+				size = mdl->input_sizes[input_index];
+
+			if (!size) {
+				_ODP_ERR("Size for %uth input is 0\n", input_index);
+				return -1;
+			}
+		}
+
+		seg_size_sum += data->input_seg[i].size;
+
+		if (seg_size_sum > size) {
+			_ODP_ERR("Sum of segment sizes %u exceeds %uth input data size %u\n",
+				 seg_size_sum, input_index, size);
+			return -1;
+		}
+
+		/* Segments for this input complete: advance to the next input */
+		if (seg_size_sum == size) {
+			if (segs_per_input > _odp_ml_glb->capa.max_segs_per_input) {
+				_ODP_ERR("Number of segments %u for input[%u] exceeds maximum"
+					 " number of data segments per model input %u\n",
+					 segs_per_input, input_index,
+					 _odp_ml_glb->capa.max_segs_per_input);
+				return -1;
+			}
+			input_index++;
+			index_new = true;
+			seg_size_sum = 0;
+			segs_per_input = 1;
+		} else {
+			segs_per_input++;
+			index_new = false;
+		}
+	}
+
+	if (input_index != mdl->info.num_inputs) {
+		_ODP_ERR("Data is not provided for all model inputs\n");
+		return -1;
+	}
+
+	seg_size_sum = 0;
+	index_new = true;
+	uint32_t output_index = 0;
+	uint32_t segs_per_output = 1;
+
+	for (uint32_t i = 0; i < data->num_output_seg; i++) {
+		if (data->output_seg[i].addr == NULL) {
+			_ODP_ERR("data->output_seg[%u].addr must not be NULL\n", i);
+			return -1;
+		}
+
+		if (index_new) {
+			if (output_index > mdl->info.num_outputs - 1) {
+				_ODP_ERR("Too much number of output segments given\n");
+				return -1;
+			}
+
+			/* Output with dynamic batch size */
+			if (mdl->output_info[output_index].shape.type == ODP_ML_SHAPE_BATCH)
+				size = dyn_io_size(&mdl->output_info[output_index].shape,
+						   mdl->output_info[output_index].data_type_size,
+						   param);
+			else
+				size = mdl->output_sizes[output_index];
+
+			if (!size) {
+				_ODP_ERR("Size for %uth output is 0\n", output_index);
+				return -1;
+			}
+		}
+
+		seg_size_sum += data->output_seg[i].size;
+
+		if (seg_size_sum > size) {
+			_ODP_ERR("Sum of segment sizes %u exceeds %uth output data size %u\n",
+				 seg_size_sum, output_index, size);
+			return -1;
+		}
+
+		/* Segments for this output complete: advance to the next output */
+		if (seg_size_sum == size) {
+			if (segs_per_output > _odp_ml_glb->capa.max_segs_per_output) {
+				_ODP_ERR("Number of segments %u for output[%u] exceeds maximum"
+					 " number of data segments per model output %u\n",
+					 segs_per_output, output_index,
+					 _odp_ml_glb->capa.max_segs_per_output);
+				return -1;
+			}
+			output_index++;
+			index_new = true;
+			seg_size_sum = 0;
+			segs_per_output = 1;
+		} else {
+			segs_per_output++;
+			index_new = false;
+		}
+	}
+
+	if (output_index != mdl->info.num_outputs) {
+		_ODP_ERR("Not enough output_segs to hold all output data\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Map an ODP ML data type to the ONNX Runtime tensor element type.
+ * Types ONNX has no equivalent for (24-bit integers) and unrecognized
+ * values map to ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED. */
+static ONNXTensorElementDataType onnx_dtype_from_odp_dtype(odp_ml_data_type_t data_type)
+{
+	switch (data_type) {
+	case ODP_ML_DATA_TYPE_NONE:
+		return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
+	case ODP_ML_DATA_TYPE_INT8:
+		return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8;
+	case ODP_ML_DATA_TYPE_UINT8:
+		return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8;
+	case ODP_ML_DATA_TYPE_INT16:
+		return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16;
+	case ODP_ML_DATA_TYPE_UINT16:
+		return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16;
+	case ODP_ML_DATA_TYPE_INT24:
+		/* Fall through*/
+	case ODP_ML_DATA_TYPE_UINT24:
+		return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
+	case ODP_ML_DATA_TYPE_FP64:
+		return ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE;
+	case ODP_ML_DATA_TYPE_INT32:
+		return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32;
+	case ODP_ML_DATA_TYPE_UINT32:
+		return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32;
+	case ODP_ML_DATA_TYPE_INT64:
+		return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
+	case ODP_ML_DATA_TYPE_UINT64:
+		return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64;
+	case ODP_ML_DATA_TYPE_FP16:
+		return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16;
+	case ODP_ML_DATA_TYPE_FP32:
+		return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
+	case ODP_ML_DATA_TYPE_BFP16:
+		return ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16;
+	default:
+		return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
+	}
+}
+
+/* Check that an ORT tensor's element type and dimensions match the expected
+ * ODP data type and shape (dynamic dims replaced by batch_size). Returns 0
+ * on match, -1 on mismatch or ORT API failure.
+ * Fixes: int64_t dimensions were printed with PRIu64 (now PRId64); the six
+ * duplicated ReleaseTensorTypeAndShapeInfo() calls are collapsed into a
+ * single cleanup path. */
+static int verify_tensor(const OrtValue *tensor, odp_ml_data_type_t expected_type,
+			 const odp_ml_shape_info_t *expected_shape, uint32_t batch_size)
+{
+	OrtTensorTypeAndShapeInfo *tensor_info;
+	ONNXTensorElementDataType tensor_type;
+	size_t dim_count;
+	OrtStatus *status = NULL;
+	int64_t dims[ODP_ML_MAX_DIMS] = {0};
+	int64_t shape_arr[ODP_ML_MAX_DIMS] = {0};
+	const OrtApi *ort_api = _odp_ml_glb->ort_api;
+
+	status = ort_api->GetTensorTypeAndShape(tensor, &tensor_info);
+	if (check_ortstatus(status)) {
+		_ODP_ERR("GetTensorTypeAndShape() failed\n");
+		return -1;
+	}
+
+	status = ort_api->GetTensorElementType(tensor_info, &tensor_type);
+	if (check_ortstatus(status)) {
+		_ODP_ERR("GetTensorElementType() failed\n");
+		goto error;
+	}
+
+	if (onnx_dtype_to_odp_dtype(tensor_type) != expected_type) {
+		_ODP_ERR("Tensor type does not match model type\n");
+		goto error;
+	}
+
+	status = ort_api->GetDimensionsCount(tensor_info, &dim_count);
+	if (check_ortstatus(status)) {
+		_ODP_ERR("GetDimensionsCount() failed\n");
+		goto error;
+	}
+
+	if (dim_count != expected_shape->num_dim) {
+		_ODP_ERR("Tensor dimension does not match shape_dim\n");
+		goto error;
+	}
+
+	status = ort_api->GetDimensions(tensor_info, dims, dim_count);
+	if (check_ortstatus(status)) {
+		_ODP_ERR("GetDimensions() failed\n");
+		goto error;
+	}
+
+	ml_shape_to_int64(expected_shape, batch_size, shape_arr);
+
+	for (uint32_t i = 0; i < dim_count; i++) {
+		if (dims[i] != shape_arr[i]) {
+			_ODP_ERR("Shape[%u]: %" PRId64 " does not match expected: %" PRId64 "\n",
+				 i, dims[i], shape_arr[i]);
+			goto error;
+		}
+	}
+
+	ort_api->ReleaseTensorTypeAndShapeInfo(tensor_info);
+	return 0;
+
+error:
+	/* Single cleanup point for the shape info object */
+	ort_api->ReleaseTensorTypeAndShapeInfo(tensor_info);
+	return -1;
+}
+
+/* Build one ORT input tensor and fill it by concatenating input data
+ * segments, starting at *seg_idx and advancing it past the consumed
+ * segments. In debug builds the finished tensor is verified against the
+ * model's declared type and shape. Returns 0 on success, -1 on error.
+ * NOTE(review): on the later error paths the tensor created by
+ * CreateTensorAsOrtValue() is not released here — presumably the caller
+ * releases output/input tensors on failure; confirm. */
+static int input_data_to_tensor(const odp_ml_input_info_t *input_info, uint32_t num_seg,
+				const odp_ml_data_seg_t *input_seg, uint32_t *seg_idx,
+				uint32_t batch_size, OrtValue **input_tensor)
+{
+	int is_tensor;
+	uint64_t input_size;
+	OrtAllocator *allocator;
+	void *data = NULL;
+	OrtStatus *status = NULL;
+	int64_t shape[ODP_ML_MAX_DIMS] = {0};
+	const OrtApi *ort_api = _odp_ml_glb->ort_api;
+	ONNXTensorElementDataType onnx_dtype = ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
+
+	/* Dynamic dims replaced with the effective batch size */
+	ml_shape_to_int64(&input_info->shape, batch_size, shape);
+
+	onnx_dtype = onnx_dtype_from_odp_dtype(input_info->data_type);
+	_ODP_ASSERT(onnx_dtype != ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED);
+
+	status = ort_api->GetAllocatorWithDefaultOptions(&allocator);
+	if (check_ortstatus(status)) {
+		_ODP_ERR("GetAllocatorWithDefaultOptions() failed\n");
+		return -1;
+	}
+
+	status = ort_api->CreateTensorAsOrtValue(allocator,
+						 shape,
+						 input_info->shape.num_dim,
+						 onnx_dtype,
+						 input_tensor);
+	if (check_ortstatus(status) || !input_tensor[0]) {
+		_ODP_ERR("CreateTensorWithDataAsOrtValue() failed\n");
+		return -1;
+	}
+
+	/* Total bytes this input expects */
+	input_size = input_info->data_type_size * get_num_elem(batch_size, &input_info->shape);
+
+	status = ort_api->GetTensorMutableData(input_tensor[0], &data);
+	if (check_ortstatus(status) || !data) {
+		_ODP_ERR("GetTensorMutableData() failed\n");
+		return -1;
+	}
+
+	/* Copy segments into the tensor until input_size bytes are gathered */
+	for (uint64_t i = 0; i < input_size; ) {
+		if (*seg_idx >= num_seg) {
+			_ODP_ERR("Insufficient input data\n");
+			return -1;
+		}
+
+		uint64_t seg_size = input_seg[*seg_idx].size;
+
+		if (i + seg_size > input_size) {
+			_ODP_ERR("Excess input data in segment %" PRIu32 "\n", *seg_idx);
+			return -1;
+		}
+
+		memcpy((uint8_t *)data + i, input_seg[(*seg_idx)++].addr, seg_size);
+		i += seg_size;
+	}
+
+	/* Verification below is debug-build only */
+	if (!ODP_DEBUG)
+		return 0;
+
+	status = ort_api->IsTensor(input_tensor[0], &is_tensor);
+	if (check_ortstatus(status) || !is_tensor) {
+		_ODP_ERR("input_tensor IsTensor failed\n");
+		return -1;
+	}
+
+	/* Make sure tensor shape matches input_shape */
+	if (verify_tensor(input_tensor[0], input_info->data_type,
+			  &input_info->shape, batch_size)) {
+		_ODP_ERR("Verify input_tensor failed\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Debug check for one output tensor: it must be a real tensor and its type
+ * and shape must match the model's declared output. Returns 0 if OK. */
+static int verify_output_tensor(OrtValue *output_tensor, odp_ml_data_type_t expected_type,
+				const odp_ml_shape_info_t *expected_shape, uint32_t batch_size)
+{
+	const OrtApi *ort_api = _odp_ml_glb->ort_api;
+	OrtStatus *status;
+	int is_tensor = 0;
+
+	status = ort_api->IsTensor(output_tensor, &is_tensor);
+	if (check_ortstatus(status) || !is_tensor) {
+		_ODP_ERR("output_tensor IsTensor failed\n");
+		return -1;
+	}
+
+	/* Make sure tensor shape matches output_shape */
+	if (verify_tensor(output_tensor, expected_type, expected_shape, batch_size)) {
+		_ODP_ERR("Verify output_tensor failed\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Compute a tensor's payload size in bytes (element count * element size)
+ * and store it in *size. Returns 0 on success, -1 on ORT API failure.
+ * NOTE(review): the size_t * uint32_t product is stored into a uint32_t —
+ * presumably tensors never exceed 4 GB here; confirm against model limits. */
+static int get_tensor_data_size(OrtValue *tensor, uint32_t *size, uint32_t data_type_size)
+{
+	size_t num_elem;
+	OrtStatus *status;
+	OrtTensorTypeAndShapeInfo *tensor_info;
+	const OrtApi *ort_api = _odp_ml_glb->ort_api;
+
+	status = ort_api->GetTensorTypeAndShape(tensor, &tensor_info);
+	if (check_ortstatus(status)) {
+		_ODP_ERR("GetTensorTypeAndShape() failed\n");
+		return -1;
+	}
+
+	status = ort_api->GetTensorShapeElementCount(tensor_info, &num_elem);
+	if (check_ortstatus(status)) {
+		ort_api->ReleaseTensorTypeAndShapeInfo(tensor_info);
+		_ODP_ERR("GetTensorShapeElementCount() failed\n");
+		return -1;
+	}
+	*size = data_type_size * num_elem;
+
+	ort_api->ReleaseTensorTypeAndShapeInfo(tensor_info);
+	return 0;
+}
+
+/* Check that the caller's output segments, starting at seg_idx, can hold one
+ * output tensor of out_tensor_data_size bytes. For non-segmented output the
+ * single segment must be big enough; for segmented output the remaining
+ * segments must sum to at least the tensor size. Returns 0 if OK.
+ * Fix: output_idx is uint32_t but was printed with %d; use %u. */
+static int check_output_size(odp_bool_t is_segmented, uint32_t output_idx, uint32_t seg_idx,
+			     uint64_t out_tensor_data_size, const odp_ml_data_t data[])
+{
+	uint64_t output_size = 0;
+
+	/* Output is not segmented */
+	if (!is_segmented) {
+		/* Make sure tensor data size does not exceed size allocated for
+		 * data->output_seg[seg_idx].addr */
+		if (out_tensor_data_size > data->output_seg[seg_idx].size) {
+			_ODP_ERR("Malloc at least %" PRIu64 " bytes for %uth output tensor\n",
+				 out_tensor_data_size, output_idx);
+			return -1;
+		}
+
+		return 0;
+	}
+
+	/* Output is segmented, first calculate total size for one tensor */
+	for (; seg_idx < data->num_output_seg; seg_idx++) {
+		output_size += data->output_seg[seg_idx].size;
+		if (output_size >= out_tensor_data_size)
+			break;
+	}
+
+	if (0 == output_size) {
+		_ODP_ERR("No output data segments for %uth output tensor\n", output_idx);
+		return -1;
+	}
+
+	if (out_tensor_data_size > output_size) {
+		_ODP_ERR("Output segments (%" PRIu64 " bytes in total) for %uth output"
+			 " is expected to be at least %" PRIu64 " bytes\n",
+			 output_size, output_idx, out_tensor_data_size);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Copy each model output tensor into the caller's output data segments.
+ * When the segment count equals the output count, each tensor goes into one
+ * segment; otherwise the tensor bytes are spread across consecutive
+ * segments. Debug builds verify tensor type/shape and segment capacity
+ * first. On failure, result_local->error_code is set and -1 returned. */
+static int output_tensors_to_data(OrtValue **output_tensors,
+				  uint32_t model_num_outputs,
+				  const odp_ml_run_param_t *param,
+				  const odp_ml_output_info_t *output_info,
+				  const odp_ml_data_t *data,
+				  odp_ml_run_result_t *result_local)
+{
+	uint32_t seg_idx;
+	uint64_t seg_size;
+	uint64_t cpy_size;
+	uint64_t left_size;
+	uint64_t output_val_offset;
+	uint32_t out_tensor_data_size;
+	void *output_val = NULL; /* Pointer to store one raw output value */
+	OrtStatus *status = NULL;
+	uint32_t batch_size = (param && param->batch_size) ? param->batch_size : 0;
+	const OrtApi *ort_api = _odp_ml_glb->ort_api;
+	/* One-segment-per-output is the non-segmented fast path */
+	odp_bool_t is_segmented = (data->num_output_seg != model_num_outputs);
+
+	seg_idx = 0;
+	for (uint32_t i = 0; i < model_num_outputs; i++) {
+		if (ODP_DEBUG &&
+		    verify_output_tensor(output_tensors[i], output_info[i].data_type,
+					 &output_info[i].shape, batch_size)){
+			result_local->error_code = ML_BAD_OUTPUT;
+			return -1;
+		}
+
+		/* Get tensor data size */
+		if (get_tensor_data_size(output_tensors[i], &out_tensor_data_size,
+					 output_info[i].data_type_size)) {
+			result_local->error_code = ML_LIB_FAILED;
+			return -1;
+		}
+
+		/* When output_tensor is an empty tensor [], skip getting data */
+		if (out_tensor_data_size == 0)
+			continue;
+
+		if (ODP_DEBUG && check_output_size(is_segmented, i, seg_idx,
+						   out_tensor_data_size, data)) {
+			result_local->error_code = ML_BAD_OUTPUT;
+			return -1;
+		}
+
+		/* Following assumes param and data->output_seg are valid */
+		/* Get tensor data */
+		output_val = NULL;
+		status = ort_api->GetTensorMutableData(output_tensors[i], &output_val);
+		if (check_ortstatus(status) || !output_val) {
+			result_local->error_code = ML_LIB_FAILED;
+			return -1;
+		}
+
+		/* Output is not segmented */
+		if (!is_segmented) {
+			/* Store output data to data->output_seg[i].addr */
+			memcpy(data->output_seg[i].addr, output_val, out_tensor_data_size);
+			seg_idx++;
+			continue;
+		}
+
+		/* Output is segmented: spread the tensor bytes over as many
+		 * segments as needed, leaving seg_idx at the next unused one */
+		output_val_offset = 0;
+		left_size = out_tensor_data_size;
+		for (; seg_idx < data->num_output_seg; seg_idx++) {
+			seg_size = data->output_seg[seg_idx].size;
+			cpy_size = left_size > seg_size ? seg_size : left_size;
+			memcpy(data->output_seg[seg_idx].addr,
+			       ((char *)output_val) + output_val_offset, cpy_size);
+
+			output_val_offset += cpy_size;
+			left_size = out_tensor_data_size - output_val_offset;
+
+			if (!left_size) {
+				seg_idx++;
+				break;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/* Run model inference synchronously.
+ *
+ * Returns 1 when one run completed successfully, 0 when the model is busy
+ * with another inference, and -1 on error. Detailed status is written to
+ * param->result when the caller provides it.
+ */
+int odp_ml_run(odp_ml_model_t model, const odp_ml_data_t *data, const odp_ml_run_param_t *param)
+{
+	odp_ml_run_result_t result_local;
+
+	int retval = -1; /* Return value of this function */
+	int ret = 0;
+	OrtStatus *status = NULL;
+	uint32_t batch_size = 0;
+
+	OrtValue *input_tensor[CONFIG_ML_MAX_INPUTS] = {0};
+	OrtValue *output_tensors[CONFIG_ML_MAX_OUTPUTS] = {0};
+	const char *input_names[CONFIG_ML_MAX_INPUTS] = {0};
+	const char *output_names[CONFIG_ML_MAX_OUTPUTS] = {0};
+
+	const OrtApi *ort_api = _odp_ml_glb->ort_api;
+	ml_model_t *mdl = ml_model_from_handle(model);
+	const odp_ml_model_info_t *ml_info = &mdl->info;
+	const odp_ml_input_info_t *input_info = mdl->input_info;
+	const odp_ml_output_info_t *output_info = mdl->output_info;
+	OrtSession *session = mdl->session;
+
+	/* Reserve the model for this run: only one inference at a time */
+	odp_ticketlock_lock(&mdl->lock);
+	if (odp_unlikely(mdl->state == ML_STATE_INFERENCING)) {
+		odp_ticketlock_unlock(&mdl->lock);
+		return 0;
+	}
+	if (odp_unlikely(mdl->state != ML_STATE_LOADED)) {
+		_ODP_ERR("Wrong model state: not created or not loaded\n");
+		odp_ticketlock_unlock(&mdl->lock);
+		return -1;
+	}
+	mdl->state = ML_STATE_INFERENCING;
+	odp_ticketlock_unlock(&mdl->lock);
+
+	memset(&result_local, 0, sizeof(result_local));
+
+	if (ODP_DEBUG && verify_run_params(model, data, param)) {
+		result_local.error_code = ML_BAD_INPUT;
+		goto init_fail;
+	}
+
+	if (param && param->batch_size)
+		batch_size = param->batch_size;
+
+	uint32_t seg_idx = 0;
+
+	/* Transfer input data to tensor */
+	for (uint32_t i = 0; i < ml_info->num_inputs; i++) {
+		ret = input_data_to_tensor(&input_info[i],
+					   data->num_input_seg,
+					   data->input_seg,
+					   &seg_idx,
+					   batch_size,
+					   &input_tensor[i]);
+		if (ret) {
+			_ODP_ERR("%uth input data to tensor failed\n", i);
+			result_local.error_code = ML_LIB_FAILED;
+			goto release_input_tensors;
+		}
+
+		_ODP_DBG("input_tensor[%u]: %p\n", i, input_tensor[i]);
+
+		/* Model input names */
+		input_names[i] = input_info[i].name;
+	}
+
+	if (seg_idx < data->num_input_seg) {
+		/* Fix: previously only a dead local 'ret = -1' was set here and
+		 * inference ran anyway. Input data that does not match the
+		 * model must fail the run before calling Run(). */
+		_ODP_ERR("Excess input segments\n");
+		result_local.error_code = ML_BAD_INPUT;
+		goto release_input_tensors;
+	}
+
+	for (uint32_t i = 0; i < ml_info->num_outputs; i++)
+		output_names[i] = output_info[i].name;
+
+	/* Run inference */
+	status = ort_api->Run(session,
+			      NULL,
+			      (const char * const *)input_names,
+			      (const OrtValue * const*)input_tensor,
+			      ml_info->num_inputs,
+			      (const char * const *)output_names,
+			      ml_info->num_outputs,
+			      output_tensors);
+
+	if (check_ortstatus(status)) {
+		_ODP_ERR("Run inference failed\n");
+		result_local.error_code = ML_LIB_FAILED;
+		goto release_all_tensors;
+	}
+
+	/* Verify output tensors and store them to output */
+	if (output_tensors_to_data(output_tensors, ml_info->num_outputs, param,
+				   output_info, data, &result_local)) {
+		_ODP_ERR("Output tensors to data failed\n");
+		goto release_all_tensors;
+	}
+
+	retval = 1;
+
+release_all_tensors:
+	for (uint32_t i = 0; i < ml_info->num_outputs; i++)
+		ort_api->ReleaseValue(output_tensors[i]);
+
+release_input_tensors:
+	for (uint32_t i = 0; i < ml_info->num_inputs; i++)
+		ort_api->ReleaseValue(input_tensor[i]);
+
+init_fail:
+	if (param && param->result)
+		*param->result = result_local;
+
+	/* Release the model back to the loaded state */
+	odp_ticketlock_lock(&mdl->lock);
+	mdl->state = ML_STATE_LOADED;
+	odp_ticketlock_unlock(&mdl->lock);
+
+	return retval;
+}
+
+/* Run inference on a batch of data sets. Stops at the first run that does
+ * not complete. Returns the number of completed runs, or the first run's
+ * status (0 busy / -1 error) when none completed, or -1 on bad arguments. */
+int odp_ml_run_multi(odp_ml_model_t model, const odp_ml_data_t data[],
+		     const odp_ml_run_param_t param[], int num)
+{
+	int ret = 0;
+	int done;
+
+	if (odp_unlikely(num < 1)) {
+		_ODP_ERR("Bad number of runs\n");
+		return -1;
+	}
+
+	for (done = 0; done < num; done++) {
+		const odp_ml_run_param_t *run_param = param ? &param[done] : NULL;
+
+		ret = odp_ml_run(model, &data[done], run_param);
+		if (odp_unlikely(ret != 1))
+			break;
+	}
+
+	/* Nothing completed: propagate the status of the first run */
+	if (odp_unlikely(done == 0))
+		return ret;
+
+	return done;
+}
+
+/* Start an asynchronous model run. In this implementation the run itself
+ * executes synchronously (via odp_ml_run()); completion is then signalled
+ * either by enqueuing the caller supplied completion event
+ * (ODP_ML_COMPL_MODE_EVENT) or by setting the poll completion status
+ * (ODP_ML_COMPL_MODE_POLL). Returns 1 on success, 0 when the model was
+ * busy, -1 on error.
+ */
+int odp_ml_run_start(odp_ml_model_t model, const odp_ml_data_t *data,
+		     const odp_ml_compl_param_t *compl_param,
+		     const odp_ml_run_param_t *run_param)
+{
+	int ret;
+	ml_model_t *mdl = ml_model_from_handle(model);
+
+	if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) {
+		_ODP_ERR("Bad model handle\n");
+		return -1;
+	}
+
+	if (odp_unlikely(!compl_param)) {
+		_ODP_ERR("Completion parameter is NULL\n");
+		return -1;
+	}
+
+	/* Check completion mode */
+	if (odp_unlikely(check_compl_param(compl_param, mdl->max_compl_id, false))) {
+		_ODP_ERR("Bad ML job completion parameter\n");
+		return -1;
+	}
+
+	/* Mark the poll status "not done" before starting the run */
+	if (compl_param->mode == ODP_ML_COMPL_MODE_POLL)
+		odp_atomic_store_rel_u32(&mdl->compl_status[compl_param->compl_id], 0);
+
+	ret = odp_ml_run(model, data, run_param);
+
+	/* Run did not complete (busy or error): no completion is signalled */
+	if (odp_unlikely(ret < 1))
+		return ret;
+
+	/* Send a completion event to the given queue */
+	if (compl_param->mode == ODP_ML_COMPL_MODE_EVENT) {
+		odp_ml_run_result_t *result;
+		odp_buffer_t buf = (odp_buffer_t)(uintptr_t)compl_param->event;
+
+		_odp_buffer_subtype_set(buf, ODP_EVENT_ML_COMPL_RUN);
+
+		result = odp_buffer_addr(buf);
+		result->error_code = 0;
+		result->user_ptr = compl_param->user_ptr;
+
+		if (odp_unlikely(odp_queue_enq(compl_param->queue, compl_param->event))) {
+			_ODP_ERR("Completion event enqueue failed %" PRIu64 "\n",
+				 odp_queue_to_u64(compl_param->queue));
+			return -1;
+		}
+
+		return 1;
+	}
+
+	/* compl_param->mode == ODP_ML_COMPL_MODE_POLL */
+	/* Write the result first, then release-store the status so that
+	 * odp_ml_run_status() (acquire-load) observes a consistent result */
+	mdl->result[compl_param->compl_id].user_ptr = compl_param->user_ptr;
+	odp_atomic_store_rel_u32(&mdl->compl_status[compl_param->compl_id], 1);
+
+	return 1;
+}
+
+/* Start a batch of asynchronous runs. Stops at the first one that does not
+ * start successfully. Returns the number of started runs, or the first
+ * run's status when none started, or -1 on bad arguments. */
+int odp_ml_run_start_multi(odp_ml_model_t model, const odp_ml_data_t data[],
+			   const odp_ml_compl_param_t compl_param[],
+			   const odp_ml_run_param_t run_param[], int num)
+{
+	int ret = 0;
+	int done;
+
+	if (odp_unlikely(num < 1)) {
+		_ODP_ERR("Bad number of runs\n");
+		return -1;
+	}
+
+	for (done = 0; done < num; done++) {
+		const odp_ml_run_param_t *rp = run_param ? &run_param[done] : NULL;
+
+		ret = odp_ml_run_start(model, &data[done], &compl_param[done], rp);
+		if (odp_unlikely(ret != 1))
+			break;
+	}
+
+	/* Nothing started: propagate the status of the first attempt */
+	if (odp_unlikely(done == 0))
+		return ret;
+
+	return done;
+}
+
+/* Poll the completion status of a run started with ODP_ML_COMPL_MODE_POLL.
+ * Returns the stored status value (non-zero when completed, 0 when still in
+ * progress) or -2 on invalid model handle / completion id.
+ */
+int odp_ml_run_status(odp_ml_model_t model, uint32_t compl_id, odp_ml_run_result_t *result)
+{
+	int ret;
+	ml_model_t *mdl = ml_model_from_handle(model);
+
+	if (odp_unlikely(model == ODP_ML_MODEL_INVALID ||
+			 compl_id > mdl->max_compl_id)) {
+		_ODP_ERR("Invalid model handle or completion id: %u\n", compl_id);
+		return -2;
+	}
+
+	/* Acquire-load pairs with the release-store in odp_ml_run_start() */
+	ret = odp_atomic_load_acq_u32(&mdl->compl_status[compl_id]);
+
+	if (result) {
+		result->error_code = 0;
+		result->user_ptr = mdl->result[compl_id].user_ptr;
+	}
+
+	return ret;
+}
+
+/* Map a configuration string to the corresponding ORT graph optimization
+ * level. Returns 0 on success, -1 when the string is not recognized. */
+static int opt_level_from_str(const char *level_str, GraphOptimizationLevel *level)
+{
+	static const struct {
+		const char *name;
+		GraphOptimizationLevel level;
+	} levels[] = {
+		{"DISABLE_ALL", ORT_DISABLE_ALL},
+		{"ENABLE_BASIC", ORT_ENABLE_BASIC},
+		{"ENABLE_EXTENDED", ORT_ENABLE_EXTENDED},
+		{"ENABLE_ALL", ORT_ENABLE_ALL},
+	};
+
+	for (uint32_t i = 0; i < sizeof(levels) / sizeof(levels[0]); i++) {
+		if (strcmp(level_str, levels[i].name) == 0) {
+			*level = levels[i].level;
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+/* Map a configuration string to the corresponding ORT execution mode.
+ * Returns 0 on success, -1 when the string is not recognized. */
+static int execution_mode_from_str(const char *mode_str, ExecutionMode *mode)
+{
+	if (strcmp(mode_str, "SEQUENTIAL") == 0) {
+		*mode = ORT_SEQUENTIAL;
+		return 0;
+	}
+
+	if (strcmp(mode_str, "PARALLEL") == 0) {
+		*mode = ORT_PARALLEL;
+		return 0;
+	}
+
+	return -1;
+}
+
+/* Read all ML related options from the ODP configuration file into opts.
+ * Every option is mandatory: a missing or unrecognized value fails ML init.
+ * Returns 0 on success, -1 on failure.
+ */
+static int read_config_file(ort_run_opts_t *opts)
+{
+	const char *conf_str;
+	char mode_str[ML_MAX_CONFIG_STR_LEN];
+	char opt_level_str[ML_MAX_CONFIG_STR_LEN];
+
+	_ODP_PRINT("ML config:\n");
+
+	conf_str = "ml.enable_profiling";
+	if (!_odp_libconfig_lookup_int(conf_str, &opts->enable_profiling)) {
+		_ODP_ERR("Config option '%s' not found.\n", conf_str);
+		return -1;
+	}
+	_ODP_PRINT("  %s: %i\n", conf_str, opts->enable_profiling);
+
+	conf_str = "ml.execution_mode";
+	if (_odp_libconfig_lookup_str(conf_str, mode_str, ML_MAX_CONFIG_STR_LEN) < 0) {
+		_ODP_ERR("Config option '%s' not found.\n", conf_str);
+		return -1;
+	}
+
+	/* String option is parsed into the ORT enum value */
+	if (execution_mode_from_str(mode_str, &opts->execution_mode)) {
+		_ODP_ERR("Unsupported execution mode: %s\n", mode_str);
+		return -1;
+	}
+	_ODP_PRINT("  %s: %s\n", conf_str, mode_str);
+
+	conf_str = "ml.inter_op_num_threads";
+	if (!_odp_libconfig_lookup_int(conf_str, &opts->inter_op_num_threads)) {
+		_ODP_ERR("Config option '%s' not found.\n", conf_str);
+		return -1;
+	}
+	_ODP_PRINT("  %s: %i\n", conf_str, opts->inter_op_num_threads);
+
+	conf_str = "ml.intra_op_num_threads";
+	if (!_odp_libconfig_lookup_int(conf_str, &opts->intra_op_num_threads)) {
+		_ODP_ERR("Config option '%s' not found.\n", conf_str);
+		return -1;
+	}
+	_ODP_PRINT("  %s: %i\n", conf_str, opts->intra_op_num_threads);
+
+	conf_str = "ml.graph_optimization_level";
+	if (_odp_libconfig_lookup_str(conf_str, opt_level_str,
+				      ML_MAX_CONFIG_STR_LEN) < 0) {
+		_ODP_ERR("Config option '%s' not found.\n", conf_str);
+		return -1;
+	}
+
+	if (opt_level_from_str(opt_level_str, &opts->graph_opt_level)) {
+		_ODP_ERR("Graph optimize level %s not supported\n", opt_level_str);
+		return -1;
+	}
+	_ODP_PRINT("  %s: %s\n", conf_str, opt_level_str);
+
+	conf_str = "ml.optimized_model_filepath";
+	if (_odp_libconfig_lookup_str(conf_str, opts->opt_model_filepath,
+				      ML_MAX_CONFIG_STR_LEN) < 0) {
+		_ODP_ERR("Config option '%s' not found.\n", conf_str);
+		return -1;
+	}
+	_ODP_PRINT("  %s: %s\n", conf_str, opts->opt_model_filepath);
+
+	return 0;
+}
+
+/* Initialize the ML module: reserve the global state SHM, read ML options
+ * from the config file and create the ONNX Runtime environment.
+ * Returns 0 on success (also when ML is disabled), -1 on failure.
+ */
+int _odp_ml_init_global(void)
+{
+	int i;
+	OrtEnv *env;
+	odp_shm_t shm;
+	OrtStatus *status;
+	const OrtApi *ort_api;
+
+	if (odp_global_ro.disable.ml) {
+		_ODP_ERR("ML is disabled\n");
+		return 0;
+	}
+
+	shm = odp_shm_reserve("_odp_ml_global", sizeof(ml_global_t), ODP_CACHE_LINE_SIZE, 0);
+	_odp_ml_glb = odp_shm_addr(shm);
+
+	if (_odp_ml_glb == NULL) {
+		_ODP_ERR("SHM reserve failed for odp_ml\n");
+		return -1;
+	}
+
+	memset(_odp_ml_glb, 0, sizeof(ml_global_t));
+	_odp_ml_glb->shm = shm;
+
+	if (odp_ml_capability(&_odp_ml_glb->capa)) {
+		_ODP_ERR("ML capability failed\n");
+		goto error;
+	}
+
+	odp_pool_param_init(&_odp_ml_glb->pool_param);
+
+	if (read_config_file(&_odp_ml_glb->ort_run_opts))
+		goto error;
+
+	ort_api = OrtGetApiBase()->GetApi(ORT_API_VERSION);
+	if (!ort_api) {
+		_ODP_ERR("Failed to init ONNX Runtime engine.\n");
+		goto error;
+	}
+	_odp_ml_glb->ort_api = ort_api;
+
+	status = ort_api->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "Default", &env);
+	if (check_ortstatus(status) || !env) {
+		_ODP_ERR("ort_api->CreateEnv() failed.\n");
+		goto error;
+	}
+	_odp_ml_glb->env = env;
+
+	for (i = 0; i < ML_MAX_MODELS_CREATED; i++)
+		odp_ticketlock_init(&_odp_ml_glb->models[i].lock);
+
+	return 0;
+
+error:
+	/* Fix: don't leak the reserved SHM block when a later init step fails.
+	 * NOTE(review): assumes _odp_ml_term_global() is not invoked for a
+	 * module whose init failed — confirm against odp_init unwind logic. */
+	(void)odp_shm_free(shm);
+	_odp_ml_glb = NULL;
+	return -1;
+}
+
+/* Terminate the ML module: release the ORT environment and free the global
+ * state SHM. Returns 0 on success (also when ML was disabled or never
+ * initialized), -1 when the SHM free fails. */
+int _odp_ml_term_global(void)
+{
+	ml_global_t *glb = _odp_ml_glb;
+
+	if (odp_global_ro.disable.ml || glb == NULL)
+		return 0;
+
+	if (glb->env)
+		glb->ort_api->ReleaseEnv(glb->env);
+
+	if (odp_shm_free(glb->shm)) {
+		_ODP_ERR("Shm free failed for odp_ml\n");
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/platform/linux-generic/odp_ml_fp16.c b/platform/linux-generic/odp_ml_fp16.c
new file mode 100644
index 000000000..47b10f841
--- /dev/null
+++ b/platform/linux-generic/odp_ml_fp16.c
@@ -0,0 +1,425 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2022-2023 Marvell.
+ * Copyright (c) 2023 Nokia
+ *
+ * Based on
+ * - dpdk/lib/mldev/mldev_utils_scalar.h
+ * - dpdk/lib/mldev/mldev_utils_scalar.c
+ * - dpdk/lib/mldev/mldev_utils_scalar_bfloat16.c
+ */
+
+#include <odp_ml_fp16.h>
+
+#include <errno.h>
+#include <stdint.h>
+
+#ifndef BIT
+#define BIT(nr) (1UL << (nr))
+#endif
+
+#ifndef BITS_PER_LONG
+#define BITS_PER_LONG (__SIZEOF_LONG__ * 8)
+#endif
+
+#ifndef GENMASK_U32
+#define GENMASK_U32(h, l) (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
+#endif
+
+/* float32: bit index of MSB & LSB of sign, exponent and mantissa */
+#define FP32_LSB_M 0
+#define FP32_MSB_M 22
+#define FP32_LSB_E 23
+#define FP32_MSB_E 30
+#define FP32_LSB_S 31
+#define FP32_MSB_S 31
+
+/* float32: bitmask for sign, exponent and mantissa */
+#define FP32_MASK_S GENMASK_U32(FP32_MSB_S, FP32_LSB_S)
+#define FP32_MASK_E GENMASK_U32(FP32_MSB_E, FP32_LSB_E)
+#define FP32_MASK_M GENMASK_U32(FP32_MSB_M, FP32_LSB_M)
+
+/* float16: bit index of MSB & LSB of sign, exponent and mantissa */
+#define FP16_LSB_M 0
+#define FP16_MSB_M 9
+#define FP16_LSB_E 10
+#define FP16_MSB_E 14
+#define FP16_LSB_S 15
+#define FP16_MSB_S 15
+
+/* float16: bitmask for sign, exponent and mantissa */
+#define FP16_MASK_S GENMASK_U32(FP16_MSB_S, FP16_LSB_S)
+#define FP16_MASK_E GENMASK_U32(FP16_MSB_E, FP16_LSB_E)
+#define FP16_MASK_M GENMASK_U32(FP16_MSB_M, FP16_LSB_M)
+
+/* bfloat16: bit index of MSB & LSB of sign, exponent and mantissa */
+#define BF16_LSB_M 0
+#define BF16_MSB_M 6
+#define BF16_LSB_E 7
+#define BF16_MSB_E 14
+#define BF16_LSB_S 15
+#define BF16_MSB_S 15
+
+/* bfloat16: bitmask for sign, exponent and mantissa */
+#define BF16_MASK_S GENMASK_U32(BF16_MSB_S, BF16_LSB_S)
+#define BF16_MASK_E GENMASK_U32(BF16_MSB_E, BF16_LSB_E)
+#define BF16_MASK_M GENMASK_U32(BF16_MSB_M, BF16_LSB_M)
+
+/* Exponent bias */
+#define FP32_BIAS_E 127
+#define FP16_BIAS_E 15
+#define BF16_BIAS_E 127
+
+#define FP32_PACK(sign, exponent, mantissa) \
+ (((sign) << FP32_LSB_S) | ((exponent) << FP32_LSB_E) | (mantissa))
+
+#define FP16_PACK(sign, exponent, mantissa) \
+ (((sign) << FP16_LSB_S) | ((exponent) << FP16_LSB_E) | (mantissa))
+
+#define BF16_PACK(sign, exponent, mantissa) \
+ (((sign) << BF16_LSB_S) | ((exponent) << BF16_LSB_E) | (mantissa))
+
+/* Represent float32 as float and uint32_t */
+union float32 {
+ float f;
+ uint32_t u;
+};
+
+/* Convert a single precision floating point number (float32) into a half
+ * precision floating point number (float16) using round to nearest (ties to
+ * even) rounding mode. NOTE: float32 subnormal inputs are flushed to zero
+ * (see the f32_e == 0 case below).
+ */
+static uint16_t
+__float32_to_float16_scalar_rtn(float x)
+{
+	union float32 f32; /* float32 input */
+	uint32_t f32_s; /* float32 sign */
+	uint32_t f32_e; /* float32 exponent */
+	uint32_t f32_m; /* float32 mantissa */
+	uint16_t f16_s; /* float16 sign */
+	uint16_t f16_e; /* float16 exponent */
+	uint16_t f16_m; /* float16 mantissa */
+	uint32_t tbits; /* number of truncated bits */
+	uint32_t tmsb; /* MSB position of truncated bits */
+	uint32_t m_32; /* temporary float32 mantissa */
+	uint16_t m_16; /* temporary float16 mantissa */
+	uint16_t u16; /* float16 output */
+	int be_16; /* float16 biased exponent, signed */
+
+	/* Split the input into sign, exponent and mantissa fields */
+	f32.f = x;
+	f32_s = (f32.u & FP32_MASK_S) >> FP32_LSB_S;
+	f32_e = (f32.u & FP32_MASK_E) >> FP32_LSB_E;
+	f32_m = (f32.u & FP32_MASK_M) >> FP32_LSB_M;
+
+	f16_s = f32_s;
+	f16_e = 0;
+	f16_m = 0;
+
+	switch (f32_e) {
+	case (0): /* float32: zero or subnormal number */
+		f16_e = 0;
+		f16_m = 0; /* convert to zero */
+		break;
+	case (FP32_MASK_E >> FP32_LSB_E): /* float32: infinity or nan */
+		f16_e = FP16_MASK_E >> FP16_LSB_E;
+		if (f32_m == 0) { /* infinity */
+			f16_m = 0;
+		} else { /* nan, propagate mantissa and set MSB of mantissa to 1 */
+			f16_m = f32_m >> (FP32_MSB_M - FP16_MSB_M);
+			f16_m |= BIT(FP16_MSB_M);
+		}
+		break;
+	default: /* float32: normal number */
+		/* compute biased exponent for float16 */
+		be_16 = (int)f32_e - FP32_BIAS_E + FP16_BIAS_E;
+
+		/* overflow, be_16 = [31-INF], set to infinity */
+		if (be_16 >= (int)(FP16_MASK_E >> FP16_LSB_E)) {
+			f16_e = FP16_MASK_E >> FP16_LSB_E;
+			f16_m = 0;
+		} else if ((be_16 >= 1) && (be_16 < (int)(FP16_MASK_E >> FP16_LSB_E))) {
+			/* normal float16, be_16 = [1:30]*/
+			f16_e = be_16;
+			m_16 = f32_m >> (FP32_LSB_E - FP16_LSB_E);
+			tmsb = FP32_MSB_M - FP16_MSB_M - 1;
+			if ((f32_m & GENMASK_U32(tmsb, 0)) > BIT(tmsb)) {
+				/* round: non-zero truncated bits except MSB */
+				m_16++;
+
+				/* overflow into exponent */
+				if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
+					f16_e++;
+			} else if ((f32_m & GENMASK_U32(tmsb, 0)) == BIT(tmsb)) {
+				/* round: MSB of truncated bits and LSB of m_16 is set */
+				if ((m_16 & 0x1) == 0x1) {
+					m_16++;
+
+					/* overflow into exponent */
+					if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
+						f16_e++;
+				}
+			}
+			f16_m = m_16 & FP16_MASK_M;
+		} else if ((be_16 >= -(int)(FP16_MSB_M)) && (be_16 < 1)) {
+			/* underflow: zero / subnormal, be_16 = [-9:0] */
+			f16_e = 0;
+
+			/* add implicit leading zero */
+			m_32 = f32_m | BIT(FP32_LSB_E);
+			tbits = FP32_LSB_E - FP16_LSB_E - be_16 + 1;
+			m_16 = m_32 >> tbits;
+
+			/* if non-leading truncated bits are set */
+			if ((f32_m & GENMASK_U32(tbits - 1, 0)) > BIT(tbits - 1)) {
+				m_16++;
+
+				/* overflow into exponent */
+				if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
+					f16_e++;
+			} else if ((f32_m & GENMASK_U32(tbits - 1, 0)) == BIT(tbits - 1)) {
+				/* if leading truncated bit is set */
+				if ((m_16 & 0x1) == 0x1) {
+					m_16++;
+
+					/* overflow into exponent */
+					if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
+						f16_e++;
+				}
+			}
+			f16_m = m_16 & FP16_MASK_M;
+		} else if (be_16 == -(int)(FP16_MSB_M + 1)) {
+			/* underflow: zero, be_16 = [-10] */
+			f16_e = 0;
+			if (f32_m != 0)
+				f16_m = 1;
+			else
+				f16_m = 0;
+		} else {
+			/* underflow: zero, be_16 = [-INF:-11] */
+			f16_e = 0;
+			f16_m = 0;
+		}
+
+		break;
+	}
+
+	u16 = FP16_PACK(f16_s, f16_e, f16_m);
+
+	return u16;
+}
+
+/* Convert a half precision floating point number (float16) into a single
+ * precision floating point number (float32). float16 subnormals are
+ * renormalized to normal float32 values (see the f16_e == 0 case).
+ */
+static float
+__float16_to_float32_scalar_rtx(uint16_t f16)
+{
+	union float32 f32; /* float32 output */
+	uint16_t f16_s; /* float16 sign */
+	uint16_t f16_e; /* float16 exponent */
+	uint16_t f16_m; /* float16 mantissa */
+	uint32_t f32_s; /* float32 sign */
+	uint32_t f32_e; /* float32 exponent */
+	uint32_t f32_m; /* float32 mantissa*/
+	uint8_t shift; /* number of bits to be shifted */
+	uint32_t clz; /* count of leading zeroes */
+	int e_16; /* float16 exponent unbiased */
+
+	/* Split the input into sign, exponent and mantissa fields */
+	f16_s = (f16 & FP16_MASK_S) >> FP16_LSB_S;
+	f16_e = (f16 & FP16_MASK_E) >> FP16_LSB_E;
+	f16_m = (f16 & FP16_MASK_M) >> FP16_LSB_M;
+
+	f32_s = f16_s;
+	switch (f16_e) {
+	case (FP16_MASK_E >> FP16_LSB_E): /* float16: infinity or nan */
+		f32_e = FP32_MASK_E >> FP32_LSB_E;
+		if (f16_m == 0x0) { /* infinity */
+			f32_m = f16_m;
+		} else { /* nan, propagate mantissa, set MSB of mantissa to 1 */
+			f32_m = f16_m;
+			shift = FP32_MSB_M - FP16_MSB_M;
+			f32_m = (f32_m << shift) & FP32_MASK_M;
+			f32_m |= BIT(FP32_MSB_M);
+		}
+		break;
+	case 0: /* float16: zero or sub-normal */
+		f32_m = f16_m;
+		if (f16_m == 0) { /* zero signed */
+			f32_e = 0;
+		} else { /* subnormal numbers */
+			/* leading zero count within the 10-bit mantissa field */
+			clz = __builtin_clz((uint32_t)f16_m) - sizeof(uint32_t) * 8 + FP16_LSB_E;
+			e_16 = (int)f16_e - clz;
+			f32_e = FP32_BIAS_E + e_16 - FP16_BIAS_E;
+
+			shift = clz + (FP32_MSB_M - FP16_MSB_M) + 1;
+			f32_m = (f32_m << shift) & FP32_MASK_M;
+		}
+		break;
+	default: /* normal numbers */
+		f32_m = f16_m;
+		e_16 = (int)f16_e;
+		f32_e = FP32_BIAS_E + e_16 - FP16_BIAS_E;
+
+		shift = (FP32_MSB_M - FP16_MSB_M);
+		f32_m = (f32_m << shift) & FP32_MASK_M;
+	}
+
+	f32.u = FP32_PACK(f32_s, f32_e, f32_m);
+
+	return f32.f;
+}
+
+/* Convert a single precision floating point number (float32) into a
+ * brain float number (bfloat16) using round to nearest rounding mode.
+ * bfloat16 keeps the full float32 exponent width, so subnormal float32
+ * inputs map to normal bfloat16 values (bf16_normal path).
+ */
+static uint16_t
+__float32_to_bfloat16_scalar_rtn(float x)
+{
+	union float32 f32; /* float32 input */
+	uint32_t f32_s; /* float32 sign */
+	uint32_t f32_e; /* float32 exponent */
+	uint32_t f32_m; /* float32 mantissa */
+	uint16_t b16_s; /* float16 sign */
+	uint16_t b16_e; /* float16 exponent */
+	uint16_t b16_m; /* float16 mantissa */
+	uint32_t tbits; /* number of truncated bits */
+	uint16_t u16; /* float16 output */
+
+	/* Split the input into sign, exponent and mantissa fields */
+	f32.f = x;
+	f32_s = (f32.u & FP32_MASK_S) >> FP32_LSB_S;
+	f32_e = (f32.u & FP32_MASK_E) >> FP32_LSB_E;
+	f32_m = (f32.u & FP32_MASK_M) >> FP32_LSB_M;
+
+	b16_s = f32_s;
+	b16_e = 0;
+	b16_m = 0;
+
+	switch (f32_e) {
+	case (0): /* float32: zero or subnormal number */
+		b16_e = 0;
+		if (f32_m == 0) /* zero */
+			b16_m = 0;
+		else /* subnormal float32 number, normal bfloat16 */
+			goto bf16_normal;
+		break;
+	case (FP32_MASK_E >> FP32_LSB_E): /* float32: infinity or nan */
+		b16_e = BF16_MASK_E >> BF16_LSB_E;
+		if (f32_m == 0) { /* infinity */
+			b16_m = 0;
+		} else { /* nan, propagate mantissa and set MSB of mantissa to 1 */
+			b16_m = f32_m >> (FP32_MSB_M - BF16_MSB_M);
+			b16_m |= BIT(BF16_MSB_M);
+		}
+		break;
+	default: /* float32: normal number, normal bfloat16 */
+		goto bf16_normal;
+	}
+
+	goto bf16_pack;
+
+bf16_normal:
+	/* Truncate the mantissa and round to nearest */
+	b16_e = f32_e;
+	tbits = FP32_MSB_M - BF16_MSB_M;
+	b16_m = f32_m >> tbits;
+
+	/* if non-leading truncated bits are set */
+	if ((f32_m & GENMASK_U32(tbits - 1, 0)) > BIT(tbits - 1)) {
+		b16_m++;
+
+		/* if overflow into exponent */
+		if (((b16_m & BF16_MASK_E) >> BF16_LSB_E) == 0x1)
+			b16_e++;
+	} else if ((f32_m & GENMASK_U32(tbits - 1, 0)) == BIT(tbits - 1)) {
+		/* if only leading truncated bit is set */
+		if ((b16_m & 0x1) == 0x1) {
+			b16_m++;
+
+			/* if overflow into exponent */
+			if (((b16_m & BF16_MASK_E) >> BF16_LSB_E) == 0x1)
+				b16_e++;
+		}
+	}
+	b16_m = b16_m & BF16_MASK_M;
+
+bf16_pack:
+	u16 = BF16_PACK(b16_s, b16_e, b16_m);
+
+	return u16;
+}
+
+/* Convert a brain float number (bfloat16) into a
+ * single precision floating point number (float32).
+ * Subnormal and normal bfloat16 values share the fp32_normal path since
+ * the exponent field width matches float32.
+ */
+static float
+__bfloat16_to_float32_scalar_rtx(uint16_t f16)
+{
+	union float32 f32; /* float32 output */
+	uint16_t b16_s; /* float16 sign */
+	uint16_t b16_e; /* float16 exponent */
+	uint16_t b16_m; /* float16 mantissa */
+	uint32_t f32_s; /* float32 sign */
+	uint32_t f32_e; /* float32 exponent */
+	uint32_t f32_m; /* float32 mantissa*/
+	uint8_t shift; /* number of bits to be shifted */
+
+	/* Split the input into sign, exponent and mantissa fields */
+	b16_s = (f16 & BF16_MASK_S) >> BF16_LSB_S;
+	b16_e = (f16 & BF16_MASK_E) >> BF16_LSB_E;
+	b16_m = (f16 & BF16_MASK_M) >> BF16_LSB_M;
+
+	f32_s = b16_s;
+	switch (b16_e) {
+	case (BF16_MASK_E >> BF16_LSB_E): /* bfloat16: infinity or nan */
+		f32_e = FP32_MASK_E >> FP32_LSB_E;
+		if (b16_m == 0x0) { /* infinity */
+			f32_m = 0;
+		} else { /* nan, propagate mantissa, set MSB of mantissa to 1 */
+			f32_m = b16_m;
+			shift = FP32_MSB_M - BF16_MSB_M;
+			f32_m = (f32_m << shift) & FP32_MASK_M;
+			f32_m |= BIT(FP32_MSB_M);
+		}
+		break;
+	case 0: /* bfloat16: zero or subnormal */
+		f32_m = b16_m;
+		if (b16_m == 0) { /* zero signed */
+			f32_e = 0;
+		} else { /* subnormal numbers */
+			goto fp32_normal;
+		}
+		break;
+	default: /* bfloat16: normal number */
+		goto fp32_normal;
+	}
+
+	goto fp32_pack;
+
+fp32_normal:
+	/* Rebias the exponent and widen the mantissa */
+	f32_m = b16_m;
+	f32_e = FP32_BIAS_E + b16_e - BF16_BIAS_E;
+
+	shift = (FP32_MSB_M - BF16_MSB_M);
+	f32_m = (f32_m << shift) & FP32_MASK_M;
+
+fp32_pack:
+	f32.u = FP32_PACK(f32_s, f32_e, f32_m);
+
+	return f32.f;
+}
+
+/* Public wrapper: float32 -> float16 scalar conversion */
+uint16_t _odp_float32_to_float16(float x)
+{
+	return __float32_to_float16_scalar_rtn(x);
+}
+
+/* Public wrapper: float16 -> float32 scalar conversion */
+float _odp_float16_to_float32(uint16_t f16)
+{
+	return __float16_to_float32_scalar_rtx(f16);
+}
+
+/* Public wrapper: float32 -> bfloat16 scalar conversion */
+uint16_t _odp_float32_to_bfloat16(float x)
+{
+	return __float32_to_bfloat16_scalar_rtn(x);
+}
+
+/* Public wrapper: bfloat16 -> float32 scalar conversion */
+float _odp_bfloat16_to_float32(uint16_t f16)
+{
+	return __bfloat16_to_float32_scalar_rtx(f16);
+}
diff --git a/platform/linux-generic/odp_ml_null.c b/platform/linux-generic/odp_ml_null.c
new file mode 100644
index 000000000..718e80d76
--- /dev/null
+++ b/platform/linux-generic/odp_ml_null.c
@@ -0,0 +1,232 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 Nokia
+ */
+
+#include <odp/api/hints.h>
+#include <odp/api/ml.h>
+
+#include <odp_init_internal.h>
+
+#include <stdint.h>
+#include <string.h>
+
+/* Dummy ML API implementation, no capability and just return error for
+ * other functions.
+ */
+int _odp_ml_init_global(void)
+{
+ return 0;
+}
+
+int _odp_ml_term_global(void)
+{
+ return 0;
+}
+
+/* Null implementation: report an all-zero capability so applications can
+ * detect that no ML features are available. */
+int odp_ml_capability(odp_ml_capability_t *capa)
+{
+	memset(capa, 0, sizeof(odp_ml_capability_t));
+	return 0;
+}
+
+void odp_ml_config_init(odp_ml_config_t *config ODP_UNUSED)
+{
+}
+
+int odp_ml_config(const odp_ml_config_t *config ODP_UNUSED)
+{
+ return -1;
+}
+
+void odp_ml_model_param_init(odp_ml_model_param_t *param ODP_UNUSED)
+{
+}
+
+odp_ml_model_t odp_ml_model_create(const char *name ODP_UNUSED,
+ const odp_ml_model_param_t *param ODP_UNUSED)
+{
+ return ODP_ML_MODEL_INVALID;
+}
+
+int odp_ml_model_destroy(odp_ml_model_t model ODP_UNUSED)
+{
+ return -1;
+}
+
+int odp_ml_model_info(odp_ml_model_t model ODP_UNUSED, odp_ml_model_info_t *info ODP_UNUSED)
+{
+ return -1;
+}
+
+uint32_t odp_ml_model_input_info(odp_ml_model_t model ODP_UNUSED,
+ odp_ml_input_info_t info[] ODP_UNUSED,
+ uint32_t num ODP_UNUSED)
+{
+ return 0;
+}
+
+uint32_t odp_ml_model_output_info(odp_ml_model_t model ODP_UNUSED,
+ odp_ml_output_info_t info[] ODP_UNUSED,
+ uint32_t num ODP_UNUSED)
+{
+ return 0;
+}
+
+odp_ml_model_t odp_ml_model_lookup(const char *name ODP_UNUSED)
+{
+ return ODP_ML_MODEL_INVALID;
+}
+
+uint64_t odp_ml_model_to_u64(odp_ml_model_t model ODP_UNUSED)
+{
+ return 0;
+}
+
+void odp_ml_model_print(odp_ml_model_t model ODP_UNUSED)
+{
+}
+
+void odp_ml_print(void)
+{
+}
+
+void odp_ml_compl_pool_param_init(odp_ml_compl_pool_param_t *pool_param)
+{
+ memset(pool_param, 0, sizeof(odp_ml_compl_pool_param_t));
+}
+
+odp_pool_t odp_ml_compl_pool_create(const char *name ODP_UNUSED,
+ const odp_ml_compl_pool_param_t *pool_param ODP_UNUSED)
+{
+ return ODP_POOL_INVALID;
+}
+
+odp_ml_compl_t odp_ml_compl_alloc(odp_pool_t pool ODP_UNUSED)
+{
+ return ODP_ML_COMPL_INVALID;
+}
+
+void odp_ml_compl_free(odp_ml_compl_t ml_compl ODP_UNUSED)
+{
+}
+
+int odp_ml_compl_run_result(odp_ml_compl_t ml_compl ODP_UNUSED,
+ odp_ml_run_result_t *result ODP_UNUSED)
+{
+ return -1;
+}
+
+int odp_ml_compl_load_result(odp_ml_compl_t ml_compl ODP_UNUSED,
+ odp_ml_load_result_t *result ODP_UNUSED)
+{
+ return -1;
+}
+
+void *odp_ml_compl_user_area(odp_ml_compl_t ml_compl ODP_UNUSED)
+{
+ return NULL;
+}
+
+odp_ml_compl_t odp_ml_compl_from_event(odp_event_t event ODP_UNUSED)
+{
+ return ODP_ML_COMPL_INVALID;
+}
+
+odp_event_t odp_ml_compl_to_event(odp_ml_compl_t ml_compl ODP_UNUSED)
+{
+ return ODP_EVENT_INVALID;
+}
+
+uint64_t odp_ml_compl_to_u64(odp_ml_compl_t ml_compl ODP_UNUSED)
+{
+ return 0;
+}
+
+void odp_ml_compl_param_init(odp_ml_compl_param_t *compl_param ODP_UNUSED)
+{
+}
+
+int odp_ml_model_load(odp_ml_model_t model ODP_UNUSED, odp_ml_load_result_t *result ODP_UNUSED)
+{
+ return -1;
+}
+
+int odp_ml_model_load_start(odp_ml_model_t model ODP_UNUSED,
+ const odp_ml_compl_param_t *compl_param ODP_UNUSED)
+{
+ return -1;
+}
+
+int odp_ml_model_load_status(odp_ml_model_t model ODP_UNUSED, uint32_t compl_id ODP_UNUSED,
+ odp_ml_load_result_t *result ODP_UNUSED)
+{
+ return -1;
+}
+
+int odp_ml_model_unload(odp_ml_model_t model ODP_UNUSED, odp_ml_load_result_t *result ODP_UNUSED)
+{
+ return -1;
+}
+
+int odp_ml_model_unload_start(odp_ml_model_t model ODP_UNUSED,
+ const odp_ml_compl_param_t *compl_param ODP_UNUSED)
+{
+ return -1;
+}
+
+int odp_ml_model_unload_status(odp_ml_model_t model ODP_UNUSED, uint32_t compl_id ODP_UNUSED,
+ odp_ml_load_result_t *result ODP_UNUSED)
+{
+ return -1;
+}
+
+void odp_ml_run_param_init(odp_ml_run_param_t *param ODP_UNUSED)
+{
+}
+
+int odp_ml_run(odp_ml_model_t model ODP_UNUSED, const odp_ml_data_t *data ODP_UNUSED,
+ const odp_ml_run_param_t *param ODP_UNUSED)
+{
+ return -1;
+}
+
+int odp_ml_run_multi(odp_ml_model_t model ODP_UNUSED, const odp_ml_data_t data[] ODP_UNUSED,
+ const odp_ml_run_param_t param[] ODP_UNUSED, int num ODP_UNUSED)
+{
+ return -1;
+}
+
+int odp_ml_run_start(odp_ml_model_t model ODP_UNUSED, const odp_ml_data_t *data ODP_UNUSED,
+ const odp_ml_compl_param_t *compl_param ODP_UNUSED,
+ const odp_ml_run_param_t *run_param ODP_UNUSED)
+{
+ return -1;
+}
+
+int odp_ml_run_start_multi(odp_ml_model_t model ODP_UNUSED,
+ const odp_ml_data_t data[] ODP_UNUSED,
+ const odp_ml_compl_param_t compl_param[] ODP_UNUSED,
+ const odp_ml_run_param_t run_param[] ODP_UNUSED,
+ int num ODP_UNUSED)
+{
+ return -1;
+}
+
+int odp_ml_run_status(odp_ml_model_t model ODP_UNUSED, uint32_t compl_id ODP_UNUSED,
+ odp_ml_run_result_t *result ODP_UNUSED)
+{
+ return -1;
+}
+
+int odp_ml_model_extra_stat_info(odp_ml_model_t model ODP_UNUSED,
+ odp_ml_extra_stat_info_t info[] ODP_UNUSED,
+ int num ODP_UNUSED)
+{
+ return -1;
+}
+
+int odp_ml_model_extra_stats(odp_ml_model_t model ODP_UNUSED,
+ uint64_t stats[] ODP_UNUSED, int num ODP_UNUSED)
+{
+ return -1;
+}
diff --git a/platform/linux-generic/odp_ml_quantize.c b/platform/linux-generic/odp_ml_quantize.c
new file mode 100644
index 000000000..d3f3601e3
--- /dev/null
+++ b/platform/linux-generic/odp_ml_quantize.c
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 Nokia
+ */
+
+#include <odp/api/ml_quantize.h>
+
+#include <odp_debug_internal.h>
+#include <odp_macros_internal.h>
+#include <odp_ml_fp16.h>
+
+#include <math.h>
+#include <stdint.h>
+
+/* Quantize an array of float32 values to uint8 using the given scale and
+ * zero point. Values are rounded to nearest and clamped to [0, 255].
+ */
+void odp_ml_fp32_to_uint8(uint8_t *u8, const float *fp32, uint32_t num, float scale,
+			  uint8_t zerop)
+{
+	float fval;
+
+	_ODP_ASSERT(scale != 0);
+
+	for (uint32_t i = 0; i < num; i++) {
+		/* Range mapping: map real values to signed integer */
+		fval = nearbyintf(fp32[i] / scale) + (float)zerop;
+
+		/* clip */
+		fval = _ODP_MAX(fval, 0.f);
+		fval = _ODP_MIN(fval, 255.f);
+		u8[i] = (uint8_t)(int32_t)fval;
+	}
+}
+
+/* Dequantize uint8 values back to float32: subtract the zero point and
+ * multiply by the scale factor. */
+void odp_ml_fp32_from_uint8(float *fp32, const uint8_t *u8, uint32_t num, float scale,
+			    uint8_t zerop)
+{
+	uint32_t i;
+
+	for (i = 0; i < num; i++)
+		fp32[i] = (float)(u8[i] - zerop) * scale;
+}
+
+/* Quantize an array of float32 values to int8 using the given scale and
+ * zero point. Values are rounded to nearest and clamped to [-127, 127].
+ */
+void odp_ml_fp32_to_int8(int8_t *i8, const float *fp32, uint32_t num, float scale, int8_t zerop)
+{
+	float fval;
+
+	_ODP_ASSERT(scale != 0);
+
+	for (uint32_t i = 0; i < num; i++) {
+		/* Range mapping: map real values to signed integer */
+		fval = nearbyintf(fp32[i] / scale) + (float)zerop;
+
+		/* NOTE: Clamps signed quantization values to [-127,127] instead of [-128,127].
+		 * This is to ensure that symmetric quantization results in a zero
+		 * point of exactly 0 for signed 8 bit ints.
+		 */
+		fval = _ODP_MAX(fval, -127.f);
+		fval = _ODP_MIN(fval, 127.f);
+		i8[i] = (int8_t)(int32_t)fval;
+	}
+}
+
+/* Dequantize int8 values back to float32: subtract the zero point and
+ * multiply by the scale factor. */
+void odp_ml_fp32_from_int8(float *fp32, const int8_t *i8, uint32_t num, float scale, int8_t zerop)
+{
+	uint32_t i;
+
+	for (i = 0; i < num; i++)
+		fp32[i] = (float)(i8[i] - zerop) * scale;
+}
+
+/* Convert an array of float32 values to float16 representation */
+void odp_ml_fp32_to_fp16(uint16_t *fp16, const float *fp32, uint32_t num)
+{
+	for (uint32_t idx = 0; idx < num; idx++)
+		fp16[idx] = _odp_float32_to_float16(fp32[idx]);
+}
+
+/* Convert an array of float16 values back to float32 representation */
+void odp_ml_fp32_from_fp16(float *fp32, const uint16_t *fp16, uint32_t num)
+{
+	for (uint32_t idx = 0; idx < num; idx++)
+		fp32[idx] = _odp_float16_to_float32(fp16[idx]);
+}
diff --git a/platform/linux-generic/odp_packet.c b/platform/linux-generic/odp_packet.c
index 96fcd928a..17a4a9298 100644
--- a/platform/linux-generic/odp_packet.c
+++ b/platform/linux-generic/odp_packet.c
@@ -66,7 +66,6 @@ const _odp_packet_inline_offset_t _odp_packet_inline ODP_ALIGNED_CACHE = {
.timestamp = offsetof(odp_packet_hdr_t, timestamp),
.input_flags = offsetof(odp_packet_hdr_t, p.input_flags),
.flags = offsetof(odp_packet_hdr_t, p.flags),
- .subtype = offsetof(odp_packet_hdr_t, subtype),
.cls_mark = offsetof(odp_packet_hdr_t, cls_mark),
.ipsec_ctx = offsetof(odp_packet_hdr_t, ipsec_ctx),
.crypto_op = offsetof(odp_packet_hdr_t, crypto_op_result),
@@ -1454,7 +1453,7 @@ void odp_packet_print(odp_packet_t pkt)
len += _odp_snprint(&str[len], n - len, " pool index %u\n", hdr->event_hdr.index.pool);
len += _odp_snprint(&str[len], n - len, " buf index %u\n",
hdr->event_hdr.index.event);
- len += _odp_snprint(&str[len], n - len, " ev subtype %i\n", hdr->subtype);
+ len += _odp_snprint(&str[len], n - len, " ev subtype %i\n", hdr->event_hdr.subtype);
len += _odp_snprint(&str[len], n - len, " input_flags 0x%" PRIx64 "\n",
hdr->p.input_flags.all);
if (hdr->p.input_flags.all) {
@@ -2401,7 +2400,7 @@ odp_packet_t odp_packet_reassemble(odp_pool_t pool_hdl, odp_packet_buf_t pkt_buf
pkt_hdr->tailroom = tailroom;
/* Reset metadata */
- pkt_hdr->subtype = ODP_EVENT_PACKET_BASIC;
+ pkt_hdr->event_hdr.subtype = ODP_EVENT_PACKET_BASIC;
pkt_hdr->input = ODP_PKTIO_INVALID;
packet_parse_reset(pkt_hdr, 1);
diff --git a/platform/linux-generic/odp_packet_io.c b/platform/linux-generic/odp_packet_io.c
index 236813e80..8283c41e6 100644
--- a/platform/linux-generic/odp_packet_io.c
+++ b/platform/linux-generic/odp_packet_io.c
@@ -3015,8 +3015,15 @@ static int lso_update_custom(lso_profile_t *lso_prof, odp_packet_t pkt, int segn
ptr = &u32;
else if (size == 2)
ptr = &u16;
- else
+ else {
+ /*
+ * odp_lso_profile_create() ensures that size is one of the allowed values.
+ * But compiler doesn't know that, so set it here to avoid possibility of
+ * out of bounds warnings.
+ */
+ size = 1;
ptr = &u8;
+ }
if (odp_packet_copy_to_mem(pkt, offset, size, ptr)) {
_ODP_ERR("Read from packet failed at offset %u\n", offset);
diff --git a/platform/linux-generic/odp_pool.c b/platform/linux-generic/odp_pool.c
index 94461e6b1..d3fde70f6 100644
--- a/platform/linux-generic/odp_pool.c
+++ b/platform/linux-generic/odp_pool.c
@@ -495,6 +495,7 @@ static void init_event_hdr(pool_t *pool, _odp_event_hdr_t *event_hdr, uint32_t e
event_hdr->index.event = event_index;
event_hdr->type = type;
event_hdr->event_type = type;
+ event_hdr->subtype = ODP_EVENT_NO_SUBTYPE;
event_hdr->pool = _odp_pool_handle(pool);
/* Store base values for fast init */
@@ -542,7 +543,6 @@ static void init_event_hdr(pool_t *pool, _odp_event_hdr_t *event_hdr, uint32_t e
static void init_buffers(pool_t *pool)
{
- uint64_t i;
_odp_event_hdr_t *event_hdr;
odp_buffer_hdr_t *buf_hdr;
odp_packet_hdr_t *pkt_hdr;
@@ -566,7 +566,7 @@ static void init_buffers(pool_t *pool)
mask = pool->ring_mask;
type = pool->type;
- for (i = 0; i < pool->num + skipped_blocks ; i++) {
+ for (uint64_t i = 0; i < pool->num + skipped_blocks ; i++) {
int skip = 0;
addr = &pool->base_addr[i * pool->block_size];
@@ -1257,6 +1257,10 @@ int odp_pool_info(odp_pool_t pool_hdl, odp_pool_info_t *info)
info->dma_pool_param.uarea_size = pool->params.buf.uarea_size;
info->dma_pool_param.cache_size = pool->params.buf.cache_size;
+ } else if (pool->type_2 == ODP_POOL_ML_COMPL) {
+ info->ml_pool_param.num = pool->params.buf.num;
+ info->ml_pool_param.uarea_size = pool->params.buf.uarea_size;
+ info->ml_pool_param.cache_size = pool->params.buf.cache_size;
} else {
info->params = pool->params;
}
@@ -1371,11 +1375,11 @@ static inline void event_free_to_pool(pool_t *pool,
if (odp_unlikely((uint32_t)num > cache_num))
burst = cache_num;
- _odp_event_hdr_t *event_hdr[burst];
+ _odp_event_hdr_t *ev_hdr[burst];
- cache_pop(cache, event_hdr, burst);
+ cache_pop(cache, ev_hdr, burst);
- ring_ptr_enq_multi(ring, mask, (void **)event_hdr, burst);
+ ring_ptr_enq_multi(ring, mask, (void **)ev_hdr, burst);
if (CONFIG_POOL_STATISTICS && pool->params.stats.bit.free_ops)
odp_atomic_inc_u64(&pool->stats.free_ops);
}
@@ -1559,6 +1563,8 @@ static const char *get_long_type_str(odp_pool_type_t type)
return "vector";
case ODP_POOL_DMA_COMPL:
return "dma completion";
+ case ODP_POOL_ML_COMPL:
+ return "ml completion";
default:
return "unknown";
}
@@ -1577,6 +1583,8 @@ static const char *get_short_type_str(odp_pool_type_t type)
return "V";
case ODP_POOL_DMA_COMPL:
return "D";
+ case ODP_POOL_ML_COMPL:
+ return "M";
default:
return "-";
}
@@ -1875,6 +1883,7 @@ int odp_pool_ext_capability(odp_pool_type_t type, odp_pool_ext_capability_t *cap
case ODP_POOL_TIMEOUT:
case ODP_POOL_VECTOR:
case ODP_POOL_DMA_COMPL:
+ case ODP_POOL_ML_COMPL:
memset(capa, 0, sizeof(odp_pool_ext_capability_t));
return 0;
default:
diff --git a/platform/linux-generic/odp_queue_scalable.c b/platform/linux-generic/odp_queue_scalable.c
index c7040dd3c..bddaa532d 100644
--- a/platform/linux-generic/odp_queue_scalable.c
+++ b/platform/linux-generic/odp_queue_scalable.c
@@ -18,7 +18,6 @@
#include <odp_config_internal.h>
#include <odp_debug_internal.h>
-
#include <odp_event_internal.h>
#include <odp_packet_io_internal.h>
#include <odp_pool_internal.h>
@@ -472,12 +471,8 @@ static int queue_destroy(odp_queue_t handle)
*/
while (__atomic_load_n(&q->qschst.numevts, __ATOMIC_RELAXED) != 0 ||
__atomic_load_n(&q->qschst.cur_ticket, __ATOMIC_RELAXED) !=
- __atomic_load_n(&q->qschst.nxt_ticket, __ATOMIC_RELAXED)) {
- sevl();
- while (wfe() && monitor32((uint32_t *)&q->qschst.numevts,
- __ATOMIC_RELAXED) != 0)
- odp_cpu_pause();
- }
+ __atomic_load_n(&q->qschst.nxt_ticket, __ATOMIC_RELAXED))
+ _odp_wait_until_eq_u32((uint32_t *)&q->qschst.numevts, 0);
if (q->schedq != NULL) {
_odp_sched_queue_rem(q->sched_grp, q->sched_prio);
@@ -596,13 +591,8 @@ static inline int _odp_queue_enq(sched_elem_t *q,
__builtin_prefetch(&q->node, 1, 0);
#endif
/* Wait for our turn to signal consumers */
- if (odp_unlikely(__atomic_load_n(&q->cons_write,
- __ATOMIC_RELAXED) != old_write)) {
- sevl();
- while (wfe() && monitor32(&q->cons_write,
- __ATOMIC_RELAXED) != old_write)
- odp_cpu_pause();
- }
+ if (odp_unlikely(__atomic_load_n(&q->cons_write, __ATOMIC_RELAXED) != old_write))
+ _odp_wait_until_eq_u32(&q->cons_write, old_write);
/* Signal consumers that events are available (release events)
* Enable other producers to continue
@@ -824,13 +814,8 @@ int _odp_queue_deq(sched_elem_t *q, _odp_event_hdr_t *event_hdr[], int num)
__builtin_prefetch(&q->node, 1, 0);
#endif
/* Wait for our turn to signal producers */
- if (odp_unlikely(__atomic_load_n(&q->prod_read, __ATOMIC_RELAXED) !=
- old_read)) {
- sevl();
- while (wfe() && monitor32(&q->prod_read,
- __ATOMIC_RELAXED) != old_read)
- odp_cpu_pause();
- }
+ if (odp_unlikely(__atomic_load_n(&q->prod_read, __ATOMIC_RELAXED) != old_read))
+ _odp_wait_until_eq_u32(&q->prod_read, old_read);
/* Signal producers that empty slots are available
* (release ring slots)
diff --git a/platform/linux-generic/odp_schedule_basic.c b/platform/linux-generic/odp_schedule_basic.c
index 7bd8cbfed..379f1f828 100644
--- a/platform/linux-generic/odp_schedule_basic.c
+++ b/platform/linux-generic/odp_schedule_basic.c
@@ -44,6 +44,7 @@
#include <string.h>
#include <time.h>
+#include <inttypes.h>
/* No synchronization context */
#define NO_SYNC_CONTEXT ODP_SCHED_SYNC_PARALLEL
@@ -297,7 +298,7 @@ typedef struct {
struct {
uint32_t poll_time;
- struct timespec sleep_time;
+ uint64_t sleep_time;
} powersave;
/* Scheduler interface config options (not used in fast path) */
@@ -545,8 +546,8 @@ static int read_config_file(sched_global_t *sched)
}
val = _ODP_MAX(0, val);
- sched->powersave.sleep_time.tv_sec = val / 1000000000;
- sched->powersave.sleep_time.tv_nsec = val % 1000000000;
+ val = _ODP_MIN((int)ODP_TIME_SEC_IN_NS - 1, val);
+ sched->powersave.sleep_time = val;
_ODP_PRINT(" %s: %i\n", str, val);
_ODP_PRINT(" dynamic load balance: %s\n", sched->load_balance ? "ON" : "OFF");
@@ -1672,7 +1673,7 @@ static inline int schedule_loop_sleep(odp_queue_t *out_queue, uint64_t wait,
timer_run(2);
break;
}
- timer_run(1);
+ uint64_t next = timer_run(sleep ? TIMER_SCAN_FORCE : 1);
if (first) {
start = odp_time_local();
@@ -1683,19 +1684,27 @@ static inline int schedule_loop_sleep(odp_queue_t *out_queue, uint64_t wait,
continue;
}
- if (sleep)
- nanosleep(&sched->powersave.sleep_time, NULL);
+ if (sleep && next) {
+ uint64_t sleep_nsec = _ODP_MIN(sched->powersave.sleep_time, next);
- if (wait != ODP_SCHED_WAIT || !sleep) {
- current = odp_time_local();
- if (odp_time_cmp(start_sleep, current) < 0)
- sleep = 1;
+ if (wait != ODP_SCHED_WAIT) {
+ uint64_t nsec_to_end = odp_time_diff_ns(end, current);
+
+ sleep_nsec = _ODP_MIN(sleep_nsec, nsec_to_end);
+ }
+
+ struct timespec ts = { 0, sleep_nsec };
+
+ nanosleep(&ts, NULL);
}
- if (wait == ODP_SCHED_WAIT)
- continue;
+ if (!sleep || wait != ODP_SCHED_WAIT)
+ current = odp_time_local();
+
+ if (!sleep && odp_time_cmp(start_sleep, current) < 0)
+ sleep = 1;
- if (odp_time_cmp(end, current) < 0)
+ if (wait != ODP_SCHED_WAIT && odp_time_cmp(end, current) < 0)
break;
}
diff --git a/platform/linux-generic/odp_schedule_scalable.c b/platform/linux-generic/odp_schedule_scalable.c
index 6d60c048f..5166fb6d0 100644
--- a/platform/linux-generic/odp_schedule_scalable.c
+++ b/platform/linux-generic/odp_schedule_scalable.c
@@ -223,13 +223,9 @@ void _odp_sched_update_enq(sched_elem_t *q, uint32_t actual)
if (odp_unlikely(ticket != TICKET_INVALID)) {
/* Wait for our turn to update schedq. */
if (odp_unlikely(__atomic_load_n(&q->qschst.cur_ticket,
- __ATOMIC_ACQUIRE) != ticket)) {
- sevl();
- while (wfe() &&
- monitor8(&q->qschst.cur_ticket,
- __ATOMIC_ACQUIRE) != ticket)
- odp_cpu_pause();
- }
+ __ATOMIC_ACQUIRE) != ticket))
+ _odp_wait_until_eq_acq_u8(&q->qschst.cur_ticket, ticket);
+
/* Enqueue at end of scheduler queue */
/* We are here because of empty-to-non-empty transition
* This means queue must be pushed to schedq if possible
@@ -366,13 +362,9 @@ sched_update_deq(sched_elem_t *q,
_ODP_ASSERT(q->qschst_type != ODP_SCHED_SYNC_ATOMIC);
/* Wait for our turn to update schedq. */
if (odp_unlikely(__atomic_load_n(&q->qschst.cur_ticket,
- __ATOMIC_ACQUIRE) != ticket)) {
- sevl();
- while (wfe() &&
- monitor8(&q->qschst.cur_ticket,
- __ATOMIC_ACQUIRE) != ticket)
- odp_cpu_pause();
- }
+ __ATOMIC_ACQUIRE) != ticket))
+ _odp_wait_until_eq_acq_u8(&q->qschst.cur_ticket, ticket);
+
/* We are here because of non-empty-to-empty transition or
* WRR budget exhausted
* This means the queue must be popped from the schedq, now or
@@ -494,12 +486,9 @@ static inline void sched_update_popd(sched_elem_t *elem)
1,
__ATOMIC_RELAXED);
if (odp_unlikely(__atomic_load_n(&elem->qschst.cur_ticket,
- __ATOMIC_ACQUIRE) != ticket)) {
- sevl();
- while (wfe() && monitor8(&elem->qschst.cur_ticket,
- __ATOMIC_ACQUIRE) != ticket)
- odp_cpu_pause();
- }
+ __ATOMIC_ACQUIRE) != ticket))
+ _odp_wait_until_eq_acq_u8(&elem->qschst.cur_ticket, ticket);
+
sched_update_popd_sc(elem);
atomic_store_release(&elem->qschst.cur_ticket, ticket + 1,
/*readonly=*/false);
@@ -1054,15 +1043,8 @@ restart_same:
continue;
}
/* Wait for our turn to dequeue */
- if (odp_unlikely(__atomic_load_n(&rwin->turn,
- __ATOMIC_ACQUIRE)
- != sn)) {
- sevl();
- while (wfe() &&
- monitor32(&rwin->turn, __ATOMIC_ACQUIRE)
- != sn)
- odp_cpu_pause();
- }
+ if (odp_unlikely(__atomic_load_n(&rwin->turn, __ATOMIC_ACQUIRE) != sn))
+ _odp_wait_until_eq_acq_u32(&rwin->turn, sn);
#ifdef CONFIG_QSCHST_LOCK
LOCK(&elem->qschlock);
#endif
@@ -1143,13 +1125,8 @@ static void schedule_order_lock(uint32_t lock_index)
return;
}
if (odp_unlikely(__atomic_load_n(&rctx->rwin->olock[lock_index],
- __ATOMIC_ACQUIRE) != rctx->sn)) {
- sevl();
- while (wfe() &&
- monitor32(&rctx->rwin->olock[lock_index],
- __ATOMIC_ACQUIRE) != rctx->sn)
- odp_cpu_pause();
- }
+ __ATOMIC_ACQUIRE) != rctx->sn))
+ _odp_wait_until_eq_acq_u32(&rctx->rwin->olock[lock_index], rctx->sn);
}
static void schedule_order_unlock(uint32_t lock_index)
@@ -1555,12 +1532,7 @@ static int schedule_group_destroy(odp_schedule_group_t group)
if (sg->xcount[p] != 0) {
bitset_t wanted = atom_bitset_load(&sg->thr_wanted, __ATOMIC_RELAXED);
- sevl();
- while (wfe() &&
- !bitset_is_eql(wanted,
- bitset_monitor(&sg->thr_actual[p],
- __ATOMIC_RELAXED)))
- odp_cpu_pause();
+ _odp_wait_until_eq_bitset(&sg->thr_actual[p], wanted);
}
/* Else ignore because no ODP queues on this prio */
}
@@ -2127,13 +2099,10 @@ static void order_lock(void)
_ODP_ASSERT(ts->rctx != NULL);
rwin = ts->rctx->rwin;
sn = ts->rctx->sn;
- sevl();
/* Use acquire ordering to be on the safe side even if
* this isn't an acquire/release situation (aka lock).
*/
- while (wfe() &&
- monitor32(&rwin->hc.head, __ATOMIC_ACQUIRE) != sn)
- odp_cpu_pause();
+ _odp_wait_until_eq_acq_u32(&rwin->hc.head, sn);
}
}
diff --git a/platform/linux-generic/odp_schedule_scalable_ordered.c b/platform/linux-generic/odp_schedule_scalable_ordered.c
index f6655d7fa..f8568ce53 100644
--- a/platform/linux-generic/odp_schedule_scalable_ordered.c
+++ b/platform/linux-generic/odp_schedule_scalable_ordered.c
@@ -123,8 +123,6 @@ static void rwin_insert(reorder_window_t *rwin,
/*readonly=*/false);
rctx = NULL;
do {
- hc_t new;
-
new.head = old.head;
new.chgi = old.chgi + 1; /* Changed value */
/* Update head & chgi, fail if any has changed */
diff --git a/platform/linux-generic/odp_system_info.c b/platform/linux-generic/odp_system_info.c
index 52f1000f1..a2593b531 100644
--- a/platform/linux-generic/odp_system_info.c
+++ b/platform/linux-generic/odp_system_info.c
@@ -26,7 +26,6 @@
#include <odp/api/cpu.h>
#include <errno.h>
-#include <pthread.h>
#include <string.h>
#include <stdio.h>
#include <inttypes.h>
@@ -386,8 +385,9 @@ int _odp_system_info_init(void)
num_cpus);
/* Read and save all CPU frequencies for static mode */
- for (i = 0; i < CONFIG_NUM_CPU_IDS; i++)
- odp_global_ro.system_info.cpu_hz[i] = cpu_hz_current(i);
+ if (odp_global_ro.system_info.cpu_hz_static)
+ for (i = 0; i < CONFIG_NUM_CPU_IDS; i++)
+ odp_global_ro.system_info.cpu_hz[i] = cpu_hz_current(i);
/* By default, read max frequency from a cpufreq file */
for (i = 0; i < CONFIG_NUM_CPU_IDS; i++) {
@@ -627,5 +627,8 @@ void odp_sys_config_print(void)
_ODP_PRINT("CONFIG_IPSEC_MAX_NUM_SA: %i\n", CONFIG_IPSEC_MAX_NUM_SA);
_ODP_PRINT("CONFIG_TIMER_128BIT_ATOMICS: %i\n", CONFIG_TIMER_128BIT_ATOMICS);
_ODP_PRINT("CONFIG_TIMER_PROFILE_INLINE: %i\n", CONFIG_TIMER_PROFILE_INLINE);
+ _ODP_PRINT("CONFIG_ML_MAX_MODELS: %i\n", CONFIG_ML_MAX_MODELS);
+ _ODP_PRINT("CONFIG_ML_MAX_INPUTS: %i\n", CONFIG_ML_MAX_INPUTS);
+ _ODP_PRINT("CONFIG_ML_MAX_OUTPUTS: %i\n", CONFIG_ML_MAX_OUTPUTS);
_ODP_PRINT("\n");
}
diff --git a/platform/linux-generic/odp_timer.c b/platform/linux-generic/odp_timer.c
index daf187390..c8ea31078 100644
--- a/platform/linux-generic/odp_timer.c
+++ b/platform/linux-generic/odp_timer.c
@@ -761,11 +761,12 @@ static inline void timer_expire(timer_pool_t *tp, uint32_t idx, uint64_t tick)
}
}
-static inline void timer_pool_scan(timer_pool_t *tp, uint64_t tick)
+static inline uint64_t timer_pool_scan(timer_pool_t *tp, uint64_t tick)
{
tick_buf_t *array = &tp->tick_buf[0];
uint32_t high_wm = odp_atomic_load_acq_u32(&tp->high_wm);
uint32_t i;
+ uint64_t min = UINT64_MAX;
_ODP_ASSERT(high_wm <= tp->param.num_timers);
for (i = 0; i < high_wm; i++) {
@@ -780,18 +781,23 @@ static inline void timer_pool_scan(timer_pool_t *tp, uint64_t tick)
if (odp_unlikely(exp_tck <= tick)) {
/* Attempt to expire timer */
timer_expire(tp, i, tick);
+ min = 0;
+ } else {
+ min = _ODP_MIN(min, exp_tck - tick);
}
}
+
+ return min;
}
/******************************************************************************
* Inline timer processing
*****************************************************************************/
-static inline void timer_pool_scan_inline(int num, odp_time_t now)
+static inline uint64_t timer_pool_scan_inline(int num, odp_time_t now, int force)
{
timer_pool_t *tp;
- uint64_t new_tick, old_tick, nsec;
+ uint64_t new_tick, old_tick, ticks_to_next_expire, nsec, min = UINT64_MAX;
int64_t diff;
int i;
@@ -817,7 +823,7 @@ static inline void timer_pool_scan_inline(int num, odp_time_t now)
old_tick = odp_atomic_load_u64(&tp->cur_tick);
diff = new_tick - old_tick;
- if (diff < 1)
+ if (diff < 1 && !force)
continue;
if (odp_atomic_cas_u64(&tp->cur_tick, &old_tick, new_tick)) {
@@ -832,26 +838,30 @@ static inline void timer_pool_scan_inline(int num, odp_time_t now)
odp_atomic_store_u32(&tp->notify_overrun, 2);
}
}
- timer_pool_scan(tp, nsec);
+ ticks_to_next_expire = timer_pool_scan(tp, nsec);
+ min = _ODP_MIN(min, ticks_to_next_expire);
}
}
+
+ return min;
}
-void _odp_timer_run_inline(int dec)
+uint64_t _odp_timer_run_inline(int dec)
{
odp_time_t now;
int num = timer_global->highest_tp_idx + 1;
- int poll_interval = timer_global->poll_interval;
+ int force = (dec == TIMER_SCAN_FORCE);
+ int poll_interval = force ? 0 : timer_global->poll_interval;
if (num == 0)
- return;
+ return UINT64_MAX;
/* Rate limit how often this thread checks the timer pools. */
if (poll_interval > 1) {
timer_local.run_cnt -= dec;
if (timer_local.run_cnt > 0)
- return;
+ return UINT64_MAX;
timer_local.run_cnt = poll_interval;
}
@@ -862,7 +872,12 @@ void _odp_timer_run_inline(int dec)
if (odp_time_cmp(period,
timer_global->poll_interval_time) < 0)
- return;
+ return UINT64_MAX;
+ timer_local.last_run = now;
+ }
+
+ if (force) {
+ timer_local.run_cnt = poll_interval;
timer_local.last_run = now;
}
@@ -870,13 +885,14 @@ void _odp_timer_run_inline(int dec)
if (CONFIG_TIMER_PROFILE_INLINE) {
odp_time_t t1 = odp_time_local_strict();
- timer_pool_scan_inline(num, now);
+ uint64_t ret = timer_pool_scan_inline(num, now, force);
odp_time_t t2 = odp_time_local_strict();
timer_local.prof_nsec += odp_time_diff_ns(t2, t1);
timer_local.prof_rounds++;
+ return ret;
} else {
- timer_pool_scan_inline(num, now);
+ return timer_pool_scan_inline(num, now, force);
}
}
diff --git a/platform/linux-generic/test/Makefile.am b/platform/linux-generic/test/Makefile.am
index 30ef26078..7aca5fd3f 100644
--- a/platform/linux-generic/test/Makefile.am
+++ b/platform/linux-generic/test/Makefile.am
@@ -21,6 +21,11 @@ SUBDIRS += validation/api/pktio \
example \
performance
+if WITH_ML
+TESTS += validation/api/ml/ml_linux$(EXEEXT)
+SUBDIRS += validation/api/ml
+endif
+
if ODP_PKTIO_PCAP
TESTS += validation/api/pktio/pktio_run_pcap.sh
endif
diff --git a/platform/linux-generic/test/example/ipsec_api/Makefile.am b/platform/linux-generic/test/example/ipsec_api/Makefile.am
index 101c97cdf..2535ad466 100644
--- a/platform/linux-generic/test/example/ipsec_api/Makefile.am
+++ b/platform/linux-generic/test/example/ipsec_api/Makefile.am
@@ -19,5 +19,3 @@ clean-local:
rm -f $(builddir)/$$f; \
done \
fi
-
-.NOTPARALLEL:
diff --git a/platform/linux-generic/test/example/ipsec_crypto/Makefile.am b/platform/linux-generic/test/example/ipsec_crypto/Makefile.am
index 101c97cdf..2535ad466 100644
--- a/platform/linux-generic/test/example/ipsec_crypto/Makefile.am
+++ b/platform/linux-generic/test/example/ipsec_crypto/Makefile.am
@@ -19,5 +19,3 @@ clean-local:
rm -f $(builddir)/$$f; \
done \
fi
-
-.NOTPARALLEL:
diff --git a/platform/linux-generic/test/inline-timer.conf b/platform/linux-generic/test/inline-timer.conf
index d645bef3c..fa3b6982f 100644
--- a/platform/linux-generic/test/inline-timer.conf
+++ b/platform/linux-generic/test/inline-timer.conf
@@ -1,6 +1,6 @@
# Mandatory fields
odp_implementation = "linux-generic"
-config_file_version = "0.1.27"
+config_file_version = "0.1.28"
timer: {
# Enable inline timer implementation
diff --git a/platform/linux-generic/test/packet_align.conf b/platform/linux-generic/test/packet_align.conf
index 427674bb2..fb1418348 100644
--- a/platform/linux-generic/test/packet_align.conf
+++ b/platform/linux-generic/test/packet_align.conf
@@ -1,6 +1,6 @@
# Mandatory fields
odp_implementation = "linux-generic"
-config_file_version = "0.1.27"
+config_file_version = "0.1.28"
pool: {
pkt: {
diff --git a/platform/linux-generic/test/pktio_ipc/ipc_common.c b/platform/linux-generic/test/pktio_ipc/ipc_common.c
index f693feeb2..128a7c6e1 100644
--- a/platform/linux-generic/test/pktio_ipc/ipc_common.c
+++ b/platform/linux-generic/test/pktio_ipc/ipc_common.c
@@ -1,11 +1,12 @@
-/* Copyright (c) 2015-2018, Linaro Limited
- * All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2015-2018 Linaro Limited
+ * Copyright (c) 2023 Nokia
*/
#include "ipc_common.h"
+/** Start time in seconds */
+int start_time_sec;
/** Run time in seconds */
int run_time_sec;
/** Pid of the master process */
@@ -97,23 +98,28 @@ void parse_args(int argc, char *argv[])
int opt;
int long_index;
static struct option longopts[] = {
- {"time", required_argument, NULL, 't'},
+ {"start-timeout", required_argument, NULL, 's'},
+ {"run-time", required_argument, NULL, 't'},
{"pid", required_argument, NULL, 'p'}, /* master process pid */
{"help", no_argument, NULL, 'h'}, /* return 'h' */
{NULL, 0, NULL, 0}
};
+ start_time_sec = 0; /* wait forever if time is 0 */
run_time_sec = 0; /* loop forever if time to run is 0 */
master_pid = 0;
while (1) {
- opt = getopt_long(argc, argv, "+t:p:h",
+ opt = getopt_long(argc, argv, "+s:t:p:h",
longopts, &long_index);
if (opt == -1)
break; /* No more options */
switch (opt) {
+ case 's':
+ start_time_sec = atoi(optarg);
+ break;
case 't':
run_time_sec = atoi(optarg);
break;
@@ -151,15 +157,14 @@ void usage(char *progname)
{
printf("\n"
"Usage: %s OPTIONS\n"
- " E.g. -n ipc_name_space %s -t seconds\n"
"\n"
"OpenDataPlane odp-linux ipc test application.\n"
"\n"
- "Mandatory OPTIONS:\n"
- " -n, --ns IPC name space ID /dev/shm/odp-<ns>-objname.\n"
"Optional OPTIONS\n"
" -h, --help Display help and exit.\n"
- " -t, --time Time to run in seconds.\n"
- "\n", NO_PATH(progname), NO_PATH(progname)
+ " -p, --pid PID of the master process.\n"
+ " -t, --run-time Time to run in seconds.\n"
+ " -s, --start-timeout Maximum time for pktio startup.\n"
+ "\n", NO_PATH(progname)
);
}
diff --git a/platform/linux-generic/test/pktio_ipc/ipc_common.h b/platform/linux-generic/test/pktio_ipc/ipc_common.h
index b2b469553..94ec21460 100644
--- a/platform/linux-generic/test/pktio_ipc/ipc_common.h
+++ b/platform/linux-generic/test/pktio_ipc/ipc_common.h
@@ -1,7 +1,6 @@
-/* Copyright (c) 2015-2018, Linaro Limited
- * All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2015-2018 Linaro Limited
+ * Copyright (c) 2023 Nokia
*/
#define _POSIX_C_SOURCE 200809L
@@ -64,6 +63,9 @@ typedef struct ODP_PACKED {
odp_u32be_t magic;
} pkt_tail_t;
+/** Start time in seconds */
+extern int start_time_sec;
+
/** Run time in seconds */
extern int run_time_sec;
diff --git a/platform/linux-generic/test/pktio_ipc/pktio_ipc1.c b/platform/linux-generic/test/pktio_ipc/pktio_ipc1.c
index 6c71e18da..df7a5ca3f 100644
--- a/platform/linux-generic/test/pktio_ipc/pktio_ipc1.c
+++ b/platform/linux-generic/test/pktio_ipc/pktio_ipc1.c
@@ -1,7 +1,6 @@
-/* Copyright (c) 2015-2018, Linaro Limited
- * All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2015-2018 Linaro Limited
+ * Copyright (c) 2023 Nokia
*/
#include "ipc_common.h"
@@ -49,17 +48,17 @@ static int pktio_run_loop(odp_pool_t pool)
else
sprintf(name, TEST_IPC_PKTIO_NAME);
- wait = odp_time_local_from_ns(run_time_sec * ODP_TIME_SEC_IN_NS);
+ wait = odp_time_local_from_ns(start_time_sec * ODP_TIME_SEC_IN_NS);
start_cycle = odp_time_local();
current_cycle = start_cycle;
for (;;) {
- if (run_time_sec) {
+ if (start_time_sec) {
cycle = odp_time_local();
diff = odp_time_diff(cycle, start_cycle);
if (odp_time_cmp(wait, diff) < 0) {
- printf("timeout exit, run_time_sec %d\n",
- run_time_sec);
+ printf("timeout exit 1, start_time_sec %d\n",
+ start_time_sec);
return -1;
}
}
@@ -83,12 +82,12 @@ static int pktio_run_loop(odp_pool_t pool)
/* start ipc pktio, i.e. wait until other process connects */
for (;;) {
- if (run_time_sec) {
+ if (start_time_sec) {
cycle = odp_time_local();
diff = odp_time_diff(cycle, start_cycle);
if (odp_time_cmp(wait, diff) < 0) {
- printf("timeout exit, run_time_sec %d\n",
- run_time_sec);
+ printf("timeout exit 2, start_time_sec %d\n",
+ start_time_sec);
goto exit;
}
}
@@ -102,6 +101,8 @@ static int pktio_run_loop(odp_pool_t pool)
}
/* packets loop */
+ wait = odp_time_local_from_ns(run_time_sec * ODP_TIME_SEC_IN_NS);
+ start_cycle = odp_time_local();
for (;;) {
int i;
diff --git a/platform/linux-generic/test/pktio_ipc/pktio_ipc2.c b/platform/linux-generic/test/pktio_ipc/pktio_ipc2.c
index e6ca5f5e5..fc3b6833a 100644
--- a/platform/linux-generic/test/pktio_ipc/pktio_ipc2.c
+++ b/platform/linux-generic/test/pktio_ipc/pktio_ipc2.c
@@ -1,7 +1,6 @@
-/* Copyright (c) 2015-2018, Linaro Limited
- * All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2015-2018 Linaro Limited
+ * Copyright (c) 2023 Nokia
*/
/**
@@ -46,17 +45,17 @@ static int ipc_second_process(int master_pid)
exit(EXIT_FAILURE);
}
- wait = odp_time_local_from_ns(run_time_sec * ODP_TIME_SEC_IN_NS);
+ wait = odp_time_local_from_ns(start_time_sec * ODP_TIME_SEC_IN_NS);
start_cycle = odp_time_local();
for (;;) {
/* exit loop if time specified */
- if (run_time_sec) {
+ if (start_time_sec) {
cycle = odp_time_local();
diff = odp_time_diff(cycle, start_cycle);
if (odp_time_cmp(wait, diff) < 0) {
- printf("timeout exit, run_time_sec %d\n",
- run_time_sec);
+ printf("timeout exit 1, start_time_sec %d\n",
+ start_time_sec);
goto not_started;
}
}
@@ -85,12 +84,12 @@ static int ipc_second_process(int master_pid)
/* start ipc pktio, i.e. wait until other process connects */
for (;;) {
/* 1. exit loop if time specified */
- if (run_time_sec) {
+ if (start_time_sec) {
cycle = odp_time_local();
diff = odp_time_diff(cycle, start_cycle);
if (odp_time_cmp(wait, diff) < 0) {
- printf("timeout exit, run_time_sec %d\n",
- run_time_sec);
+ printf("timeout exit 2, start_time_sec %d\n",
+ start_time_sec);
goto not_started;
}
}
@@ -103,6 +102,8 @@ static int ipc_second_process(int master_pid)
odp_time_wait_ns(50 * ODP_TIME_MSEC_IN_NS);
}
+ wait = odp_time_local_from_ns(run_time_sec * ODP_TIME_SEC_IN_NS);
+ start_cycle = odp_time_local();
for (;;) {
/* exit loop if time specified */
if (run_time_sec) {
diff --git a/platform/linux-generic/test/pktio_ipc/pktio_ipc_run.sh b/platform/linux-generic/test/pktio_ipc/pktio_ipc_run.sh
index bad2626bd..b181668e8 100755
--- a/platform/linux-generic/test/pktio_ipc/pktio_ipc_run.sh
+++ b/platform/linux-generic/test/pktio_ipc/pktio_ipc_run.sh
@@ -1,9 +1,8 @@
#!/bin/sh
#
-# Copyright (c) 2015-2018, Linaro Limited
-# All rights reserved.
-#
-# SPDX-License-Identifier: BSD-3-Clause
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2015-2018 Linaro Limited
+# Copyright (c) 2023 Nokia
#
# directories where test binary can be found:
@@ -17,31 +16,23 @@ PATH=$(dirname $0):$PATH
PATH=$(dirname $0)/../../../../platform/linux-generic/test/pktio_ipc:$PATH
PATH=.:$PATH
-RUNTIME1=3
-RUNTIME2=1
-TIMEOUT=3
-if [ "${TEST}" = "coverage" ]; then
- RUNTIME1=30
- RUNTIME2=15
- TIMEOUT=20
-fi
+STARTTIME=30
+RUNTIME=1
run()
{
local ret=0
echo "==== run pktio_ipc1 then pktio_ipc2 ===="
- pktio_ipc1${EXEEXT} -t ${RUNTIME1} &
+ pktio_ipc1${EXEEXT} -s ${STARTTIME} -t ${RUNTIME} &
IPC_PID=$!
- pktio_ipc2${EXEEXT} -p ${IPC_PID} -t ${RUNTIME2}
+ pktio_ipc2${EXEEXT} -p ${IPC_PID} -s ${STARTTIME} -t ${RUNTIME}
ret=$?
- # pktio_ipc1 should do clean up and exit just
- # after pktio_ipc2 exited. If it does not happen
- # kill him in test.
- sleep ${TIMEOUT}
+
(kill ${IPC_PID} 2>&1 > /dev/null ) > /dev/null
if [ $? -eq 0 ]; then
+ wait $IPC_PID
echo "pktio_ipc1${EXEEXT} was killed"
ls -l /dev/shm/${UID}/odp* 2> /dev/null
rm -rf /dev/shm/${UID}/odp-${IPC_PID}* 2>&1 > /dev/null
@@ -58,16 +49,15 @@ run()
fi
echo "==== run pktio_ipc2 then pktio_ipc1 ===="
- pktio_ipc2${EXEEXT} -t ${RUNTIME1} &
+ pktio_ipc2${EXEEXT} -s ${STARTTIME} -t ${RUNTIME} &
IPC_PID=$!
- pktio_ipc1${EXEEXT} -p ${IPC_PID} -t ${RUNTIME2}
+ pktio_ipc1${EXEEXT} -p ${IPC_PID} -s ${STARTTIME} -t ${RUNTIME}
ret=$?
- # pktio_ipc2 do not exit on pktio_ipc1 disconnect
- # wait until it exits cleanly
- sleep ${TIMEOUT}
+
(kill ${IPC_PID} 2>&1 > /dev/null ) > /dev/null
if [ $? -eq 0 ]; then
+ wait $IPC_PID
echo "pktio_ipc2${EXEEXT} was killed"
ls -l /dev/shm/${UID}/odp* 2> /dev/null
rm -rf /dev/shm/${UID}/odp-${IPC_PID}* 2>&1 > /dev/null
diff --git a/platform/linux-generic/test/process-mode.conf b/platform/linux-generic/test/process-mode.conf
index 5bfcb9f2f..f4c6f7952 100644
--- a/platform/linux-generic/test/process-mode.conf
+++ b/platform/linux-generic/test/process-mode.conf
@@ -1,6 +1,6 @@
# Mandatory fields
odp_implementation = "linux-generic"
-config_file_version = "0.1.27"
+config_file_version = "0.1.28"
# Shared memory options
shm: {
diff --git a/platform/linux-generic/test/sched-basic.conf b/platform/linux-generic/test/sched-basic.conf
index 1a401298e..8a6d0ac98 100644
--- a/platform/linux-generic/test/sched-basic.conf
+++ b/platform/linux-generic/test/sched-basic.conf
@@ -1,6 +1,6 @@
# Mandatory fields
odp_implementation = "linux-generic"
-config_file_version = "0.1.27"
+config_file_version = "0.1.28"
# Test scheduler with an odd spread value and without dynamic load balance
sched_basic: {
diff --git a/platform/linux-generic/test/stash-custom.conf b/platform/linux-generic/test/stash-custom.conf
index b96c1cf45..6a2496303 100644
--- a/platform/linux-generic/test/stash-custom.conf
+++ b/platform/linux-generic/test/stash-custom.conf
@@ -1,6 +1,6 @@
# Mandatory fields
odp_implementation = "linux-generic"
-config_file_version = "0.1.27"
+config_file_version = "0.1.28"
# Test overflow safe stash variant
stash: {
diff --git a/platform/linux-generic/test/validation/api/ml/.gitignore b/platform/linux-generic/test/validation/api/ml/.gitignore
new file mode 100644
index 000000000..e31f902c4
--- /dev/null
+++ b/platform/linux-generic/test/validation/api/ml/.gitignore
@@ -0,0 +1 @@
+ml_linux
diff --git a/platform/linux-generic/test/validation/api/ml/Makefile.am b/platform/linux-generic/test/validation/api/ml/Makefile.am
new file mode 100644
index 000000000..f4b9e9755
--- /dev/null
+++ b/platform/linux-generic/test/validation/api/ml/Makefile.am
@@ -0,0 +1,34 @@
+include ../Makefile.inc
+
+test_PROGRAMS = ml_linux
+ml_linux_SOURCES = ml_linux.c
+
+EXTRA_DIST = \
+ batch_add_gen.py \
+ batch_add.onnx \
+ gen_models.sh \
+ README.md \
+ requirements.txt \
+ simple_linear_gen.py \
+ simple_linear.onnx
+
+# If building out-of-tree, make check will not copy the scripts and data to the
+# $(builddir) assuming that all commands are run locally. However this prevents
+# running tests on a remote target using LOG_COMPILER.
+# So copy all script and data files explicitly here.
+all-local:
+ if [ "x$(srcdir)" != "x$(builddir)" ]; then \
+ for f in $(EXTRA_DIST); do \
+ if [ -e $(srcdir)/$$f ]; then \
+ mkdir -p $(builddir)/$$(dirname $$f); \
+ cp -f $(srcdir)/$$f $(builddir)/$$f; \
+ fi \
+ done \
+ fi
+
+clean-local:
+ if [ "x$(srcdir)" != "x$(builddir)" ]; then \
+ for f in $(EXTRA_DIST); do \
+ rm -f $(builddir)/$$f; \
+ done \
+ fi
diff --git a/platform/linux-generic/test/validation/api/ml/README.md b/platform/linux-generic/test/validation/api/ml/README.md
new file mode 100644
index 000000000..80ad30e96
--- /dev/null
+++ b/platform/linux-generic/test/validation/api/ml/README.md
@@ -0,0 +1,23 @@
+# How to run ML validation test
+
+Simple onnx models are used to test ML API.
+
+## Generate models
+
+### Install python requirements
+
+```bash
+python3 -m pip install -r <this directory>/requirements.txt
+```
+
+### Generate models for validation tests
+
+```bash
+<this directory>/gen_models.sh
+```
+
+## Run ML validation tests
+
+```bash
+<this directory>/ml_linux
+```
diff --git a/platform/linux-generic/test/validation/api/ml/batch_add.onnx b/platform/linux-generic/test/validation/api/ml/batch_add.onnx
new file mode 100644
index 000000000..43485f463
--- /dev/null
+++ b/platform/linux-generic/test/validation/api/ml/batch_add.onnx
Binary files differ
diff --git a/platform/linux-generic/test/validation/api/ml/batch_add_gen.py b/platform/linux-generic/test/validation/api/ml/batch_add_gen.py
new file mode 100644
index 000000000..33515bd2f
--- /dev/null
+++ b/platform/linux-generic/test/validation/api/ml/batch_add_gen.py
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2023 Nokia
+#
+
+import onnx
+from onnx import helper
+from onnx import TensorProto
+
+graph = helper.make_graph(
+ [ # nodes
+ helper.make_node("Add", ["x1", "x2"], ["y"], "Batch Add"),
+ ],
+ "Batch Add", # name
+ [ # inputs
+ helper.make_tensor_value_info('x1', TensorProto.DOUBLE, ["c", 3]),
+ helper.make_tensor_value_info('x2', TensorProto.DOUBLE, ["c", 3]),
+ ],
+ [ # outputs
+ helper.make_tensor_value_info('y', TensorProto.DOUBLE, ["c", 3]),
+ ]
+)
+
+model = helper.make_model(
+ graph,
+ opset_imports=[helper.make_opsetid("", 14)],
+ producer_name='ODP validation tests',
+ model_version=1,
+ doc_string="y = x1 + x2",
+ ir_version = 8
+)
+
+onnx.save(model, 'batch_add.onnx')
diff --git a/platform/linux-generic/test/validation/api/ml/gen_models.sh b/platform/linux-generic/test/validation/api/ml/gen_models.sh
new file mode 100755
index 000000000..d88f3c432
--- /dev/null
+++ b/platform/linux-generic/test/validation/api/ml/gen_models.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2023 Nokia
+#
+
+set -e
+
+# cd to the directory where this script is in
+cd "$( dirname "${BASH_SOURCE[0]}" )"
+
+python3 simple_linear_gen.py
+
+python3 batch_add_gen.py
diff --git a/platform/linux-generic/test/validation/api/ml/ml_linux.c b/platform/linux-generic/test/validation/api/ml/ml_linux.c
new file mode 100644
index 000000000..28e18fbb5
--- /dev/null
+++ b/platform/linux-generic/test/validation/api/ml/ml_linux.c
@@ -0,0 +1,1167 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 Nokia
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <unistd.h>
+#include <string.h>
+#include <libgen.h>
+#include <odp_api.h>
+#include <odp/helper/odph_api.h>
+#include "odp_cunit_common.h"
+
+#define TIMEOUT 5
+#define MODEL_NAME "Test"
+#define NUM_INPUTS 1
+#define NUM_OUTPUTS 1
+#define RUN_NUM 2
+#define BUF_LEN 256
+#define CONFIG_MAX_MODEL_SIZE 500
+
+#define COMPL_POOL_NAME "ML compl pool"
+#define NUM_COMPL 10
+
+/**
+ * About model simple_linear.onnx being tested in this suite
+ *
+ * Model info:
+ * Version: 1
+ * Inputs: name: x, type: int32, shape: [1]
+ * Outputs: name: y, type: int32, shape: [1]
+ *
+ * The model is of form y = 3 * x + 4
+ * Thus when x = 5, the output y should be 19.
+ */
+typedef struct global_t {
+ int disabled;
+ odp_ml_capability_t ml_capa;
+ odp_ml_config_t ml_config;
+ odp_ml_model_param_t model_param;
+ odp_ml_model_t ml_model;
+ odp_pool_t compl_pool;
+ odp_queue_t queue;
+ odp_ml_data_t data;
+ odp_ml_data_seg_t input_seg;
+ odp_ml_data_seg_t output_seg;
+ odp_ml_run_param_t run_param;
+ uint64_t wait_ns;
+ int32_t x;
+ int32_t y;
+ int32_t y_expected;
+
+} global_t;
+
+static global_t global;
+
+static int fill_model_param(const char *model_name, odp_ml_model_param_t *model_param)
+{
+ size_t size;
+ char *pos;
+ char *exe_dir;
+ size_t exe_dir_len;
+ FILE *model_file;
+ char exe_path[BUF_LEN];
+ ssize_t exe_path_len;
+ char model_path[BUF_LEN];
+
+ /* Model file is placed in the same directory as the executable ml_linux */
+ exe_path_len = readlink("/proc/self/exe", exe_path, BUF_LEN - 1);
+ if (exe_path_len != -1) {
+ exe_path[exe_path_len] = '\0';
+
+ pos = strstr(exe_path, ".libs");
+ if (pos)
+ *(pos + 5) = '\0';
+
+ exe_dir = dirname(exe_path);
+ exe_dir_len = strlen(exe_dir);
+
+ memcpy(model_path, exe_dir, exe_dir_len);
+ model_path[exe_dir_len] = '/';
+ model_path[exe_dir_len + 1] = '\0';
+
+ strncat(model_path, model_name, BUF_LEN - strlen(model_path) - 1);
+ ODPH_DBG("model_path: %s\n", model_path);
+ model_file = fopen(model_path, "rb");
+	} else { /* Can't get executable path, try to find model file in the current dir */
+ model_file = fopen(model_name, "rb");
+ }
+
+ if (model_file == NULL) {
+ perror("Failed to open model file");
+ return -1;
+ }
+
+ /* Get the model file size in bytes */
+ fseek(model_file, 0, SEEK_END);
+ model_param->size = ftell(model_file);
+ rewind(model_file);
+
+ model_param->model = malloc(model_param->size);
+ if (!model_param->model) {
+ ODPH_ERR("\n\nMemory allocation failed\n");
+ fclose(model_file);
+ return -1;
+ }
+ size = fread(model_param->model, model_param->size, 1, model_file);
+
+ fclose(model_file);
+ if (size != 1) {
+ ODPH_ERR("\n\nRead model file failed\n");
+ return -1;
+ }
+
+ model_param->max_compl_id = 0;
+
+ return 0;
+}
+
+static int ml_suite_init(void)
+{
+ odp_ml_capability_t *ml_capa = &global.ml_capa;
+ odp_queue_param_t queue_param;
+ odp_ml_compl_pool_param_t ml_pool_param;
+
+ memset(&global, 0, sizeof(global_t));
+ global.queue = ODP_QUEUE_INVALID;
+ global.compl_pool = ODP_POOL_INVALID;
+
+ if (odp_ml_capability(ml_capa)) {
+ ODPH_ERR("ML capability failed\n");
+ return -1;
+ }
+
+ if (ml_capa->max_models == 0) {
+ global.disabled = 1;
+ ODPH_DBG("ML test disabled\n");
+ return 0;
+ }
+
+ /* Configure ML */
+ odp_ml_config_init(&global.ml_config);
+ global.ml_config.max_models_created = ml_capa->max_models;
+ global.ml_config.max_models_loaded = ml_capa->max_models_loaded;
+ global.ml_config.max_model_size = CONFIG_MAX_MODEL_SIZE;
+
+ if (ml_capa->load.compl_mode_mask & ODP_ML_COMPL_MODE_SYNC)
+ global.ml_config.load_mode_mask |= ODP_ML_COMPL_MODE_SYNC;
+
+ if (ml_capa->load.compl_mode_mask & ODP_ML_COMPL_MODE_POLL)
+ global.ml_config.load_mode_mask |= ODP_ML_COMPL_MODE_POLL;
+
+ if (ml_capa->load.compl_mode_mask & ODP_ML_COMPL_MODE_EVENT)
+ global.ml_config.load_mode_mask |= ODP_ML_COMPL_MODE_EVENT;
+
+ if (ml_capa->run.compl_mode_mask & ODP_ML_COMPL_MODE_SYNC)
+ global.ml_config.run_mode_mask |= ODP_ML_COMPL_MODE_SYNC;
+
+ if (ml_capa->run.compl_mode_mask & ODP_ML_COMPL_MODE_POLL)
+ global.ml_config.run_mode_mask |= ODP_ML_COMPL_MODE_POLL;
+
+ if (ml_capa->run.compl_mode_mask & ODP_ML_COMPL_MODE_EVENT)
+ global.ml_config.run_mode_mask |= ODP_ML_COMPL_MODE_EVENT;
+
+ if (odp_ml_config(&global.ml_config)) {
+ ODPH_ERR("\n\nConfiguring ML failed\n");
+ return -1;
+ }
+
+ global.x = 5;
+ global.wait_ns = 500 * ODP_TIME_MSEC_IN_NS;
+ global.y_expected = 19; /* y = 3 * x + 4 = 3 * 5 + 4 = 19 */
+
+ /* Prepare data for running model inference */
+ odp_ml_run_param_init(&global.run_param);
+
+ global.data.num_input_seg = NUM_INPUTS;
+ global.data.input_seg = &global.input_seg;
+ global.input_seg.size = sizeof(int32_t);
+ global.input_seg.addr = &global.x;
+
+ global.data.num_output_seg = NUM_OUTPUTS;
+ global.data.output_seg = &global.output_seg;
+ global.output_seg.size = sizeof(int32_t);
+ global.output_seg.addr = &global.y;
+
+ if (fill_model_param("simple_linear.onnx", &global.model_param))
+ return -1;
+
+ /* Create ML model */
+ global.ml_model = odp_ml_model_create(MODEL_NAME, &global.model_param);
+ if (global.ml_model == ODP_ML_MODEL_INVALID) {
+ ODPH_ERR("Create ML model failed\n");
+ goto error;
+ }
+
+ /* Asynchronous mode with event completion is not supported */
+ if (!((ml_capa->load.compl_mode_mask & ODP_ML_COMPL_MODE_EVENT) ||
+ (ml_capa->run.compl_mode_mask & ODP_ML_COMPL_MODE_EVENT)))
+ return 0;
+
+ /* Create a queue for sending ML completion event to */
+ odp_queue_param_init(&queue_param);
+ queue_param.type = ODP_QUEUE_TYPE_SCHED;
+ queue_param.sched.sync = ODP_SCHED_SYNC_PARALLEL;
+ queue_param.sched.prio = odp_schedule_default_prio();
+ queue_param.sched.group = ODP_SCHED_GROUP_ALL;
+
+ global.queue = odp_queue_create("ML compl queue", &queue_param);
+ if (global.queue == ODP_QUEUE_INVALID) {
+ ODPH_ERR("Queue create failed\n");
+ goto error;
+ }
+
+ /* Create an ML job completion pool */
+ if (ml_capa->pool.max_num < NUM_COMPL) {
+ ODPH_ERR("Too small ML compl pool %u\n", ml_capa->pool.max_num);
+ goto error;
+ }
+
+ odp_ml_compl_pool_param_init(&ml_pool_param);
+ ml_pool_param.num = NUM_COMPL;
+
+ global.compl_pool = odp_ml_compl_pool_create(COMPL_POOL_NAME, &ml_pool_param);
+ if (global.compl_pool == ODP_POOL_INVALID) {
+ ODPH_ERR("Create ML completion pool failed\n");
+ goto error;
+ }
+
+ return 0;
+
+error:
+ free(global.model_param.model);
+ return -1;
+}
+
+static int ml_suite_term(void)
+{
+ if (global.compl_pool != ODP_POOL_INVALID &&
+ odp_pool_destroy(global.compl_pool)) {
+ ODPH_ERR("Completion pool destroy failed\n");
+ return -1;
+ }
+
+ if (global.ml_model && odp_ml_model_destroy(global.ml_model)) {
+ ODPH_ERR("Destroy ML model failed\n");
+ return -1;
+ }
+
+ if (global.queue != ODP_QUEUE_INVALID &&
+ odp_queue_destroy(global.queue)) {
+ ODPH_ERR("Destroy ML queue failed\n");
+ return -1;
+ }
+
+ free(global.model_param.model);
+
+ return 0;
+}
+
+static int check_ml_support(void)
+{
+ if (global.disabled)
+ return ODP_TEST_INACTIVE;
+
+ return ODP_TEST_ACTIVE;
+}
+
+static int check_load_sync(void)
+{
+ if (global.disabled)
+ return ODP_TEST_INACTIVE;
+
+ if (global.ml_config.load_mode_mask & ODP_ML_COMPL_MODE_SYNC)
+ return ODP_TEST_ACTIVE;
+
+ return ODP_TEST_INACTIVE;
+}
+
+static int check_load_poll(void)
+{
+ if (global.disabled)
+ return ODP_TEST_INACTIVE;
+
+ if (global.ml_config.load_mode_mask & ODP_ML_COMPL_MODE_POLL)
+ return ODP_TEST_ACTIVE;
+
+ return ODP_TEST_INACTIVE;
+}
+
+static int check_load_event(void)
+{
+ if (global.disabled)
+ return ODP_TEST_INACTIVE;
+
+ if (global.ml_config.load_mode_mask & ODP_ML_COMPL_MODE_EVENT)
+ return ODP_TEST_ACTIVE;
+
+ return ODP_TEST_INACTIVE;
+}
+
+static int check_run_sync(void)
+{
+ if (global.disabled)
+ return ODP_TEST_INACTIVE;
+
+ /* Model run test uses synchronous load */
+ if ((global.ml_config.run_mode_mask & ODP_ML_COMPL_MODE_SYNC) &&
+ (global.ml_config.load_mode_mask & ODP_ML_COMPL_MODE_SYNC))
+ return ODP_TEST_ACTIVE;
+
+ return ODP_TEST_INACTIVE;
+}
+
+static int check_run_poll(void)
+{
+ if (global.disabled)
+ return ODP_TEST_INACTIVE;
+
+ /* Poll mode model run test uses synchronous load */
+ if ((global.ml_config.run_mode_mask & ODP_ML_COMPL_MODE_POLL) &&
+ (global.ml_config.load_mode_mask & ODP_ML_COMPL_MODE_SYNC))
+ return ODP_TEST_ACTIVE;
+
+ return ODP_TEST_INACTIVE;
+}
+
+static int check_run_event(void)
+{
+	if (global.disabled)
+		return ODP_TEST_INACTIVE;
+
+	/* Event mode model run test uses synchronous load */
+	if ((global.ml_config.run_mode_mask & ODP_ML_COMPL_MODE_EVENT) &&
+	    (global.ml_config.load_mode_mask & ODP_ML_COMPL_MODE_SYNC))
+		return ODP_TEST_ACTIVE;
+
+	return ODP_TEST_INACTIVE;
+}
+
+static int check_run_poll_event(void)
+{
+ if (global.disabled)
+ return ODP_TEST_INACTIVE;
+
+ /* test_ml_run_start_multi uses synchronous load, poll mode and event mode run */
+ if ((global.ml_config.run_mode_mask & ODP_ML_COMPL_MODE_EVENT) &&
+ (global.ml_config.run_mode_mask & ODP_ML_COMPL_MODE_POLL) &&
+ (global.ml_config.load_mode_mask & ODP_ML_COMPL_MODE_SYNC))
+ return ODP_TEST_ACTIVE;
+
+ return ODP_TEST_INACTIVE;
+}
+
+static void test_ml_debug(void)
+{
+ uint64_t u64;
+
+ u64 = odp_ml_model_to_u64(global.ml_model);
+ CU_ASSERT(u64 != odp_ml_model_to_u64(ODP_ML_MODEL_INVALID));
+ printf("\n ML model handle: 0x%" PRIx64 "\n", u64);
+
+ odp_ml_model_print(global.ml_model);
+}
+
+static void test_ml_model_create(void)
+{
+ uint32_t i;
+ /* One for global.ml_model */
+ uint32_t max_models = global.ml_config.max_models_created - 1;
+ odp_ml_model_t models[max_models];
+
+ for (i = 0; i < max_models; i++) {
+ models[i] = odp_ml_model_create(NULL, &global.model_param);
+
+ if (models[i] == ODP_ML_MODEL_INVALID) {
+ ODPH_ERR("ML model create failed: %u / %u\n", i, max_models);
+ break;
+ }
+ }
+
+ CU_ASSERT(i == max_models);
+ max_models = i;
+
+ /* Destroy valid models */
+ for (i = 0; i < max_models; i++)
+ CU_ASSERT_FATAL(odp_ml_model_destroy(models[i]) == 0);
+}
+
+static void test_ml_model_lookup(void)
+{
+ odp_ml_model_t model2;
+ odp_ml_model_t model_lookup;
+
+ /* Look up model with the same name, should find one with equal handle */
+ model_lookup = odp_ml_model_lookup(MODEL_NAME);
+ CU_ASSERT_FATAL(model_lookup != ODP_ML_MODEL_INVALID);
+ CU_ASSERT(odp_ml_model_to_u64(global.ml_model) == odp_ml_model_to_u64(model_lookup));
+
+ /* Look up model with a different name, should return invalid handle */
+ model_lookup = odp_ml_model_lookup("diff");
+ CU_ASSERT_FATAL(model_lookup == ODP_ML_MODEL_INVALID);
+
+ model2 = odp_ml_model_create(MODEL_NAME, &global.model_param);
+ CU_ASSERT_FATAL(model2 != ODP_ML_MODEL_INVALID);
+ CU_ASSERT(odp_ml_model_to_u64(global.ml_model) != odp_ml_model_to_u64(model2));
+
+ model_lookup = odp_ml_model_lookup(MODEL_NAME);
+ CU_ASSERT(odp_ml_model_to_u64(model_lookup) == odp_ml_model_to_u64(global.ml_model) ||
+ odp_ml_model_to_u64(model_lookup) == odp_ml_model_to_u64(model2));
+
+ CU_ASSERT(odp_ml_model_destroy(model2) == 0);
+}
+
+static void test_ml_model_info(void)
+{
+	int ret;
+	uint32_t num_ret;
+	odp_ml_model_info_t ml_info;
+	odp_ml_input_info_t input_info[2];
+	odp_ml_output_info_t output_info[2];
+
+	/* Verify model info about global.ml_model, namely, simple_linear.onnx */
+	memset(&ml_info, 0x88, sizeof(odp_ml_model_info_t));
+	ret = odp_ml_model_info(global.ml_model, &ml_info);
+	CU_ASSERT(ret == 0);
+	CU_ASSERT(!strcmp(ml_info.name, MODEL_NAME));
+	CU_ASSERT(ml_info.model_version == 1);
+	CU_ASSERT(ml_info.num_inputs == NUM_INPUTS);
+	CU_ASSERT(ml_info.num_outputs == NUM_OUTPUTS);
+
+	num_ret = odp_ml_model_input_info(global.ml_model, input_info, NUM_INPUTS);
+	CU_ASSERT(num_ret == NUM_INPUTS);
+	CU_ASSERT(!strcmp(input_info[0].name, "x"));
+	CU_ASSERT(input_info[0].shape.num_dim == 1);
+	CU_ASSERT(input_info[0].shape.dim[0] == 1);
+	CU_ASSERT((int)input_info[0].data_type == ODP_ML_DATA_TYPE_INT32);
+
+	/* When num is 0, return normally, and input_info is ignored */
+	num_ret = odp_ml_model_input_info(global.ml_model, input_info, 0);
+	CU_ASSERT(num_ret == NUM_INPUTS);
+
+	/* When num is bigger than actual number of inputs, extra input_info is left untouched */
+	input_info[1].data_type = (odp_ml_data_type_t)-1;
+	num_ret = odp_ml_model_input_info(global.ml_model, input_info, NUM_INPUTS + 1);
+	CU_ASSERT(num_ret == NUM_INPUTS);
+	CU_ASSERT(!strcmp(input_info[0].name, "x"));
+	CU_ASSERT(input_info[0].shape.num_dim == 1);
+	CU_ASSERT(input_info[0].shape.dim[0] == 1);
+	CU_ASSERT((int)input_info[0].data_type == ODP_ML_DATA_TYPE_INT32);
+	/* input_info[1] is left untouched */
+	CU_ASSERT(input_info[1].data_type == (odp_ml_data_type_t)-1);
+
+	num_ret = odp_ml_model_output_info(global.ml_model, output_info, NUM_OUTPUTS);
+	CU_ASSERT(num_ret == NUM_OUTPUTS);
+	CU_ASSERT(!strcmp(output_info[0].name, "y"));
+	CU_ASSERT(output_info[0].shape.num_dim == 1);
+	CU_ASSERT(output_info[0].shape.dim[0] == 1);
+	CU_ASSERT((int)output_info[0].data_type == ODP_ML_DATA_TYPE_INT32);
+
+	/* When num is 0, return normally, and output_info is ignored */
+	num_ret = odp_ml_model_output_info(global.ml_model, output_info, 0);
+	CU_ASSERT(num_ret == NUM_OUTPUTS);
+
+	/* When num is bigger than actual number of outputs, extra output_info is left untouched */
+	output_info[1].shape.num_dim = 98876; /* sentinel, must be set before the call */
+	num_ret = odp_ml_model_output_info(global.ml_model, output_info, NUM_OUTPUTS + 1);
+	CU_ASSERT(num_ret == NUM_OUTPUTS);
+	CU_ASSERT(!strcmp(output_info[0].name, "y"));
+	CU_ASSERT(output_info[0].shape.num_dim == 1);
+	CU_ASSERT(output_info[0].shape.dim[0] == 1);
+	CU_ASSERT((int)output_info[0].data_type == ODP_ML_DATA_TYPE_INT32);
+	/* output_info[1] is left untouched */
+	CU_ASSERT(output_info[1].shape.num_dim == 98876);
+}
+
+static void test_ml_model_load(void)
+{
+ int ret;
+ odp_ml_model_t test_model;
+ odp_ml_load_result_t result;
+
+ test_model = odp_ml_model_create(NULL, &global.model_param);
+ CU_ASSERT_FATAL(test_model != ODP_ML_MODEL_INVALID);
+
+ ret = odp_ml_model_load(test_model, &result);
+ CU_ASSERT(ret == 0);
+ CU_ASSERT(result.error_code == 0);
+
+ ret = odp_ml_model_unload(test_model, NULL);
+ CU_ASSERT(ret == 0);
+
+ CU_ASSERT(odp_ml_model_destroy(test_model) == 0);
+}
+
+/* Test asynchronous model loading in ODP_ML_COMPL_MODE_POLL mode */
+static void test_ml_model_load_async_poll(void)
+{
+ int ret;
+ odp_ml_load_result_t result;
+ odp_ml_compl_param_t compl_param;
+ int dummy = 6;
+ void *user_ptr = &dummy;
+ uint64_t wait_ns = 500 * ODP_TIME_MSEC_IN_NS;
+
+ memset(&result, 0, sizeof(result));
+ odp_ml_compl_param_init(&compl_param);
+ compl_param.mode = ODP_ML_COMPL_MODE_POLL;
+ compl_param.compl_id = 0;
+ compl_param.user_ptr = user_ptr;
+
+ ret = odp_ml_model_load_start(global.ml_model, &compl_param);
+ CU_ASSERT_FATAL(ret == 0);
+
+ /* When odp_ml_model_load_start() succeeded, continue to check completion status */
+ for (int i = 0; i < TIMEOUT; i++) {
+ ret = odp_ml_model_load_status(global.ml_model, 0, &result);
+ if (ret)
+ break;
+
+ /* ret = 0 meaning run has not finished, continue to check status */
+ odp_time_wait_ns(wait_ns);
+ }
+
+ CU_ASSERT(ret > 0);
+ CU_ASSERT(result.error_code == 0);
+ CU_ASSERT(result.user_ptr == user_ptr);
+ /* odp_ml_model_load does not modify data in user_ptr */
+ if (result.user_ptr)
+ CU_ASSERT(*(int *)result.user_ptr == dummy);
+
+ ret = odp_ml_model_unload_start(global.ml_model, &compl_param);
+ CU_ASSERT_FATAL(ret == 0);
+
+ /* When odp_ml_model_unload_start() succeeded, continue to check completion
+ * status */
+ for (int i = 0; i < TIMEOUT; i++) {
+ ret = odp_ml_model_unload_status(global.ml_model, 0, &result);
+ if (ret)
+ break;
+
+ /* ret = 0 meaning run has not finished, continue to check status */
+ odp_time_wait_ns(wait_ns);
+ }
+
+ CU_ASSERT_FATAL(ret > 0);
+ CU_ASSERT(result.error_code == 0);
+ CU_ASSERT(result.user_ptr == user_ptr);
+
+ /* odp_ml_model_unload does not modify data in user_ptr */
+ if (result.user_ptr)
+ CU_ASSERT(*(int *)result.user_ptr == dummy);
+}
+
+static int
+get_result_from_ml_compl_event(odp_ml_load_result_t *load_result, odp_ml_run_result_t *run_result)
+{
+ int ret;
+ odp_event_t ev;
+ odp_ml_compl_t compl;
+ odp_event_type_t ev_type;
+ odp_queue_t from_queue = ODP_QUEUE_INVALID;
+ uint64_t sched_wait = odp_schedule_wait_time(global.wait_ns);
+
+ /* Run event scheduler to find the ml completion event */
+ for (int i = 0; i < TIMEOUT; i++) {
+ ev = odp_schedule(&from_queue, sched_wait);
+ if (ev != ODP_EVENT_INVALID)
+ break;
+ }
+
+ CU_ASSERT(ev != ODP_EVENT_INVALID);
+ if (ev == ODP_EVENT_INVALID) {
+ ODPH_ERR("Timeout while waiting for completion event\n");
+ return -1;
+ }
+
+ ev_type = odp_event_type(ev);
+ CU_ASSERT(from_queue == global.queue);
+ CU_ASSERT(ev_type == ODP_EVENT_ML_COMPL);
+ if (from_queue != global.queue || ev_type != ODP_EVENT_ML_COMPL) {
+ odp_event_free(ev);
+ ODPH_ERR("Received unexpected event while waiting for completion\n");
+ return -1;
+ }
+
+ compl = odp_ml_compl_from_event(ev);
+ CU_ASSERT(compl != ODP_ML_COMPL_INVALID);
+
+ if (load_result) {
+ CU_ASSERT(odp_ml_compl_load_result(compl, NULL) == 0);
+ ret = odp_ml_compl_load_result(compl, load_result);
+ } else {
+ CU_ASSERT(odp_ml_compl_run_result(compl, NULL) == 0);
+ ret = odp_ml_compl_run_result(compl, run_result);
+ }
+
+ CU_ASSERT(ret == 0);
+ odp_ml_compl_free(compl);
+
+ return ret;
+}
+
+/* Test asynchronous model loading in ODP_ML_COMPL_MODE_EVENT mode */
+static void test_ml_model_load_async_event(void)
+{
+ int ret;
+ odp_ml_compl_t compl;
+ odp_ml_load_result_t result;
+ odp_ml_compl_param_t compl_param;
+ int dummy = 6;
+ void *user_ptr = &dummy;
+
+ compl = odp_ml_compl_alloc(global.compl_pool);
+ CU_ASSERT_FATAL(compl != ODP_ML_COMPL_INVALID);
+
+ odp_ml_compl_param_init(&compl_param);
+ compl_param.mode = ODP_ML_COMPL_MODE_EVENT;
+ compl_param.event = odp_ml_compl_to_event(compl);
+ compl_param.queue = global.queue;
+ compl_param.user_ptr = user_ptr;
+
+ ret = odp_ml_model_load_start(global.ml_model, &compl_param);
+ CU_ASSERT(ret == 0);
+
+ /* Return when odp_ml_model_load_start() failed */
+ if (ret) {
+ odp_ml_compl_free(compl);
+ ODPH_ERR("ML model odp_ml_model_load_start() failed\n");
+ return;
+ }
+
+ /* Run event scheduler to find the ml completion event and verify it */
+ if (get_result_from_ml_compl_event(&result, NULL))
+ return;
+
+ CU_ASSERT(result.error_code == 0);
+ CU_ASSERT(result.user_ptr == user_ptr);
+
+ /* Model load does not modify data in user_ptr */
+ if (result.user_ptr)
+ CU_ASSERT(*(int *)result.user_ptr == dummy);
+
+ compl = odp_ml_compl_alloc(global.compl_pool);
+ CU_ASSERT(compl != ODP_ML_COMPL_INVALID);
+
+ if (compl == ODP_ML_COMPL_INVALID)
+ return;
+
+ compl_param.event = odp_ml_compl_to_event(compl);
+ ret = odp_ml_model_unload_start(global.ml_model, &compl_param);
+ CU_ASSERT_FATAL(ret == 0);
+
+ /* Run event scheduler to find the ml completion event and verify it */
+ if (get_result_from_ml_compl_event(&result, NULL))
+ return;
+
+ CU_ASSERT(result.error_code == 0);
+ CU_ASSERT(result.user_ptr == user_ptr);
+
+ /* odp_ml_model_unload does not modify data in user_ptr */
+ if (result.user_ptr)
+ CU_ASSERT(*(int *)result.user_ptr == dummy);
+}
+
+/* About model batch_add.onnx being tested in this function
+ *
+ * Model info:
+ * Version: 1
+ * Inputs:
+ * inputs[0]: name: x1, type: double, shape: [c, 3]
+ * inputs[1]: name: x2, type: double, shape: [c, 3]
+ * Outputs:
+ * Outputs[0]: name: y, type: double, shape: [c, 3]
+ *
+ * The model computes element-wise sum of input tensors x1 and x2 and stores them
+ * in y. The first dimension of input and output tensors represent batch size,
+ * thus it must be the same for all tensors here. The dynamic dimension size
+ * in the output tensor here can be deduced from the given batch size, thus no
+ * need for the implementation to fill it.
+ */
+#define NUM_COLUMN 3
+#define MAX_BATCH_SIZE 4
+#define SIZE (NUM_COLUMN * MAX_BATCH_SIZE * sizeof(double))
+static void run_model_batch_add(void)
+{
+	int ret;
+	odp_ml_data_t data;
+	odp_ml_model_t model;
+	odp_ml_data_seg_t input_segs[SIZE * 2];
+	odp_ml_data_seg_t output_segs[SIZE];
+	odp_ml_run_result_t result;
+	odp_ml_run_param_t run_param;
+	odp_ml_model_param_t model_param;
+
+	double y[12];
+	double y_expected[12];
+	uint32_t batch_size = MAX_BATCH_SIZE;
+	double x1[12] = {97, 47, 62, 19, 93, 59, 67, 42, 28, 55, 46, 31};
+	double x2[12] = {81, 56, 27, 4, 69, 12, 91, 98, 23, 90, 52, 64};
+
+	for (int i = 0; i < 12; i++)
+		y_expected[i] = x1[i] + x2[i];
+
+	odp_ml_model_param_init(&model_param);
+
+	odp_ml_data_format_t input_format[2] = {
+		{
+			.data_type = ODP_ML_DATA_TYPE_FP64,
+			.data_type_size = 8,
+			.shape.type = ODP_ML_SHAPE_BATCH,
+			.shape.num_dim = 2,
+			.shape.dim = {ODP_ML_DIM_DYNAMIC, NUM_COLUMN},
+			.shape.dim_max = {MAX_BATCH_SIZE, NUM_COLUMN}
+		},
+		{
+			.data_type = ODP_ML_DATA_TYPE_FP64,
+			.data_type_size = 8,
+			.shape.type = ODP_ML_SHAPE_BATCH,
+			.shape.num_dim = 2,
+			.shape.dim = {ODP_ML_DIM_DYNAMIC, NUM_COLUMN},
+			.shape.dim_max = {MAX_BATCH_SIZE, NUM_COLUMN}
+		}
+	};
+
+	model_param.extra_info.num_inputs = 2;
+	model_param.extra_info.input_format = input_format;
+
+	/* Read model batch_add.onnx from file */
+	if (fill_model_param("batch_add.onnx", &model_param))
+		return;
+
+	model = odp_ml_model_create("batch_add", &model_param);
+	free(model_param.model);
+	CU_ASSERT(model != ODP_ML_MODEL_INVALID);
+	if (model == ODP_ML_MODEL_INVALID)
+		return;
+
+	if (odp_ml_model_load(model, NULL)) {
+		CU_ASSERT(odp_ml_model_destroy(model) == 0);
+		return;
+	}
+
+	odp_ml_model_print(model);
+
+	/* Prepare parameters for running inference */
+	odp_ml_run_param_init(&run_param);
+	run_param.result = &result;
+
+	data.num_input_seg = 2;
+	data.input_seg = input_segs;
+	input_segs[0].addr = x1;
+	input_segs[1].addr = x2;
+
+	data.num_output_seg = 1;
+	data.output_seg = output_segs;
+	output_segs[0].size = sizeof(y);
+	output_segs[0].addr = y;
+
+	/* Test different batch sizes */
+	for (int i = 0; i < MAX_BATCH_SIZE; i++) {
+		run_param.batch_size = batch_size;
+		input_segs[0].size = sizeof(double) * NUM_COLUMN * batch_size;
+		input_segs[1].size = sizeof(double) * NUM_COLUMN * batch_size;
+		ret = odp_ml_run(model, &data, &run_param);
+		CU_ASSERT(ret == 1);
+		if (ret != 1)
+			goto fail;
+
+		for (uint32_t j = 0; j < batch_size * NUM_COLUMN; j++)
+			CU_ASSERT(y[j] == y_expected[j]);
+
+		batch_size--;
+	}
+
+	/* Test also without run results */
+	run_param.result = NULL;
+	ret = odp_ml_run(model, &data, &run_param);
+	CU_ASSERT(ret == 1);
+
+	/* Test different segment sizes */
+	batch_size = MAX_BATCH_SIZE;
+	odp_ml_run_param_init(&run_param);
+	run_param.result = &result;
+	run_param.batch_size = batch_size;
+	data.input_seg = input_segs;
+	data.output_seg = output_segs;
+
+	for (int seg_size = SIZE; seg_size > 0; seg_size--) {
+		int num_seg = (SIZE + seg_size - 1) / seg_size;
+
+		if ((uint32_t)num_seg > global.ml_capa.max_segs_per_input ||
+		    (uint32_t)num_seg > global.ml_capa.max_segs_per_output)
+			break;
+
+		data.num_input_seg = num_seg * 2;
+		data.num_output_seg = num_seg;
+
+		for (int seg = 0; seg < num_seg; seg++) {
+			int size = seg_size;
+
+			if (seg == num_seg - 1)
+				size = SIZE - seg * seg_size;
+
+			input_segs[seg].addr = (char *)x1 + seg * seg_size;
+			input_segs[seg].size = size;
+			input_segs[seg + num_seg].addr = (char *)x2 + seg * seg_size;
+			input_segs[seg + num_seg].size = size;
+			output_segs[seg].addr = (char *)y + seg * seg_size;
+			output_segs[seg].size = size;
+		}
+
+		memset(y, 0, sizeof(y));
+		ret = odp_ml_run(model, &data, &run_param);
+		CU_ASSERT(ret == 1);
+		if (ret != 1)
+			goto fail;
+
+		for (uint32_t j = 0; j < batch_size * NUM_COLUMN; j++)
+			CU_ASSERT(y[j] == y_expected[j]);
+	}
+
+fail:
+	CU_ASSERT_FATAL(odp_ml_model_unload(model, NULL) == 0);
+	CU_ASSERT(odp_ml_model_destroy(model) == 0);
+}
+
+static void run_global_ml_model(void)
+{
+ int ret = 0;
+ odp_ml_run_result_t result;
+
+ ret = odp_ml_model_load(global.ml_model, NULL);
+ CU_ASSERT_FATAL(ret == 0);
+
+ global.run_param.result = &result;
+
+ ret = odp_ml_run(global.ml_model, &global.data, &global.run_param);
+ CU_ASSERT(ret == 1);
+ CU_ASSERT(!result.error_code);
+ CU_ASSERT(*(int32_t *)global.output_seg.addr == global.y_expected);
+
+ ret = odp_ml_model_unload(global.ml_model, NULL);
+ CU_ASSERT_FATAL(ret == 0);
+ global.run_param.result = NULL;
+}
+
+static void test_ml_run(void)
+{
+ run_global_ml_model();
+ run_model_batch_add();
+}
+
+static void test_ml_run_multi(void)
+{
+ int ret;
+ int32_t y;
+ int32_t x = 8;
+ int32_t y_expected = 28;
+ odp_ml_data_t data[RUN_NUM];
+ odp_ml_data_seg_t input_seg;
+ odp_ml_data_seg_t output_seg;
+ odp_ml_run_param_t param[RUN_NUM];
+ odp_ml_run_result_t result[RUN_NUM];
+ uint64_t wait_ns = 500 * ODP_TIME_MSEC_IN_NS;
+
+ ret = odp_ml_model_load(global.ml_model, NULL);
+ CU_ASSERT_FATAL(ret == 0);
+
+ param[0] = global.run_param;
+ param[0].result = &result[0];
+ odp_ml_run_param_init(&param[1]);
+ param[1].result = &result[1];
+
+ /* Prepare data for running model inference */
+ data[0] = global.data;
+ data[1].num_input_seg = NUM_INPUTS;
+ data[1].input_seg = &input_seg;
+ input_seg.size = sizeof(int32_t);
+ input_seg.addr = &x;
+
+ data[1].num_output_seg = NUM_OUTPUTS;
+ data[1].output_seg = &output_seg;
+ output_seg.size = sizeof(int32_t);
+ output_seg.addr = &y;
+
+ int num_completed = 0;
+
+ for (int i = 0; i < TIMEOUT; i++) {
+ ret = odp_ml_run_multi(global.ml_model, data + num_completed, param + num_completed,
+ RUN_NUM - num_completed);
+ CU_ASSERT(ret >= 0);
+ if (ret < 0)
+ break;
+
+ num_completed += ret;
+
+ if (num_completed >= RUN_NUM)
+ break;
+
+ odp_time_wait_ns(wait_ns);
+ }
+
+ CU_ASSERT(num_completed == RUN_NUM);
+ CU_ASSERT(!result[0].error_code);
+ CU_ASSERT(!result[1].error_code);
+ CU_ASSERT(*(int32_t *)global.output_seg.addr == global.y_expected);
+ CU_ASSERT(*(int32_t *)output_seg.addr == y_expected);
+
+ ret = odp_ml_model_unload(global.ml_model, NULL);
+ CU_ASSERT_FATAL(ret == 0);
+}
+
+/* Test asynchronous inference running in ODP_ML_COMPL_MODE_EVENT mode */
+static void test_ml_model_run_async_event(void)
+{
+ int ret;
+ void *user_ptr;
+ odp_ml_compl_t compl;
+ odp_ml_run_result_t result;
+ odp_ml_data_seg_t *outputs;
+ odp_ml_compl_param_t compl_param;
+
+ /* Load model in order to run inference */
+ ret = odp_ml_model_load(global.ml_model, NULL);
+ CU_ASSERT_FATAL(ret == 0);
+
+ compl = odp_ml_compl_alloc(global.compl_pool);
+ CU_ASSERT_FATAL(compl != ODP_ML_COMPL_INVALID);
+
+ odp_ml_compl_param_init(&compl_param);
+ compl_param.mode = ODP_ML_COMPL_MODE_EVENT;
+ compl_param.event = odp_ml_compl_to_event(compl);
+ compl_param.queue = global.queue;
+
+ /* user_ptr structure maintains the output data pointer for output retrieval */
+ user_ptr = &global.output_seg;
+ compl_param.user_ptr = user_ptr;
+
+ memset(global.output_seg.addr, 0, global.output_seg.size);
+ ret = odp_ml_run_start(global.ml_model, &global.data, &compl_param, NULL);
+ CU_ASSERT_FATAL(ret == 1);
+
+ /* Run event scheduler to find the ml completion event and verify it */
+ if (get_result_from_ml_compl_event(NULL, &result))
+ return;
+
+ CU_ASSERT(!result.error_code);
+ CU_ASSERT(result.user_ptr == user_ptr);
+
+ outputs = (odp_ml_data_seg_t *)result.user_ptr;
+ CU_ASSERT(*(int32_t *)outputs[0].addr == global.y_expected);
+
+ /* Unload model */
+ ret = odp_ml_model_unload(global.ml_model, NULL);
+ CU_ASSERT_FATAL(ret == 0);
+}
+
+/* Test asynchronous inference running in ODP_ML_COMPL_MODE_POLL mode */
+static void test_ml_model_run_async_poll(void)
+{
+ int ret;
+ void *user_ptr;
+ odp_ml_run_result_t result;
+ odp_ml_data_seg_t *outputs;
+ odp_ml_compl_param_t compl_param;
+ uint64_t wait_ns = 500 * ODP_TIME_MSEC_IN_NS;
+
+ memset(&result, 0, sizeof(result));
+ /* Load model in order to run inference */
+ ret = odp_ml_model_load(global.ml_model, NULL);
+ CU_ASSERT_FATAL(ret == 0);
+
+ odp_ml_compl_param_init(&compl_param);
+ compl_param.mode = ODP_ML_COMPL_MODE_POLL;
+ compl_param.compl_id = 0;
+
+ /* user_ptr structure maintains the output data pointer for output retrieval */
+ user_ptr = &global.output_seg;
+ compl_param.user_ptr = user_ptr;
+
+ memset(global.output_seg.addr, 0, global.output_seg.size);
+ ret = odp_ml_run_start(global.ml_model, &global.data, &compl_param, NULL);
+ CU_ASSERT_FATAL(ret == 1);
+
+ /* When odp_ml_run_start() succeeded, continue to check completion status */
+ for (int i = 0; i < TIMEOUT; i++) {
+ ret = odp_ml_run_status(global.ml_model, 0, &result);
+ if (ret)
+ break;
+
+ /* ret = 0 meaning run has not finished, continue to check status */
+ odp_time_wait_ns(wait_ns);
+ }
+
+ outputs = (odp_ml_data_seg_t *)result.user_ptr;
+
+ CU_ASSERT(ret > 0);
+ CU_ASSERT(!result.error_code);
+ CU_ASSERT(result.user_ptr == user_ptr);
+ CU_ASSERT(*(int32_t *)outputs[0].addr == global.y_expected);
+
+ /* Unload model */
+ ret = odp_ml_model_unload(global.ml_model, NULL);
+ CU_ASSERT_FATAL(ret == 0);
+}
+
+static void test_ml_run_start_multi(void)
+{
+ int ret;
+ int32_t y;
+ odp_ml_compl_t compl;
+ odp_ml_data_t data[RUN_NUM];
+ odp_ml_data_seg_t input_seg;
+ odp_ml_data_seg_t output_seg;
+ odp_ml_data_seg_t *outputs[RUN_NUM];
+ odp_ml_compl_param_t compl_param[RUN_NUM];
+ odp_ml_run_result_t run_result[RUN_NUM];
+ int32_t x = 5;
+ int32_t y_expected = 19;
+ uint64_t wait_ns = 500 * ODP_TIME_MSEC_IN_NS;
+
+ /* Load model in order to run inference */
+ ret = odp_ml_model_load(global.ml_model, NULL);
+ CU_ASSERT_FATAL(ret == 0);
+
+ compl = odp_ml_compl_alloc(global.compl_pool);
+ CU_ASSERT_FATAL(compl != ODP_ML_COMPL_INVALID);
+
+ /* Prepare data for running model inference */
+ data[0] = global.data;
+
+ data[1].num_input_seg = NUM_INPUTS;
+ data[1].input_seg = &input_seg;
+ input_seg.size = sizeof(int32_t);
+ input_seg.addr = &x;
+
+ data[1].num_output_seg = NUM_OUTPUTS;
+ data[1].output_seg = &output_seg;
+ output_seg.size = sizeof(int32_t);
+ output_seg.addr = &y;
+
+ /* Two completion parameters: one use event mode, another poll mode */
+ odp_ml_compl_param_init(&compl_param[0]);
+ compl_param[0].mode = ODP_ML_COMPL_MODE_EVENT;
+ compl_param[0].event = odp_ml_compl_to_event(compl);
+ compl_param[0].queue = global.queue;
+ /* user_ptr structure maintains the output data pointer for output retrieval */
+ compl_param[0].user_ptr = &global.output_seg;
+
+ odp_ml_compl_param_init(&compl_param[1]);
+ compl_param[1].mode = ODP_ML_COMPL_MODE_POLL;
+ compl_param[1].compl_id = 0;
+ /* user_ptr structure maintains the output data pointer for output retrieval */
+ compl_param[1].user_ptr = &output_seg;
+
+ memset(global.output_seg.addr, 0, sizeof(int32_t));
+
+ int num_completed = 0;
+
+ for (int i = 0; i < TIMEOUT; i++) {
+ ret = odp_ml_run_start_multi(global.ml_model, data + num_completed,
+ compl_param + num_completed, NULL,
+ RUN_NUM - num_completed);
+ CU_ASSERT(ret >= 0);
+ if (ret < 0)
+ break;
+
+ num_completed += ret;
+
+ if (num_completed >= RUN_NUM)
+ break;
+
+ odp_time_wait_ns(wait_ns);
+ }
+
+ CU_ASSERT(num_completed == RUN_NUM);
+
+ /* Run event scheduler to find the ml completion event and verify it */
+ if (get_result_from_ml_compl_event(NULL, &run_result[0])) {
+ ret = odp_ml_model_unload(global.ml_model, NULL);
+ return;
+ }
+
+ CU_ASSERT(!run_result[0].error_code);
+ CU_ASSERT(run_result[0].user_ptr == &global.output_seg);
+ outputs[0] = (odp_ml_data_seg_t *)run_result[0].user_ptr;
+ CU_ASSERT(*(int32_t *)outputs[0][0].addr == global.y_expected);
+
+ /* Check completion status for the poll mode */
+ for (int i = 0; i < TIMEOUT; i++) {
+ ret = odp_ml_run_status(global.ml_model, 0, &run_result[1]);
+ if (ret)
+ break;
+
+ /* ret = 0 meaning run has not finished, continue to check status */
+ odp_time_wait_ns(wait_ns);
+ }
+
+ outputs[1] = (odp_ml_data_seg_t *)run_result[1].user_ptr;
+ CU_ASSERT(ret > 0);
+ CU_ASSERT(!run_result[1].error_code);
+ CU_ASSERT(run_result[1].user_ptr == &output_seg);
+ CU_ASSERT(*(int32_t *)outputs[1][0].addr == y_expected);
+
+ /* Unload model */
+ ret = odp_ml_model_unload(global.ml_model, NULL);
+ CU_ASSERT_FATAL(ret == 0);
+}
+
+static void test_ml_model_extra_stat_info(void)
+{
+ int ret;
+
+ ret = odp_ml_model_extra_stat_info(global.ml_model, NULL, 0);
+ CU_ASSERT(ret >= 0);
+}
+
+static void test_ml_model_extra_stats(void)
+{
+ int ret;
+
+ ret = odp_ml_model_extra_stats(global.ml_model, NULL, 0);
+ CU_ASSERT(ret >= 0);
+}
+
+odp_testinfo_t ml_suite[] = {
+ ODP_TEST_INFO_CONDITIONAL(test_ml_debug, check_ml_support),
+ ODP_TEST_INFO_CONDITIONAL(test_ml_model_create, check_ml_support),
+ ODP_TEST_INFO_CONDITIONAL(test_ml_model_lookup, check_ml_support),
+ ODP_TEST_INFO_CONDITIONAL(test_ml_model_info, check_ml_support),
+ ODP_TEST_INFO_CONDITIONAL(test_ml_model_load, check_load_sync),
+ ODP_TEST_INFO_CONDITIONAL(test_ml_model_load_async_poll, check_load_poll),
+ ODP_TEST_INFO_CONDITIONAL(test_ml_model_load_async_event, check_load_event),
+	/* Synchronous load/unload is used to load/unload the model before/after a model run */
+ ODP_TEST_INFO_CONDITIONAL(test_ml_run, check_run_sync),
+ ODP_TEST_INFO_CONDITIONAL(test_ml_run_multi, check_run_sync),
+ ODP_TEST_INFO_CONDITIONAL(test_ml_model_run_async_event, check_run_event),
+ ODP_TEST_INFO_CONDITIONAL(test_ml_model_run_async_poll, check_run_poll),
+ ODP_TEST_INFO_CONDITIONAL(test_ml_run_start_multi, check_run_poll_event),
+ ODP_TEST_INFO_CONDITIONAL(test_ml_model_extra_stat_info, check_ml_support),
+ ODP_TEST_INFO_CONDITIONAL(test_ml_model_extra_stats, check_ml_support),
+ ODP_TEST_INFO_NULL
+};
+
+odp_suiteinfo_t ml_suites[] = {
+ {"ML", ml_suite_init, ml_suite_term, ml_suite},
+ ODP_SUITE_INFO_NULL
+};
+
+int main(int argc, char *argv[])
+{
+ int ret;
+
+ /* parse common options: */
+ if (odp_cunit_parse_options(&argc, argv))
+ return -1;
+
+ ret = odp_cunit_register(ml_suites);
+
+ if (ret == 0)
+ ret = odp_cunit_run();
+
+ return ret;
+}
diff --git a/platform/linux-generic/test/validation/api/ml/requirements.txt b/platform/linux-generic/test/validation/api/ml/requirements.txt
new file mode 100644
index 000000000..2dcba7a3a
--- /dev/null
+++ b/platform/linux-generic/test/validation/api/ml/requirements.txt
@@ -0,0 +1,2 @@
+onnx
+numpy
diff --git a/platform/linux-generic/test/validation/api/ml/simple_linear.onnx b/platform/linux-generic/test/validation/api/ml/simple_linear.onnx
new file mode 100644
index 000000000..45c4b95b9
--- /dev/null
+++ b/platform/linux-generic/test/validation/api/ml/simple_linear.onnx
Binary files differ
diff --git a/platform/linux-generic/test/validation/api/ml/simple_linear_gen.py b/platform/linux-generic/test/validation/api/ml/simple_linear_gen.py
new file mode 100644
index 000000000..b3e6124cd
--- /dev/null
+++ b/platform/linux-generic/test/validation/api/ml/simple_linear_gen.py
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2023 Nokia
+#
+
+import onnx
+from onnx import helper
+from onnx import TensorProto
+
+weight = helper.make_tensor(name='w', data_type=TensorProto.INT32, dims=[1], vals=[3])
+w = helper.make_node('Constant', inputs=[], outputs=['w'], name='weight', value=weight)
+
+bias = helper.make_tensor(name='b', data_type=TensorProto.INT32, dims=[1], vals=[4])
+b = helper.make_node('Constant', inputs=[], outputs=['b'], name='bias', value=bias)
+
+# The functional nodes:
+mul = helper.make_node('Mul', inputs=['x', 'w'], outputs=['wx'], name='Mul')
+add = helper.make_node('Add', inputs=['wx', 'b'], outputs=['y'], name='Add')
+
+# Create the graph
+g = helper.make_graph([w, mul, b, add], 'linear',
+ [helper.make_tensor_value_info('x', TensorProto.INT32, [1])],
+ [helper.make_tensor_value_info('y', TensorProto.INT32, [1])]
+)
+
+model = helper.make_model(
+ producer_name='ODP validation tests',
+ model_version=1,
+ doc_string="y = 3x + 4",
+ graph=g,
+ opset_imports=[helper.make_opsetid("", 13)]
+)
+
+# Save the model
+onnx.save(model, 'simple_linear.onnx')
diff --git a/platform/linux-generic/test/validation/api/shmem/shmem_odp1.c b/platform/linux-generic/test/validation/api/shmem/shmem_odp1.c
index 3b4ba819c..98148d6c7 100644
--- a/platform/linux-generic/test/validation/api/shmem/shmem_odp1.c
+++ b/platform/linux-generic/test/validation/api/shmem/shmem_odp1.c
@@ -79,7 +79,7 @@ int main(int argc, char *argv[])
int ret;
/* parse common options: */
- if (odp_cunit_parse_options(argc, argv))
+ if (odp_cunit_parse_options(&argc, argv))
return -1;
ret = odp_cunit_register(shmem_suites);