diff options
Diffstat (limited to 'platform/linux-generic/odp_ishm.c')
-rw-r--r-- | platform/linux-generic/odp_ishm.c | 2242 |
1 files changed, 2242 insertions, 0 deletions
diff --git a/platform/linux-generic/odp_ishm.c b/platform/linux-generic/odp_ishm.c new file mode 100644 index 000000000..84663078c --- /dev/null +++ b/platform/linux-generic/odp_ishm.c @@ -0,0 +1,2242 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2016-2018 Linaro Limited + * Copyright (c) 2019 Nokia + */ + +/* This file handles the internal shared memory: internal shared memory + * is memory which is sharable by all ODP threads regardless of how the + * ODP thread is implemented (pthread or process) and regardless of fork() + * time. + * Moreover, when reserved with the _ODP_ISHM_SINGLE_VA flag, + * internal shared memory is guaranteed to always be located at the same virtual + * address, i.e. pointers to internal shared memory are fully shareable + * between odp threads (regardless of thread type or fork time) in that case. + * Internal shared memory is mainly meant to be used internally within ODP + * (hence its name), but may also be allocated by odp applications and drivers, + * in the future (through these interfaces). + * To guarantee this full pointer shareability (when reserved with the + * _ODP_ISHM_SINGLE_VA flag) the whole internal shared memory area is reserved + * at global_init time. + * Because all ODP threads (pthreads or processes) are descendants of the ODP + * instantiation process, this address space is inherited by all ODP threads. + * When internal shmem reservation actually occurs, and when reserved with the + * _ODP_ISHM_SINGLE_VA flag, memory is allocated from the pre-reserved single + * VA memory. + * When an internal shared memory block is released, the memory is returned to + * its "pool" of pre-reserved memory (assuming it was allocated from there). The + * memory is not returned back to kernel until odp_term_global(). + * + * This file contains functions to handle the VA area (handling fragmentation + * and defragmentation resulting from different allocs/release) and also + * define the functions to allocate, release and lookup internal shared + * memory: + * _odp_ishm_reserve(), _odp_ishm_free*() and _odp_ishm_lookup*()... + */ +#include <odp_posix_extensions.h> +#include <odp_config_internal.h> +#include <odp_global_data.h> +#include <odp/api/spinlock.h> +#include <odp/api/align.h> +#include <odp/api/system_info.h> +#include <odp/api/debug.h> +#include <odp_init_internal.h> +#include <odp_shm_internal.h> +#include <odp_debug_internal.h> +#include <odp_fdserver_internal.h> +#include <odp_shm_internal.h> +#include <odp_ishmphy_internal.h> +#include <odp_ishmpool_internal.h> +#include <odp_libconfig_internal.h> +#include <odp_string_internal.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <fcntl.h> +#include <sys/types.h> +#include <inttypes.h> +#include <sys/wait.h> +#include <libgen.h> +#include <sys/types.h> +#include <dirent.h> + +/* + * Maximum number of internal shared memory blocks. + * + * This is the number of separate ISHM areas that can be reserved concurrently + * (Note that freeing such blocks may take time, or possibly never happen + * if some of the block owners never procsync() after free). This number + * should take that into account) + */ +#define ISHM_MAX_NB_BLOCKS (CONFIG_INTERNAL_SHM_BLOCKS + CONFIG_SHM_BLOCKS) + +/* + * Maximum internal shared memory block name length in chars + * probably taking the same number as SHM name size make sense at this stage + */ +#define ISHM_NAME_MAXLEN 128 + +/* + * Linux underlying file name: <directory>/odp-<odp_pid>-ishm-<name> + * The <name> part may be replaced by a sequence number if no specific + * name is given at reserve time + * <directory> is either /dev/shm or the hugepagefs mount point for default + * size. + * (searched at init time) + */ +#define ISHM_FILENAME_MAXLEN (ISHM_NAME_MAXLEN + 64) +#define ISHM_FILENAME_FORMAT "%s/odp-%d-ishm-%s" +#define ISHM_FILENAME_NORMAL_PAGE_DIR "/dev/shm" +#define _ODP_FILES_FMT "odp-%d-" + +/* + * when the memory is to be shared with an external entity (such as another + * ODP instance or an OS process not part of this ODP instance) then a + * export file is created describing the exported memory: this defines the + * location and the filename format of this description file + */ +#define ISHM_EXPTNAME_FORMAT "%s/%s/odp-%d-shm-%s" + +/* + * At worse case the virtual space gets so fragmented that there is + * a unallocated fragment between each allocated fragment: + * In that case, the number of fragments to take care of is twice the + * number of ISHM blocks + 1. + */ +#define ISHM_NB_FRAGMNTS (ISHM_MAX_NB_BLOCKS * 2 + 1) + +/* + * when a memory block is to be exported outside its ODP instance, + * an block 'attribute file' is created in /dev/shm/odp-<pid>-shm-<name>. + * The information given in this file is according to the following: + */ +#define EXPORT_FILE_LINE1_FMT "ODP exported shm block info:" +#define EXPORT_FILE_LINE2_FMT "ishm_blockname: %s" +#define EXPORT_FILE_LINE3_FMT "file: %s" +#define EXPORT_FILE_LINE4_FMT "length: %" PRIu64 +#define EXPORT_FILE_LINE5_FMT "flags: %" PRIu32 +#define EXPORT_FILE_LINE6_FMT "user_length: %" PRIu64 +#define EXPORT_FILE_LINE7_FMT "user_flags: %" PRIu32 +#define EXPORT_FILE_LINE8_FMT "align: %" PRIu32 +#define EXPORT_FILE_LINE9_FMT "offset: %" PRIu64 + +/* + * A fragment describes a piece of the shared virtual address space, + * and is allocated only when allocation is done with the _ODP_ISHM_SINGLE_VA + * flag: + * A fragment is said to be used when it actually does represent some + * portion of the virtual address space, and is said to be unused when + * it does not (so at start, one single fragment is used -describing the + * whole address space as unallocated-, and all others are unused). + * Fragments get used as address space fragmentation increases. + * A fragment is allocated if the piece of address space it + * describes is actually used by a shared memory block. + * Allocated fragments get their block_index set >=0. + */ +typedef struct ishm_fragment { + struct ishm_fragment *prev; /* not used when the fragment is unused */ + struct ishm_fragment *next; + void *start; /* start of segment (VA) */ + uintptr_t len; /* length of segment. multiple of page size */ + int block_index; /* -1 for unallocated fragments */ +} ishm_fragment_t; + +/* + * A block describes a piece of reserved memory: Any successful ishm_reserve() + * will allocate a block. A ishm_reserve() with the _ODP_ISHM_SINGLE_VA flag set + * will allocate both a block and a fragment. + * Blocks contain only global data common to all processes. + */ +typedef enum {UNKNOWN, HUGE, NORMAL, EXTERNAL, CACHED} huge_flag_t; +typedef struct ishm_block { + char name[ISHM_NAME_MAXLEN]; /* name for the ishm block (if any) */ + char filename[ISHM_FILENAME_MAXLEN]; /* name of the .../odp-* file */ + char exptname[ISHM_FILENAME_MAXLEN]; /* name of the export file */ + uint32_t user_flags; /* any flags the user want to remember. */ + uint32_t flags; /* block creation flags. */ + uint32_t external_fd:1; /* block FD was externally provided */ + uint64_t user_len; /* length, as requested at reserve time. */ + uint64_t offset; /* offset from beginning of the fd */ + void *start; /* only valid if _ODP_ISHM_SINGLE_VA is set*/ + uint64_t len; /* length. multiple of page size. 0 if free*/ + ishm_fragment_t *fragment; /* used when _ODP_ISHM_SINGLE_VA is used */ + huge_flag_t huge; /* page type: external means unknown here. */ + uint64_t seq; /* sequence number, incremented on alloc and free */ + uint64_t refcnt;/* number of linux processes mapping this block */ +} ishm_block_t; + +/* + * Table of blocks describing allocated internal shared memory + * This table is visible to every ODP thread (linux process or pthreads). + * (it is allocated shared at odp init time and is therefore inherited by all) + * Table index is used as handle, so it cannot move!. Entry is regarded as + * free when len==0 + */ +typedef struct { + odp_spinlock_t lock; + uint64_t dev_seq; /* used when creating device names */ + /* limit for reserving memory using huge pages */ + uint64_t huge_page_limit; + uint32_t odpthread_cnt; /* number of running ODP threads */ + ishm_block_t block[ISHM_MAX_NB_BLOCKS]; + void *single_va_start; /* start of single VA memory */ + int single_va_fd; /* single VA memory file descriptor */ + odp_bool_t single_va_huge; /* single VA memory from huge pages */ + char single_va_filename[ISHM_FILENAME_MAXLEN]; +} ishm_table_t; +static ishm_table_t *ishm_tbl; + +/* + * Process local table containing the list of (believed) allocated blocks seen + * from the current process. There is one such table per linux process. linux + * threads within a process shares this table. + * The contents within this table may become obsolete when other processes + * reserve/free ishm blocks. This is what the procsync() function + * catches by comparing the block sequence number with the one in this table. + * This table is filled at ishm_reserve and ishm_lookup time. + * Entries are removed at ishm_free or procsync time. + * Note that flags and len are present in this table and seems to be redundant + * with those present in the ishm block table: but this is not fully true: + * When ishm_sync() detects obsolete mappings and tries to remove them, + * the entry in the ishm block table is then obsolete, and the values which are + * found in this table must be used to perform the ummap. + * (and the values in the block tables are needed at lookup time...) + */ +typedef struct { + int thrd_refcnt; /* number of pthreads in this process, really */ + struct { + int block_index; /* entry in the ishm_tbl */ + uint32_t flags; /* flags used at creation time */ + uint64_t seq; + void *start; /* start of block (VA) */ + uint64_t len; /* length of block. multiple of page size */ + int fd; /* file descriptor used for this block */ + } entry[ISHM_MAX_NB_BLOCKS]; + int nb_entries; +} ishm_proctable_t; +static ishm_proctable_t *ishm_proctable; + +/* + * Table of fragments describing the common virtual address space: + * This table is visible to every ODP thread (linux process or pthreads). + * (it is allocated at odp init time and is therefore inherited by all) + */ +typedef struct { + ishm_fragment_t fragment[ISHM_NB_FRAGMNTS]; + ishm_fragment_t *used_fragmnts; /* ordered by increasing start addr */ + ishm_fragment_t *unused_fragmnts; +} ishm_ftable_t; +static ishm_ftable_t *ishm_ftbl; + +struct huge_page_cache { + uint64_t len; + int max_fds; /* maximum amount requested of pre-allocated huge pages */ + int total; /* amount of actually pre-allocated huge pages */ + int idx; /* retrieve fd[idx] to get a free file descriptor */ + int fd[]; /* list of file descriptors */ +}; + +static struct huge_page_cache *hpc; + +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +/* prototypes: */ +static void procsync(void); + +static int hp_create_file(uint64_t len, const char *filename) +{ + int fd; + int ret; + void *addr; + + if (len <= 0) { + _ODP_ERR("Length is wrong\n"); + return -1; + } + + fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (fd < 0) { + _ODP_ERR("Could not create cache file %s\n", filename); + return -1; + } + + /* remove file from file system */ + unlink(filename); + + ret = fallocate(fd, 0, 0, len); + if (ret == -1) { + if (errno == ENOTSUP) { + _ODP_DBG("fallocate() not supported\n"); + ret = ftruncate(fd, len); + } + + if (ret == -1) { + _ODP_ERR("memory allocation failed: fd=%d, err=%s.\n", + fd, strerror(errno)); + close(fd); + return -1; + } + } + + /* commit huge page */ + addr = _odp_ishmphy_map(fd, len, 0, 0); + if (addr == NULL) { + /* no more pages available */ + close(fd); + return -1; + } + _odp_ishmphy_unmap(addr, len, 0); + + _ODP_DBG("Created HP cache file %s, fd: %d\n", filename, fd); + + return fd; +} + +static void hp_init(void) +{ + char filename[ISHM_FILENAME_MAXLEN]; + char dir[ISHM_FILENAME_MAXLEN]; + int count; + void *addr; + + if (!_odp_libconfig_lookup_ext_int("shm", NULL, "num_cached_hp", + &count)) { + return; + } + + if (count <= 0) + return; + + _ODP_DBG("Init HP cache with up to %d pages\n", count); + + if (!odp_global_ro.hugepage_info.default_huge_page_dir) { + _ODP_ERR("No huge page dir\n"); + return; + } + + snprintf(dir, ISHM_FILENAME_MAXLEN, "%s/%s", + odp_global_ro.hugepage_info.default_huge_page_dir, + odp_global_ro.uid); + + if (mkdir(dir, 0744) != 0) { + if (errno != EEXIST) { + _ODP_ERR("Failed to create dir: %s\n", strerror(errno)); + return; + } + } + + snprintf(filename, ISHM_FILENAME_MAXLEN, + "%s/odp-%d-ishm_cached", + dir, + odp_global_ro.main_pid); + + addr = mmap(NULL, + sizeof(struct huge_page_cache) + sizeof(int) * count, + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (addr == MAP_FAILED) { + _ODP_ERR("Unable to mmap memory for huge page cache\n."); + return; + } + + hpc = addr; + + hpc->max_fds = count; + hpc->total = 0; + hpc->idx = -1; + hpc->len = odp_sys_huge_page_size(); + + for (int i = 0; i < count; ++i) { + int fd; + + fd = hp_create_file(hpc->len, filename); + if (fd == -1) { + do { + hpc->fd[i++] = -1; + } while (i < count); + break; + } + hpc->total++; + hpc->fd[i] = fd; + } + hpc->idx = hpc->total - 1; + + _ODP_DBG("HP cache has %d huge pages of size 0x%08" PRIx64 "\n", + hpc->total, hpc->len); +} + +static void hp_term(void) +{ + if (NULL == hpc) + return; + + for (int i = 0; i < hpc->total; i++) { + if (hpc->fd[i] != -1) + close(hpc->fd[i]); + } + + hpc->total = 0; + hpc->idx = -1; + hpc->len = 0; +} + +static int hp_get_cached(uint64_t len) +{ + int fd; + + if (hpc == NULL) + return -1; + + if (hpc->idx < 0 || len != hpc->len) + return -1; + + fd = hpc->fd[hpc->idx]; + hpc->fd[hpc->idx--] = -1; + + return fd; +} + +static int hp_put_cached(int fd) +{ + if (hpc == NULL) { + _ODP_ERR("Bad hpc state\n"); + return -1; + } + + if (odp_unlikely((hpc->idx + 1) >= hpc->total)) { + _ODP_ERR("Trying to put more FD than allowed: %d\n", fd); + return -1; + } + + hpc->idx++; + hpc->fd[hpc->idx] = fd; + + return 0; +} + +/* + * Take a piece of the preallocated virtual space to fit "size" bytes. + * (best fit). Size must be rounded up to an integer number of pages size. + * Possibly split the fragment to keep track of remaining space. + * Returns the allocated fragment (best_fragment) and the corresponding address. + * External caller must ensure mutex before the call! + */ +static void *alloc_fragment(uintptr_t size, int block_index, intptr_t align, + ishm_fragment_t **best_fragmnt) +{ + ishm_fragment_t *fragmnt; + *best_fragmnt = NULL; + ishm_fragment_t *rem_fragmnt; + uintptr_t border;/* possible start of new fragment (next alignment) */ + intptr_t left; /* room remaining after, if the segment is allocated */ + uintptr_t remainder = odp_global_ro.shm_max_memory; + + /* + * search for the best bit, i.e. search for the unallocated fragment + * would give less remainder if the new fragment was allocated within + * it: + */ + for (fragmnt = ishm_ftbl->used_fragmnts; + fragmnt; fragmnt = fragmnt->next) { + /* skip allocated segment: */ + if (fragmnt->block_index >= 0) + continue; + /* skip too short segment: */ + border = ((uintptr_t)fragmnt->start + align - 1) & (-align); + left = + ((uintptr_t)fragmnt->start + fragmnt->len) - (border + size); + if (left < 0) + continue; + /* remember best fit: */ + if ((uintptr_t)left < remainder) { + remainder = left; /* best, so far */ + *best_fragmnt = fragmnt; + } + } + + if (!(*best_fragmnt)) { + _ODP_ERR("Out of single VA memory. Try increasing " + "'shm.single_va_size_kb' in ODP config.\n"); + return NULL; + } + + (*best_fragmnt)->block_index = block_index; + border = ((uintptr_t)(*best_fragmnt)->start + align - 1) & (-align); + + /* + * if there is room between previous fragment and new one, (due to + * alignment requirement) then fragment (split) the space between + * the end of the previous fragment and the beginning of the new one: + */ + if (border - (uintptr_t)(*best_fragmnt)->start > 0) { + /* fragment space, i.e. take a new fragment descriptor... */ + rem_fragmnt = ishm_ftbl->unused_fragmnts; + if (!rem_fragmnt) { + _ODP_ERR("unable to get shmem fragment descriptor!\n."); + return NULL; + } + ishm_ftbl->unused_fragmnts = rem_fragmnt->next; + + /* and link it between best_fragmnt->prev and best_fragmnt */ + if ((*best_fragmnt)->prev) + (*best_fragmnt)->prev->next = rem_fragmnt; + else + ishm_ftbl->used_fragmnts = rem_fragmnt; + rem_fragmnt->prev = (*best_fragmnt)->prev; + (*best_fragmnt)->prev = rem_fragmnt; + rem_fragmnt->next = (*best_fragmnt); + + /* update length: rem_fragmnt getting space before border */ + rem_fragmnt->block_index = -1; + rem_fragmnt->start = (*best_fragmnt)->start; + rem_fragmnt->len = border - (uintptr_t)(*best_fragmnt)->start; + (*best_fragmnt)->start = + (void *)((uintptr_t)rem_fragmnt->start + rem_fragmnt->len); + (*best_fragmnt)->len -= rem_fragmnt->len; + } + + /* if this was a perfect fit, i.e. no free space follows, we are done */ + if (remainder == 0) + return (*best_fragmnt)->start; + + /* otherwise, fragment space, i.e. take a new fragment descriptor... */ + rem_fragmnt = ishm_ftbl->unused_fragmnts; + if (!rem_fragmnt) { + _ODP_ERR("unable to get shmem fragment descriptor!\n."); + return (*best_fragmnt)->start; + } + ishm_ftbl->unused_fragmnts = rem_fragmnt->next; + + /* ... double link it... */ + rem_fragmnt->next = (*best_fragmnt)->next; + rem_fragmnt->prev = (*best_fragmnt); + if ((*best_fragmnt)->next) + (*best_fragmnt)->next->prev = rem_fragmnt; + (*best_fragmnt)->next = rem_fragmnt; + + /* ... and keep track of the remainder */ + (*best_fragmnt)->len = size; + rem_fragmnt->len = remainder; + rem_fragmnt->start = (void *)((char *)(*best_fragmnt)->start + size); + rem_fragmnt->block_index = -1; + + return (*best_fragmnt)->start; +} + +/* + * Free a portion of virtual space. + * Possibly defragment, if the freed fragment is adjacent to another + * free virtual fragment. + * External caller must ensure mutex before the call! + */ +static void free_fragment(ishm_fragment_t *fragmnt) +{ + ishm_fragment_t *prev_f; + ishm_fragment_t *next_f; + + /* sanity check */ + if (!fragmnt) + return; + + prev_f = fragmnt->prev; + next_f = fragmnt->next; + + /* free the fragment */ + fragmnt->block_index = -1; + + /* check if the previous fragment is also free: if so, defragment */ + if (prev_f && (prev_f->block_index < 0)) { + fragmnt->start = prev_f->start; + fragmnt->len += prev_f->len; + if (prev_f->prev) { + prev_f->prev->next = fragmnt; + } else { + if (ishm_ftbl->used_fragmnts == prev_f) + ishm_ftbl->used_fragmnts = fragmnt; + else + _ODP_ERR("corrupted fragment list!.\n"); + } + fragmnt->prev = prev_f->prev; + + /* put removed fragment in free list */ + prev_f->prev = NULL; + prev_f->next = ishm_ftbl->unused_fragmnts; + ishm_ftbl->unused_fragmnts = prev_f; + } + + /* check if the next fragment is also free: if so, defragment */ + if (next_f && (next_f->block_index < 0)) { + fragmnt->len += next_f->len; + if (next_f->next) + next_f->next->prev = fragmnt; + fragmnt->next = next_f->next; + + /* put removed fragment in free list */ + next_f->prev = NULL; + next_f->next = ishm_ftbl->unused_fragmnts; + ishm_ftbl->unused_fragmnts = next_f; + } +} + +static char *create_seq_string(char *output, size_t size) +{ + snprintf(output, size, "%08" PRIu64, ishm_tbl->dev_seq++); + + return output; +} + +static int create_export_file(ishm_block_t *new_block, const char *name, + uint64_t len, uint32_t flags, uint32_t align, + odp_bool_t single_va, uint64_t offset) +{ + FILE *export_file; + + snprintf(new_block->exptname, ISHM_FILENAME_MAXLEN, + ISHM_EXPTNAME_FORMAT, + odp_global_ro.shm_dir, + odp_global_ro.uid, + odp_global_ro.main_pid, + name); + export_file = fopen(new_block->exptname, "w"); + if (export_file == NULL) { + _ODP_ERR("open failed: err=%s.\n", strerror(errno)); + new_block->exptname[0] = 0; + return -1; + } + + fprintf(export_file, EXPORT_FILE_LINE1_FMT "\n"); + fprintf(export_file, EXPORT_FILE_LINE2_FMT "\n", new_block->name); + if (single_va) + fprintf(export_file, EXPORT_FILE_LINE3_FMT "\n", + ishm_tbl->single_va_filename); + else + fprintf(export_file, EXPORT_FILE_LINE3_FMT "\n", + new_block->filename); + + fprintf(export_file, EXPORT_FILE_LINE4_FMT "\n", len); + fprintf(export_file, EXPORT_FILE_LINE5_FMT "\n", flags); + fprintf(export_file, EXPORT_FILE_LINE6_FMT "\n", + new_block->user_len); + fprintf(export_file, EXPORT_FILE_LINE7_FMT "\n", + new_block->user_flags); + fprintf(export_file, EXPORT_FILE_LINE8_FMT "\n", align); + fprintf(export_file, EXPORT_FILE_LINE9_FMT "\n", offset); + + fclose(export_file); + + return 0; +} + +/* + * Create file with size len. returns -1 on error + * Creates a file to /dev/shm/odp-<pid>-<sequence_or_name> (for normal pages) + * or /mnt/huge/odp-<pid>-<sequence_or_name> (for huge pages). + * Return the new file descriptor, or -1 on error. + */ +static int create_file(int block_index, huge_flag_t huge, uint64_t len, + uint32_t flags, uint32_t align, odp_bool_t single_va) +{ + char *name; + int fd; + ishm_block_t *new_block = NULL; /* entry in the main block table */ + char seq_string[ISHM_FILENAME_MAXLEN]; /* used to construct filename*/ + char filename[ISHM_FILENAME_MAXLEN]; /* filename in /dev/shm or + * /mnt/huge */ + int oflag = O_RDWR | O_CREAT | O_TRUNC; /* flags for open */ + char dir[ISHM_FILENAME_MAXLEN]; + int ret; + const odp_bool_t use_huge = huge == HUGE; + + /* No ishm_block_t for the master single VA memory file */ + if (single_va) { + name = (char *)(uintptr_t)"single_va"; + } else { + new_block = &ishm_tbl->block[block_index]; + name = new_block->name; + if (!name || !name[0]) + name = create_seq_string(seq_string, + ISHM_FILENAME_MAXLEN); + } + + /* huge dir must be known to create files there!: */ + if (use_huge && !odp_global_ro.hugepage_info.default_huge_page_dir) + return -1; + + if (use_huge) + snprintf(dir, ISHM_FILENAME_MAXLEN, "%s/%s", + odp_global_ro.hugepage_info.default_huge_page_dir, + odp_global_ro.uid); + else + snprintf(dir, ISHM_FILENAME_MAXLEN, "%s/%s", + odp_global_ro.shm_dir, + odp_global_ro.uid); + + snprintf(filename, ISHM_FILENAME_MAXLEN, ISHM_FILENAME_FORMAT, dir, + odp_global_ro.main_pid, name); + + mkdir(dir, 0744); + + fd = open(filename, oflag, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (fd < 0) { + if (!use_huge) + _ODP_ERR("Normal page open failed: file=%s, " + "err=\"%s\"\n", filename, strerror(errno)); + return -1; + } + + ret = fallocate(fd, 0, 0, len); + if (ret == -1) { + if (errno == ENOTSUP) { + _ODP_DBG("fallocate() not supported\n"); + ret = ftruncate(fd, len); + } + + if (ret == -1) { + _ODP_LOG(use_huge ? ODP_LOG_WARN : ODP_LOG_ERR, + use_huge ? "WARN" : "ERR", + "%s page memory allocation failed: fd=%d, file=%s, err=\"%s\"\n", + use_huge ? "Huge" : "Normal", fd, filename, strerror(errno)); + close(fd); + unlink(filename); + return -1; + } + } + + /* No export file is created since this is only for internal use.*/ + if (single_va) { + snprintf(ishm_tbl->single_va_filename, ISHM_FILENAME_MAXLEN, + "%s", filename); + return fd; + } + + /* if _ODP_ISHM_EXPORT is set, create a description file for + * external ref: + */ + if (flags & _ODP_ISHM_EXPORT) { + memcpy(new_block->filename, filename, ISHM_FILENAME_MAXLEN); + + create_export_file(new_block, name, len, flags, align, false, + 0); + } else { + new_block->exptname[0] = 0; + /* remove the file from the filesystem, keeping its fd open */ + unlink(filename); + } + + return fd; +} + +/* delete the files related to a given ishm block: */ +static void delete_file(ishm_block_t *block) +{ + /* remove the .../odp-* file, unless fd was external or single va */ + if (block->filename[0] != 0 && + strcmp(block->filename, ishm_tbl->single_va_filename)) + unlink(block->filename); + /* also remove possible description file (if block was exported): */ + if (block->exptname[0] != 0) + unlink(block->exptname); +} + +/* + * Performs the mapping. + * Sets fd, and returns the mapping address. Not to be used with + * _ODP_ISHM_SINGLE_VA blocks. + * Mutex must be assured by the caller. + */ +static void *do_map(int block_index, uint64_t len, uint32_t align, + uint64_t offset, uint32_t flags, huge_flag_t huge, int *fd) +{ + ishm_block_t *new_block; /* entry in the main block table */ + void *mapped_addr; + + _ODP_ASSERT(!(flags & _ODP_ISHM_SINGLE_VA)); + + new_block = &ishm_tbl->block[block_index]; + + /* + * Creates a file to /dev/shm/odp-<pid>-<sequence> (for normal pages) + * or /mnt/huge/odp-<pid>-<sequence> (for huge pages) + * unless a fd was already given + */ + if (*fd < 0) { + *fd = create_file(block_index, huge, len, flags, align, false); + if (*fd < 0) + return NULL; + } else { + new_block->filename[0] = 0; + } + + /* try to mmap: */ + mapped_addr = _odp_ishmphy_map(*fd, len, offset, flags); + if (mapped_addr == NULL) { + if (!new_block->external_fd) { + close(*fd); + *fd = -1; + delete_file(new_block); + } + return NULL; + } + + return mapped_addr; +} + +/* + * Allocate block from pre-reserved single VA memory + */ +static void *alloc_single_va(const char *name, int new_index, uint64_t size, + uint32_t align, uint32_t flags, int *fd, + uint64_t *len_out) +{ + uint64_t len; + uint64_t page_sz; + char *file_name = (char *)(uintptr_t)name; + void *addr; + ishm_block_t *new_block = &ishm_tbl->block[new_index]; + ishm_fragment_t *fragment = NULL; + char seq_string[ISHM_FILENAME_MAXLEN]; + + if (!file_name || !file_name[0]) + file_name = create_seq_string(seq_string, ISHM_FILENAME_MAXLEN); + + /* Common fd for all single VA blocks */ + *fd = ishm_tbl->single_va_fd; + + if (ishm_tbl->single_va_huge) { + page_sz = odp_sys_huge_page_size(); + new_block->huge = HUGE; + } else { + page_sz = odp_sys_page_size(); + new_block->huge = NORMAL; + } + new_block->filename[0] = 0; + + len = (size + (page_sz - 1)) & (-page_sz); + + if (align < page_sz) + align = page_sz; + + /* Allocate memory from the pre-reserved single VA space */ + addr = alloc_fragment(len, new_index, align, &fragment); + if (!addr) { + _ODP_ERR("alloc_fragment failed.\n"); + return NULL; + } + new_block->fragment = fragment; + + /* Create export info file */ + if (flags & _ODP_ISHM_EXPORT) { + uint64_t offset = (uintptr_t)addr - + (uintptr_t)ishm_tbl->single_va_start; + memcpy(new_block->filename, ishm_tbl->single_va_filename, + ISHM_FILENAME_MAXLEN); + + create_export_file(new_block, file_name, len, flags, align, + true, offset); + } else { + new_block->exptname[0] = 0; + } + + *len_out = len; + return addr; +} + +/* + * Performs an extra mapping (for a process trying to see an existing block + * i.e. performing a lookup). Not to be used with _ODP_ISHM_SINGLE_VA blocks. + * Mutex must be assured by the caller. + */ +static void *do_remap(int block_index, int fd) +{ + void *mapped_addr; + uint64_t len; + uint64_t offset; + uint32_t flags; + + len = ishm_tbl->block[block_index].len; + offset = ishm_tbl->block[block_index].offset; + flags = ishm_tbl->block[block_index].flags; + + _ODP_ASSERT(!(flags & _ODP_ISHM_SINGLE_VA)); + + /* try to mmap: */ + mapped_addr = _odp_ishmphy_map(fd, len, offset, flags); + + if (mapped_addr == NULL) + return NULL; + + return mapped_addr; +} + +/* + * Performs unmapping, possibly freeing a pre-reserved single VA memory + * fragment, if the _ODP_ISHM_SINGLE_VA flag was set at alloc time. + * Mutex must be assured by the caller. + */ +static int do_unmap(void *start, uint64_t size, uint32_t flags, + int block_index) +{ + int ret; + + if (start) + ret = _odp_ishmphy_unmap(start, size, flags); + else + ret = 0; + + if ((block_index >= 0) && (flags & _ODP_ISHM_SINGLE_VA)) { + /* mark reserved address space as free */ + free_fragment(ishm_tbl->block[block_index].fragment); + } + + return ret; +} + +/* + * Search for a given used and allocated block name. + * (search is performed in the global ishm table) + * Returns the index of the found block (if any) or -1 if none. + * Mutex must be assured by the caller. + */ +static int find_block_by_name(const char *name) +{ + int i; + + if (name == NULL || name[0] == 0) + return -1; + + for (i = 0; i < ISHM_MAX_NB_BLOCKS; i++) { + if ((ishm_tbl->block[i].len) && + (strcmp(name, ishm_tbl->block[i].name) == 0)) + return i; + } + + return -1; +} + +/* + * Search a given ishm block in the process local table. Return its index + * in the process table or -1 if not found (meaning that the ishm table + * block index was not referenced in the process local table, i.e. the + * block is known by some other process, but not by the current process). + * Caller must assure mutex. + */ +static int procfind_block(int block_index) +{ + int i; + + for (i = 0; i < ishm_proctable->nb_entries; i++) { + if (ishm_proctable->entry[i].block_index == block_index) + return i; + } + return -1; +} + +/* + * Release the physical memory mapping for blocks which have been freed + * by other processes. Caller must ensure mutex. + * Mutex must be assured by the caller. + */ +static void procsync(void) +{ + int i = 0; + int last; + ishm_block_t *block; + + last = ishm_proctable->nb_entries; + while (i < last) { + /* if the process sequence number doesn't match the main + * table seq number, this entry is obsolete + */ + block = &ishm_tbl->block[ishm_proctable->entry[i].block_index]; + if (ishm_proctable->entry[i].seq != block->seq) { + /* obsolete entry: free memory and remove proc entry */ + if (ishm_proctable->entry[i].fd != + ishm_tbl->single_va_fd) + close(ishm_proctable->entry[i].fd); + _odp_ishmphy_unmap(ishm_proctable->entry[i].start, + ishm_proctable->entry[i].len, + ishm_proctable->entry[i].flags); + ishm_proctable->entry[i] = + ishm_proctable->entry[--last]; + } else { + i++; + } + } + ishm_proctable->nb_entries = last; +} + +/* + * Free a block as described in block_free(), but + * considering whether to close the file descriptor or not, and + * whether to deregister from the fdserver. + */ +static int block_free_internal(int block_index, int close_fd, int deregister) +{ + int proc_index; + ishm_block_t *block; /* entry in the main block table*/ + int last; + int ret = 0; + + if ((block_index < 0) || + (block_index >= ISHM_MAX_NB_BLOCKS) || + (ishm_tbl->block[block_index].len == 0)) { + _ODP_ERR("Request to free an invalid block\n"); + return -1; + } + + block = &ishm_tbl->block[block_index]; + + proc_index = procfind_block(block_index); + if (proc_index >= 0) { + int fd = ishm_proctable->entry[proc_index].fd; + + /* remove the mapping and possible fragment */ + do_unmap(ishm_proctable->entry[proc_index].start, + block->len, + ishm_proctable->entry[proc_index].flags, + block_index); + + /* close the related fd */ + if (close_fd && (fd != ishm_tbl->single_va_fd)) { + if (block->huge == CACHED) + hp_put_cached(fd); + else + close(fd); + } + + /* remove entry from process local table: */ + last = ishm_proctable->nb_entries - 1; + ishm_proctable->entry[proc_index] = ishm_proctable->entry[last]; + ishm_proctable->nb_entries = last; + } else { + /* just possibly free the fragment as no mapping exist here: */ + do_unmap(NULL, 0, block->flags, block_index); + } + + /* remove all files related to this block: */ + if (close_fd) + delete_file(block); + + /* deregister the file descriptor from the file descriptor server. */ + if (deregister) + ret = _odp_fdserver_deregister_fd(FD_SRV_CTX_ISHM, block_index); + + /* mark the block as free in the main block table: */ + block->len = 0; + + /* mark the change so other processes see this entry as obsolete: */ + block->seq++; + + return ret; +} + +/* + * Allocate and map internal shared memory, or other objects: + * If a name is given, check that this name is not already in use. + * If ok, allocate a new shared memory block and map the + * provided fd in it (if fd >=0 was given). + * If no fd is provided, a shared memory file desc named + * /dev/shm/odp-<pid>-ishm-<name_or_sequence> is created and mapped. + * (the name is different for huge page file as they must be on hugepagefs) + * The function returns the index of the newly created block in the + * main block table (>=0) or -1 on error. + */ +int _odp_ishm_reserve(const char *name, uint64_t size, int fd, + uint32_t align, uint64_t offset, uint32_t flags, + uint32_t user_flags) +{ + int new_index; /* index in the main block table*/ + ishm_block_t *new_block; /* entry in the main block table*/ + uint64_t page_sz; /* normal page size. usually 4K*/ + uint64_t page_hp_size; /* huge page size */ + uint32_t hp_align; + uint64_t len = 0; /* mapped length */ + void *addr = NULL; /* mapping address */ + int new_proc_entry; + static int huge_error_printed; /* to avoid millions of error...*/ + + odp_spinlock_lock(&ishm_tbl->lock); + + /* update this process view... */ + procsync(); + + /* Get system page sizes: page_hp_size is 0 if no huge page available*/ + page_sz = odp_sys_page_size(); + /* Use normal pages if ODP_SHM_NO_HP was used */ + page_hp_size = (user_flags & ODP_SHM_NO_HP) ? 0 : odp_sys_huge_page_size(); + + /* grab a new entry: */ + for (new_index = 0; new_index < ISHM_MAX_NB_BLOCKS; new_index++) { + if (ishm_tbl->block[new_index].len == 0) { + /* Found free block */ + break; + } + } + + /* check if we have reached the maximum number of allocation: */ + if (new_index >= ISHM_MAX_NB_BLOCKS) { + odp_spinlock_unlock(&ishm_tbl->lock); + _ODP_ERR("ISHM_MAX_NB_BLOCKS limit reached!\n"); + return -1; + } + + new_block = &ishm_tbl->block[new_index]; + + /* save block name (if any given): */ + if (name) + _odp_strcpy(new_block->name, name, ISHM_NAME_MAXLEN); + else + new_block->name[0] = 0; + + new_block->offset = 0; + + /* save user data: */ + new_block->user_flags = user_flags; + new_block->user_len = size; + + /* If a file descriptor is provided, get the real size and map: */ + if (fd >= 0) { + new_block->external_fd = 1; + len = size; + /* note that the huge page flag is meaningless here as huge + * page is determined by the provided file descriptor: */ + addr = do_map(new_index, len, align, offset, flags, EXTERNAL, + &fd); + if (addr == NULL) { + odp_spinlock_unlock(&ishm_tbl->lock); + _ODP_ERR("_ishm_reserve failed.\n"); + return -1; + } + new_block->huge = EXTERNAL; + } else { + new_block->external_fd = 0; + new_block->huge = UNKNOWN; + } + + /* Otherwise, Try first huge pages when possible and needed: */ + if ((fd < 0) && page_hp_size && ((user_flags & ODP_SHM_HP) || + size > ishm_tbl->huge_page_limit)) { + /* at least, alignment in VA should match page size, but user + * can request more: If the user requirement exceeds the page + * size then we have to make sure the block will be mapped at + * the same address every where, otherwise alignment may be + * wrong for some process */ + hp_align = align; + if (hp_align <= page_hp_size) + hp_align = page_hp_size; + else + flags |= _ODP_ISHM_SINGLE_VA; + + if (flags & _ODP_ISHM_SINGLE_VA) + goto use_single_va; + + /* roundup to page size */ + len = (size + (page_hp_size - 1)) & (-page_hp_size); + + /* try pre-allocated pages */ + fd = hp_get_cached(len); + if (fd != -1) { + /* do as if user provided a fd */ + new_block->external_fd = 1; + addr = do_map(new_index, len, hp_align, 0, flags, + CACHED, &fd); + if (addr == NULL) { + _ODP_ERR("Could not use cached hp %d\n", fd); + hp_put_cached(fd); + fd = -1; + } else { + new_block->huge = CACHED; + } + } + if (fd == -1) { + addr = do_map(new_index, len, hp_align, 0, flags, HUGE, + &fd); + + if (addr == NULL) { + if (!huge_error_printed) { + _ODP_WARN("No huge pages, fall back to normal pages. " + "Check: /proc/sys/vm/nr_hugepages.\n"); + huge_error_printed = 1; + } + } else { + new_block->huge = HUGE; + } + } + } + + /* Try normal pages if huge pages failed */ + if (fd < 0) { + if (user_flags & ODP_SHM_HP) { + odp_spinlock_unlock(&ishm_tbl->lock); + _ODP_ERR("Unable to allocate memory from huge pages\n"); + return -1; + } + /* at least, alignment in VA should match page size, but user + * can request more: If the user requirement exceeds the page + * size then we have to make sure the block will be mapped at + * the same address every where, otherwise alignment may be + * wrong for some process */ + if (align <= odp_sys_page_size()) + align = odp_sys_page_size(); + else + flags |= _ODP_ISHM_SINGLE_VA; + + if (flags & _ODP_ISHM_SINGLE_VA) + goto use_single_va; + + /* roundup to page size */ + len = (size + (page_sz - 1)) & (-page_sz); + addr = do_map(new_index, len, align, 0, flags, NORMAL, &fd); + new_block->huge = NORMAL; + } + +use_single_va: + /* Reserve memory from single VA space */ + if (fd < 0 && (flags & _ODP_ISHM_SINGLE_VA)) + addr = alloc_single_va(name, new_index, size, align, flags, &fd, + &len); + + /* if neither huge pages or normal pages works, we cannot proceed: */ + if ((fd < 0) || (addr == NULL) || (len == 0)) { + if (new_block->external_fd) { + if (new_block->huge == CACHED) + hp_put_cached(fd); + } else if (fd >= 0 && (fd != ishm_tbl->single_va_fd)) { + close(fd); + } + delete_file(new_block); + odp_spinlock_unlock(&ishm_tbl->lock); + _ODP_ERR("_ishm_reserve failed.\n"); + return -1; + } + + /* remember block data and increment block seq number to mark change */ + new_block->len = len; + new_block->user_len = size; + new_block->flags = flags; + new_block->user_flags = user_flags; + new_block->seq++; + new_block->refcnt = 1; + new_block->start = addr; /* only for SINGLE_VA*/ + + /* the allocation succeeded: update the process local view */ + new_proc_entry = ishm_proctable->nb_entries++; + ishm_proctable->entry[new_proc_entry].block_index = new_index; + ishm_proctable->entry[new_proc_entry].flags = flags; + ishm_proctable->entry[new_proc_entry].seq = new_block->seq; + ishm_proctable->entry[new_proc_entry].start = addr; + ishm_proctable->entry[new_proc_entry].len = len; + ishm_proctable->entry[new_proc_entry].fd = fd; + + /* register the file descriptor to the file descriptor server. */ + if (_odp_fdserver_register_fd(FD_SRV_CTX_ISHM, new_index, fd) == -1) { + block_free_internal(new_index, !new_block->external_fd, 0); + new_index = -1; + } + + odp_spinlock_unlock(&ishm_tbl->lock); + return new_index; +} + +/* + * Pre-reserve all single VA memory. Called only in global init. + */ +static void *reserve_single_va(uint64_t size, int *fd_out) +{ + uint64_t page_sz; /* normal page size. usually 4K*/ + uint64_t page_hp_size; /* huge page size */ + uint64_t len; /* mapped length */ + int fd = -1; + void *addr = NULL; + + /* Get system page sizes: page_hp_size is 0 if no huge page available*/ + page_sz = odp_sys_page_size(); + page_hp_size = odp_sys_huge_page_size(); + + /* Try first huge pages when possible and needed: */ + if (page_hp_size && (size > page_sz)) { + /* roundup to page size */ + len = (size + (page_hp_size - 1)) & (-page_hp_size); + fd = create_file(-1, HUGE, len, 0, 0, true); + if (fd >= 0) { + addr = _odp_ishmphy_reserve_single_va(len, fd); + if (!addr) { + close(fd); + unlink(ishm_tbl->single_va_filename); + fd = -1; + } + } + if (fd < 0) + _ODP_WARN("No huge pages, fall back to normal pages. " + "Check: /proc/sys/vm/nr_hugepages.\n"); + ishm_tbl->single_va_huge = true; + } + + /* Fall back to normal pages if necessary */ + if (fd < 0) { + /* roundup to page size */ + len = (size + (page_sz - 1)) & (-page_sz); + + fd = create_file(-1, NORMAL, len, 0, 0, true); + if (fd >= 0) + addr = _odp_ishmphy_reserve_single_va(len, fd); + ishm_tbl->single_va_huge = false; + } + + /* If neither huge pages or normal pages works, we cannot proceed: */ + if ((fd < 0) || (len == 0) || !addr) { + if (fd >= 0) { + close(fd); + unlink(ishm_tbl->single_va_filename); + } + _ODP_ERR("Reserving single VA memory failed.\n"); + return NULL; + } + + *fd_out = fd; + return addr; +} + +/* + * Try to map an memory block mapped by another ODP instance into the + * current ODP instance. + * returns 0 on success. + */ +int _odp_ishm_find_exported(const char *remote_name, pid_t external_odp_pid, + const char *local_name) +{ + char export_filename[ISHM_FILENAME_MAXLEN]; + char blockname[ISHM_FILENAME_MAXLEN]; + char filename[ISHM_FILENAME_MAXLEN]; + FILE *export_file; + uint64_t len; + uint32_t flags; + uint64_t user_len; + uint64_t offset; + uint32_t user_flags; + uint32_t align; + int fd; + int block_index; + + /* try to read the block description file: */ + snprintf(export_filename, ISHM_FILENAME_MAXLEN, + ISHM_EXPTNAME_FORMAT, + odp_global_ro.shm_dir, + odp_global_ro.uid, + external_odp_pid, + remote_name); + + export_file = fopen(export_filename, "r"); + + if (export_file == NULL) { + _ODP_ERR("Error opening %s.\n", export_filename); + return -1; + } + + if (fscanf(export_file, EXPORT_FILE_LINE1_FMT " ") != 0) + goto error_exp_file; + + if (fscanf(export_file, EXPORT_FILE_LINE2_FMT " ", blockname) != 1) + goto error_exp_file; + + if (fscanf(export_file, EXPORT_FILE_LINE3_FMT " ", filename) != 1) + goto error_exp_file; + + if (fscanf(export_file, EXPORT_FILE_LINE4_FMT " ", &len) != 1) + goto error_exp_file; + + if (fscanf(export_file, EXPORT_FILE_LINE5_FMT " ", &flags) != 1) + goto error_exp_file; + + if (fscanf(export_file, EXPORT_FILE_LINE6_FMT " ", &user_len) != 1) + goto error_exp_file; + + if (fscanf(export_file, EXPORT_FILE_LINE7_FMT " ", &user_flags) != 1) + goto error_exp_file; + + if (fscanf(export_file, EXPORT_FILE_LINE8_FMT " ", &align) != 1) + goto error_exp_file; + + if (fscanf(export_file, EXPORT_FILE_LINE9_FMT " ", &offset) != 1) + goto error_exp_file; + + fclose(export_file); + + /* now open the filename given in the description file: */ + fd = open(filename, O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (fd == -1) { + _ODP_ERR("open failed for %s: %s.\n", filename, strerror(errno)); + return -1; + } + + /* Clear the _ODP_ISHM_EXPORT flag so we don't export again. Single + * VA doesn't hold up after export. */ + flags &= ~(uint32_t)_ODP_ISHM_EXPORT; + flags &= ~(uint32_t)_ODP_ISHM_SINGLE_VA; + + /* reserve the memory, providing the opened file descriptor: */ + block_index = _odp_ishm_reserve(local_name, len, fd, align, offset, + flags, 0); + if (block_index < 0) { + close(fd); + return block_index; + } + + /* Offset is required to remap the block to other processes */ + ishm_tbl->block[block_index].offset = offset; + + /* set inherited info: */ + ishm_tbl->block[block_index].user_flags = user_flags; + ishm_tbl->block[block_index].user_len = user_len; + + return block_index; + +error_exp_file: + fclose(export_file); + _ODP_ERR("Error reading %s.\n", export_filename); + return -1; +} + +/* + * Free and unmap internal shared memory: + * The file descriptor is closed and the .../odp-* file deleted, + * unless fd was externally provided at reserve() time. + * return 0 if OK, and -1 on error. + * Mutex must be assured by the caller. + */ +static int block_free(int block_index) +{ + return block_free_internal(block_index, 1, 1); +} + +/* + * Free and unmap internal shared memory, identified by its block number: + * return -1 on error. 0 if OK. + */ +int _odp_ishm_free_by_index(int block_index) +{ + int ret; + + odp_spinlock_lock(&ishm_tbl->lock); + procsync(); + + ret = block_free(block_index); + odp_spinlock_unlock(&ishm_tbl->lock); + return ret; +} + +/* + * Lookup for an ishm shared memory, identified by its block index + * in the main ishm block table. + * Map this ishm area in the process VA (if not already present). + * Returns the block user address or NULL on error. + * Mutex must be assured by the caller. + */ +static void *block_lookup(int block_index) +{ + int proc_index; + int fd = -1; + ishm_block_t *block; + void *mapped_addr; + int new_entry; + + if ((block_index < 0) || + (block_index >= ISHM_MAX_NB_BLOCKS) || + (ishm_tbl->block[block_index].len == 0)) { + _ODP_ERR("Request to lookup an invalid block\n"); + return NULL; + } + + /* search it in process table: if there, this process knows it already*/ + proc_index = procfind_block(block_index); + if (proc_index >= 0) + return ishm_proctable->entry[proc_index].start; + + /* this ishm is not known by this process, yet: we create the mapping.*/ + fd = _odp_fdserver_lookup_fd(FD_SRV_CTX_ISHM, block_index); + if (fd < 0) { + _ODP_ERR("Could not find ishm file descriptor (BUG!)\n"); + return NULL; + } + + /* perform the mapping */ + block = &ishm_tbl->block[block_index]; + + /* No need to remap single VA */ + if (block->flags & _ODP_ISHM_SINGLE_VA) + mapped_addr = block->start; + else + mapped_addr = do_remap(block_index, fd); + + if (mapped_addr == NULL) { + _ODP_ERR(" lookup: Could not map existing shared memory!\n"); + return NULL; + } + + /* the mapping succeeded: update the process local view */ + new_entry = ishm_proctable->nb_entries++; + ishm_proctable->entry[new_entry].block_index = block_index; + ishm_proctable->entry[new_entry].flags = block->flags; + ishm_proctable->entry[new_entry].seq = block->seq; + ishm_proctable->entry[new_entry].start = mapped_addr; + ishm_proctable->entry[new_entry].len = block->len; + ishm_proctable->entry[new_entry].fd = fd; + block->refcnt++; + + return mapped_addr; +} + +/* + * Lookup for an ishm shared memory, identified by its block name. + * Return the block index, or -1 if the index does not match any known ishm + * blocks. + */ +int _odp_ishm_lookup_by_name(const char *name) +{ + int block_index; + + odp_spinlock_lock(&ishm_tbl->lock); + procsync(); + + /* search the block in main ishm table: return -1 if not found: */ + block_index = find_block_by_name(name); + + odp_spinlock_unlock(&ishm_tbl->lock); + return block_index; +} + +/* + * Returns the VA address of a given block. Maps this ishm area in the process + * VA (if not already present). + * Returns NULL if the block is unknown. + */ +void *_odp_ishm_address(int block_index) +{ + void *addr; + + odp_spinlock_lock(&ishm_tbl->lock); + procsync(); + + addr = block_lookup(block_index); + + odp_spinlock_unlock(&ishm_tbl->lock); + return addr; +} + +int _odp_ishm_info(int block_index, _odp_ishm_info_t *info) +{ + int proc_index; + + odp_spinlock_lock(&ishm_tbl->lock); + procsync(); + + if ((block_index < 0) || + (block_index >= ISHM_MAX_NB_BLOCKS) || + (ishm_tbl->block[block_index].len == 0)) { + odp_spinlock_unlock(&ishm_tbl->lock); + _ODP_ERR("Request for info on an invalid block\n"); + return -1; + } + + /* search it in process table: if not there, need to map*/ + proc_index = procfind_block(block_index); + if (proc_index < 0) { + odp_spinlock_unlock(&ishm_tbl->lock); + return -1; + } + + info->name = ishm_tbl->block[block_index].name; + info->addr = ishm_proctable->entry[proc_index].start; + info->size = ishm_tbl->block[block_index].user_len; + info->page_size = (ishm_tbl->block[block_index].huge == HUGE) ? + odp_sys_huge_page_size() : odp_sys_page_size(); + info->flags = ishm_tbl->block[block_index].flags; + info->user_flags = ishm_tbl->block[block_index].user_flags; + + odp_spinlock_unlock(&ishm_tbl->lock); + return 0; +} + +static int do_odp_ishm_init_local(void) +{ + int i; + int block_index; + + /* + * the ishm_process table is local to each linux process + * Check that no other linux threads (of same or ancestor processes) + * have already created the table, and create it if needed. + * We protect this with the general ishm lock to avoid + * init race condition of different running threads. + */ + odp_spinlock_lock(&ishm_tbl->lock); + ishm_tbl->odpthread_cnt++; /* count ODPthread (pthread or process) */ + if (!ishm_proctable) { + ishm_proctable = malloc(sizeof(ishm_proctable_t)); + if (!ishm_proctable) { + odp_spinlock_unlock(&ishm_tbl->lock); + return -1; + } + memset(ishm_proctable, 0, sizeof(ishm_proctable_t)); + } + if (syscall(SYS_gettid) != getpid()) + ishm_proctable->thrd_refcnt++; /* new linux thread */ + else + ishm_proctable->thrd_refcnt = 1;/* new linux process */ + + /* + * if this ODP thread is actually a new linux process, (as opposed + * to a pthread), i.e, we just forked, then all shmem blocks + * of the parent process are mapped into this child by inheritance. + * (The process local table is inherited as well). We hence have to + * increase the process refcount for each of the inherited mappings: + */ + if (syscall(SYS_gettid) == getpid()) { + for (i = 0; i < ishm_proctable->nb_entries; i++) { + block_index = ishm_proctable->entry[i].block_index; + ishm_tbl->block[block_index].refcnt++; + } + } + + odp_spinlock_unlock(&ishm_tbl->lock); + return 0; +} + +/* remove all files staring with "odp-<pid>" from a directory "dir" */ +int _odp_ishm_cleanup_files(const char *dirpath) +{ + struct dirent *e; + DIR *dir; + char userdir[PATH_MAX]; + char prefix[PATH_MAX]; + char *fullpath; + int d_len; + int p_len; + int f_len; + + snprintf(userdir, PATH_MAX, "%s/%s", dirpath, odp_global_ro.uid); + d_len = strlen(userdir); + + dir = opendir(userdir); + if (!dir) { + /* ok if the dir does not exist. no much to delete then! */ + _ODP_DBG("opendir failed for %s: %s\n", userdir, strerror(errno)); + return 0; + } + snprintf(prefix, PATH_MAX, _ODP_FILES_FMT, odp_global_ro.main_pid); + p_len = strlen(prefix); + while ((e = readdir(dir)) != NULL) { + if (strncmp(e->d_name, prefix, p_len) == 0) { + f_len = strlen(e->d_name); + fullpath = malloc(d_len + f_len + 2); + if (fullpath == NULL) { + closedir(dir); + return -1; + } + snprintf(fullpath, PATH_MAX, "%s/%s", + userdir, e->d_name); + _ODP_DBG("deleting obsolete file: %s\n", fullpath); + if (unlink(fullpath)) + _ODP_ERR("unlink failed for %s: %s\n", fullpath, strerror(errno)); + free(fullpath); + } + } + closedir(dir); + + return 0; +} + +int _odp_ishm_init_global(const odp_init_t *init) +{ + void *addr; + void *spce_addr = NULL; + int i; + int val_kb; + uid_t uid; + char *hp_dir = odp_global_ro.hugepage_info.default_huge_page_dir; + uint64_t max_memory; + uint64_t internal; + uint64_t huge_page_limit; + + if (!_odp_libconfig_lookup_ext_int("shm", NULL, "single_va_size_kb", + &val_kb)) { + _ODP_ERR("Unable to read single VA size from config\n"); + return -1; + } + + _ODP_DBG("Shm single VA size: %dkB\n", val_kb); + + max_memory = (uint64_t)val_kb * 1024; + internal = max_memory / 8; + + if (!_odp_libconfig_lookup_ext_int("shm", NULL, "huge_page_limit_kb", + &val_kb)) { + _ODP_ERR("Unable to read huge page usage limit from config\n"); + return -1; + } + huge_page_limit = (uint64_t)val_kb * 1024; + + _ODP_DBG("Shm huge page usage limit: %dkB\n", val_kb); + + /* user requested memory size + some extra for internal use */ + if (init && init->shm.max_memory) + max_memory = init->shm.max_memory + internal; + + odp_global_ro.shm_max_memory = max_memory; + odp_global_ro.shm_max_size = max_memory - internal; + odp_global_ro.main_pid = getpid(); + odp_global_ro.shm_dir = getenv("ODP_SHM_DIR"); + if (odp_global_ro.shm_dir) { + odp_global_ro.shm_dir_from_env = 1; + } else { + odp_global_ro.shm_dir = + calloc(1, sizeof(ISHM_FILENAME_NORMAL_PAGE_DIR)); + sprintf(odp_global_ro.shm_dir, "%s", + ISHM_FILENAME_NORMAL_PAGE_DIR); + odp_global_ro.shm_dir_from_env = 0; + } + + _ODP_DBG("ishm: using dir %s\n", odp_global_ro.shm_dir); + + uid = getuid(); + snprintf(odp_global_ro.uid, UID_MAXLEN, "%d", + uid); + + if ((syscall(SYS_gettid)) != odp_global_ro.main_pid) { + _ODP_ERR("ishm init must be performed by the main " + "ODP process!\n."); + return -1; + } + + if (!hp_dir) { + _ODP_DBG("NOTE: No support for huge pages\n"); + } else { + _ODP_DBG("Huge pages mount point is: %s\n", hp_dir); + _odp_ishm_cleanup_files(hp_dir); + } + + _odp_ishm_cleanup_files(odp_global_ro.shm_dir); + + /* allocate space for the internal shared mem block table: */ + addr = mmap(NULL, sizeof(ishm_table_t), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (addr == MAP_FAILED) { + _ODP_ERR("unable to mmap the main block table\n."); + goto init_glob_err1; + } + ishm_tbl = addr; + memset(ishm_tbl, 0, sizeof(ishm_table_t)); + ishm_tbl->dev_seq = 0; + ishm_tbl->odpthread_cnt = 0; + ishm_tbl->huge_page_limit = huge_page_limit; + odp_spinlock_init(&ishm_tbl->lock); + + /* allocate space for the internal shared mem fragment table: */ + addr = mmap(NULL, sizeof(ishm_ftable_t), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (addr == MAP_FAILED) { + _ODP_ERR("unable to mmap the main fragment table\n."); + goto init_glob_err2; + } + ishm_ftbl = addr; + memset(ishm_ftbl, 0, sizeof(ishm_ftable_t)); + + /* Reserve memory for _ODP_ISHM_SINGLE_VA reserved blocks */ + ishm_tbl->single_va_fd = -1; + if (max_memory) { + spce_addr = reserve_single_va(max_memory, + &ishm_tbl->single_va_fd); + if (!spce_addr) { + _ODP_ERR("unable to reserve single VA memory\n."); + goto init_glob_err3; + } + ishm_tbl->single_va_start = spce_addr; + } + + /* use the first fragment descriptor to describe to whole VA space: */ + ishm_ftbl->fragment[0].block_index = -1; + ishm_ftbl->fragment[0].start = spce_addr; + ishm_ftbl->fragment[0].len = max_memory; + ishm_ftbl->fragment[0].prev = NULL; + ishm_ftbl->fragment[0].next = NULL; + ishm_ftbl->used_fragmnts = &ishm_ftbl->fragment[0]; + + /* and put all other fragment descriptors in the unused list: */ + for (i = 1; i < ISHM_NB_FRAGMNTS - 1; i++) { + ishm_ftbl->fragment[i].prev = NULL; + ishm_ftbl->fragment[i].next = &ishm_ftbl->fragment[i + 1]; + } + ishm_ftbl->fragment[ISHM_NB_FRAGMNTS - 1].prev = NULL; + ishm_ftbl->fragment[ISHM_NB_FRAGMNTS - 1].next = NULL; + ishm_ftbl->unused_fragmnts = &ishm_ftbl->fragment[1]; + + /* + * We run _odp_ishm_init_local() directly here to give the + * possibility to run shm_reserve() before the odp_init_local() + * is performed for the main thread... Many init_global() functions + * indeed assume the availability of odp_shm_reserve()...: + */ + if (do_odp_ishm_init_local()) { + _ODP_ERR("unable to init the main thread\n."); + goto init_glob_err4; + } + + /* get ready to create pools: */ + _odp_ishm_pool_init(); + + /* init cache files */ + hp_init(); + + return 0; + +init_glob_err4: + if (_odp_ishmphy_free_single_va()) + _ODP_ERR("unable to free single VA memory\n."); +init_glob_err3: + if (munmap(ishm_ftbl, sizeof(ishm_ftable_t)) < 0) + _ODP_ERR("unable to munmap main fragment table\n."); +init_glob_err2: + if (munmap(ishm_tbl, sizeof(ishm_table_t)) < 0) + _ODP_ERR("unable to munmap main block table\n."); +init_glob_err1: + return -1; +} + +int _odp_ishm_init_local(void) +{ + /* + * Do not re-run this for the main ODP process, as it has already + * been done in advance at _odp_ishm_init_global() time: + */ + if ((getpid() == odp_global_ro.main_pid) && + (syscall(SYS_gettid) == getpid())) + return 0; + + return do_odp_ishm_init_local(); +} + +static int do_odp_ishm_term_local(void) +{ + int i; + int proc_table_refcnt = 0; + int block_index; + ishm_block_t *block; + + procsync(); + + ishm_tbl->odpthread_cnt--; /* decount ODPthread (pthread or process) */ + + /* + * The ishm_process table is local to each linux process + * Check that no other linux threads (of this linux process) + * still needs the table, and free it if so. + * We protect this with the general ishm lock to avoid + * term race condition of different running threads. + */ + proc_table_refcnt = --ishm_proctable->thrd_refcnt; + if (!proc_table_refcnt) { + /* + * this is the last thread of this process... + * All mappings for this process are about to be lost... + * Go through the table of visible blocks for this process, + * decreasing the refcnt of each visible blocks, and issuing + * warning for those no longer referenced by any process. + * Note that non-referenced blocks are not freed: this is + * deliberate as this would imply that the semantic of the + * freeing function would differ depending on whether we run + * with odp_thread as processes or pthreads. With this approach, + * the user should always free the blocks manually, which is + * more consistent + */ + for (i = 0; i < ishm_proctable->nb_entries; i++) { + block_index = ishm_proctable->entry[i].block_index; + block = &ishm_tbl->block[block_index]; + if ((--block->refcnt) <= 0) { + block->refcnt = 0; + _ODP_DBG("Warning: block %d: name:%s " + "no longer referenced\n", + i, + ishm_tbl->block[i].name[0] ? + ishm_tbl->block[i].name : "<no name>"); + } + } + + free(ishm_proctable); + ishm_proctable = NULL; + } + + return 0; +} + +int _odp_ishm_term_local(void) +{ + int ret; + + odp_spinlock_lock(&ishm_tbl->lock); + + /* postpone last thread term to allow free() by global term functions:*/ + if (ishm_tbl->odpthread_cnt == 1) { + odp_spinlock_unlock(&ishm_tbl->lock); + return 0; + } + + ret = do_odp_ishm_term_local(); + odp_spinlock_unlock(&ishm_tbl->lock); + return ret; +} + +int _odp_ishm_term_global(void) +{ + int ret = 0; + int index; + int fd = ishm_tbl->single_va_fd; + ishm_block_t *block; + + if ((getpid() != odp_global_ro.main_pid) || + (syscall(SYS_gettid) != getpid())) + _ODP_ERR("odp_term_global() must be performed by the main " + "ODP process!\n."); + + /* cleanup possibly non freed memory (and complain a bit): */ + for (index = 0; index < ISHM_MAX_NB_BLOCKS; index++) { + block = &ishm_tbl->block[index]; + if (block->len != 0) { + _ODP_ERR("block '%s' (file %s) was never freed " + "(cleaning up...).\n", + block->name, block->filename); + delete_file(block); + } + } + + /* perform the last thread terminate which was postponed: */ + ret = do_odp_ishm_term_local(); + + /* remove the file from the filesystem, keeping its fd open */ + unlink(ishm_tbl->single_va_filename); + + /* free the fragment table */ + if (munmap(ishm_ftbl, sizeof(ishm_ftable_t)) < 0) { + ret |= -1; + _ODP_ERR("unable to munmap fragment table\n."); + } + /* free the block table */ + if (munmap(ishm_tbl, sizeof(ishm_table_t)) < 0) { + ret |= -1; + _ODP_ERR("unable to munmap main table\n."); + } + + /* free the reserved single VA memory */ + if (_odp_ishmphy_free_single_va()) + ret |= -1; + if ((fd >= 0) && close(fd)) { + ret |= -1; + _ODP_ERR("unable to close single VA\n."); + } + + if (!odp_global_ro.shm_dir_from_env) + free(odp_global_ro.shm_dir); + + hp_term(); + + return ret; +} + +/* + * Print the current ishm status (allocated blocks and VA space map) + * Return the number of allocated blocks (including those not mapped + * by the current odp thread). Also perform a number of sanity check. + * For debug. + */ +int _odp_ishm_status(const char *title) +{ + int i; + char flags[3]; + char huge; + int proc_index; + ishm_fragment_t *fragmnt; + int consecutive_unallocated = 0; /* should never exceed 1 */ + uintptr_t last_address = 0; + ishm_fragment_t *previous = NULL; + int nb_used_frgments = 0; + int nb_unused_frgments = 0; /* nb frag describing a VA area */ + int nb_allocated_frgments = 0; /* nb frag describing an allocated VA */ + int nb_blocks = 0; + int single_va_blocks = 0; + int max_name_len = 0; + uint64_t lost_total = 0; /* statistics for total unused memory */ + uint64_t len_total = 0; /* statistics for total allocated memory */ + + odp_spinlock_lock(&ishm_tbl->lock); + procsync(); + + /* find longest block name */ + for (i = 0; i < ISHM_MAX_NB_BLOCKS; i++) { + int str_len; + + if (ishm_tbl->block[i].len <= 0) + continue; + + str_len = strlen(ishm_tbl->block[i].name); + + if (max_name_len < str_len) + max_name_len = str_len; + } + + _ODP_PRINT("%s\n", title); + _ODP_PRINT(" %-*s flag %-29s %-8s %-8s %-3s %-3s %-3s file\n", + max_name_len, "name", "range", "user_len", "unused", + "seq", "ref", "fd"); + + /* display block table: 1 line per entry +1 extra line if mapped here */ + for (i = 0; i < ISHM_MAX_NB_BLOCKS; i++) { + void *start_addr = NULL; + void *end_addr = NULL; + int entry_fd = -1; + + if (ishm_tbl->block[i].len <= 0) + continue; /* unused block */ + + nb_blocks++; + if (ishm_tbl->block[i].flags & _ODP_ISHM_SINGLE_VA) + single_va_blocks++; + + flags[0] = (ishm_tbl->block[i].flags & _ODP_ISHM_SINGLE_VA) ? + 'S' : '.'; + flags[1] = (ishm_tbl->block[i].flags & _ODP_ISHM_LOCK) ? + 'L' : '.'; + flags[2] = 0; + switch (ishm_tbl->block[i].huge) { + case HUGE: + huge = 'H'; + break; + case NORMAL: + huge = 'N'; + break; + case EXTERNAL: + huge = 'E'; + break; + case CACHED: + huge = 'C'; + break; + default: + huge = '?'; + } + proc_index = procfind_block(i); + lost_total += ishm_tbl->block[i].len - + ishm_tbl->block[i].user_len; + len_total += ishm_tbl->block[i].len; + + if (proc_index >= 0) { + start_addr = ishm_proctable->entry[proc_index].start; + end_addr = (void *)(uintptr_t)((uintptr_t)start_addr + + ishm_tbl->block[i].len); + entry_fd = ishm_proctable->entry[proc_index].fd; + } + + _ODP_PRINT("%2i %-*s %s%c %p-%p %-8" PRIu64 " " + "%-8" PRIu64 " %-3" PRIu64 " %-3" PRIu64 " " + "%-3d %s\n", + i, max_name_len, ishm_tbl->block[i].name, + flags, huge, start_addr, end_addr, + ishm_tbl->block[i].user_len, + ishm_tbl->block[i].len - ishm_tbl->block[i].user_len, + ishm_tbl->block[i].seq, + ishm_tbl->block[i].refcnt, + entry_fd, + ishm_tbl->block[i].filename[0] ? + ishm_tbl->block[i].filename : + "(none)"); + } + _ODP_PRINT("TOTAL: %58s%-8" PRIu64 " %2s%-8" PRIu64 "\n", + "", len_total, + "", lost_total); + _ODP_PRINT("%65s(%" PRIu64 "MB) %4s(%" PRIu64 "MB)\n", + "", len_total / 1024 / 1024, + "", lost_total / 1024 / 1024); + + /* display the virtual space allocations... : */ + _ODP_PRINT("\nishm virtual space:\n"); + for (fragmnt = ishm_ftbl->used_fragmnts; + fragmnt; fragmnt = fragmnt->next) { + if (fragmnt->block_index >= 0) { + nb_allocated_frgments++; + _ODP_PRINT(" %8p - %8p: ALLOCATED by block:%d\n", + fragmnt->start, + (void *)((uintptr_t)fragmnt->start + fragmnt->len - 1), + fragmnt->block_index); + consecutive_unallocated = 0; + } else { + _ODP_PRINT(" %8p - %8p: NOT ALLOCATED\n", + fragmnt->start, + (void *)((uintptr_t)fragmnt->start + fragmnt->len - 1)); + if (consecutive_unallocated++) + _ODP_ERR("defragmentation error\n"); + } + + /* some other sanity checks: */ + if (fragmnt->prev != previous) + _ODP_ERR("chaining error\n"); + + if (fragmnt != ishm_ftbl->used_fragmnts) { + if ((uintptr_t)fragmnt->start != last_address + 1) + _ODP_ERR("lost space error\n"); + } + + last_address = (uintptr_t)fragmnt->start + fragmnt->len - 1; + previous = fragmnt; + nb_used_frgments++; + } + + /* + * the number of blocks with the single_VA flag set should match + * the number of used fragments: + */ + if (single_va_blocks != nb_allocated_frgments) + _ODP_ERR("single_va_blocks != nb_allocated_fragments!\n"); + + /* compute the number of unused fragments*/ + for (fragmnt = ishm_ftbl->unused_fragmnts; + fragmnt; fragmnt = fragmnt->next) + nb_unused_frgments++; + + _ODP_PRINT("ishm: %d fragment used. %d fragments unused. (total=%d)\n", + nb_used_frgments, nb_unused_frgments, + nb_used_frgments + nb_unused_frgments); + + if ((nb_used_frgments + nb_unused_frgments) != ISHM_NB_FRAGMNTS) + _ODP_ERR("lost fragments!\n"); + + if (nb_blocks < ishm_proctable->nb_entries) + _ODP_ERR("process known block cannot exceed main total sum!\n"); + + _ODP_PRINT("\n"); + + odp_spinlock_unlock(&ishm_tbl->lock); + return nb_blocks; +} + +void _odp_ishm_print(int block_index) +{ + ishm_block_t *block; + const char *str; + + odp_spinlock_lock(&ishm_tbl->lock); + + if ((block_index < 0) || + (block_index >= ISHM_MAX_NB_BLOCKS) || + (ishm_tbl->block[block_index].len == 0)) { + odp_spinlock_unlock(&ishm_tbl->lock); + _ODP_ERR("Request for info on an invalid block\n"); + return; + } + + block = &ishm_tbl->block[block_index]; + + _ODP_PRINT("\nSHM block info\n--------------\n"); + _ODP_PRINT(" name: %s\n", block->name); + _ODP_PRINT(" file: %s\n", block->filename); + _ODP_PRINT(" expt: %s\n", block->exptname); + _ODP_PRINT(" user_flags: 0x%x\n", block->user_flags); + _ODP_PRINT(" flags: 0x%x\n", block->flags); + _ODP_PRINT(" user_len: %" PRIu64 "\n", block->user_len); + _ODP_PRINT(" start: %p\n", block->start); + _ODP_PRINT(" len: %" PRIu64 "\n", block->len); + + switch (block->huge) { + case HUGE: + str = "huge"; + break; + case NORMAL: + str = "normal"; + break; + case EXTERNAL: + str = "external"; + break; + case CACHED: + str = "cached"; + break; + default: + str = "??"; + } + + _ODP_PRINT(" page type: %s\n", str); + _ODP_PRINT(" seq: %" PRIu64 "\n", block->seq); + _ODP_PRINT(" refcnt: %" PRIu64 "\n", block->refcnt); + _ODP_PRINT("\n"); + + odp_spinlock_unlock(&ishm_tbl->lock); +} + +int32_t odp_system_meminfo(odp_system_meminfo_t *info, odp_system_memblock_t memblock[], + int32_t max_num) +{ + ishm_block_t *block; + int name_len, proc_index; + int32_t i; + uintptr_t addr; + uint64_t len, lost, page_size; + uint64_t lost_total = 0; + uint64_t len_total = 0; + int32_t num = 0; + const uint64_t huge_sz = odp_sys_huge_page_size(); + const uint64_t normal_sz = odp_sys_page_size(); + + odp_spinlock_lock(&ishm_tbl->lock); + procsync(); + + for (i = 0; i < ISHM_MAX_NB_BLOCKS; i++) { + block = &ishm_tbl->block[i]; + + len = block->len; + if (len == 0) + continue; + + lost = len - block->user_len; + + if (num < max_num) { + odp_system_memblock_t *mb = &memblock[num]; + + name_len = strlen(block->name); + if (name_len >= ODP_SYSTEM_MEMBLOCK_NAME_LEN) + name_len = ODP_SYSTEM_MEMBLOCK_NAME_LEN - 1; + + memcpy(mb->name, block->name, name_len); + mb->name[name_len] = 0; + + addr = 0; + proc_index = procfind_block(i); + if (proc_index >= 0) + addr = (uintptr_t)ishm_proctable->entry[proc_index].start; + + page_size = 0; + if (block->huge == HUGE) + page_size = huge_sz; + else if (block->huge == NORMAL) + page_size = normal_sz; + + mb->addr = addr; + mb->used = len; + mb->overhead = lost; + mb->page_size = page_size; + } + + len_total += len; + lost_total += lost; + + num++; + } + + odp_spinlock_unlock(&ishm_tbl->lock); + + info->total_mapped = len_total; + info->total_used = len_total; + info->total_overhead = lost_total; + + return num; +} |