diff options
Diffstat (limited to 'contrib')
71 files changed, 7236 insertions, 4337 deletions
diff --git a/contrib/elf2dmp/Makefile.objs b/contrib/elf2dmp/Makefile.objs deleted file mode 100644 index e3140f58cf..0000000000 --- a/contrib/elf2dmp/Makefile.objs +++ /dev/null @@ -1 +0,0 @@ -elf2dmp-obj-y = main.o addrspace.o download.o pdb.o qemu_elf.o diff --git a/contrib/elf2dmp/addrspace.c b/contrib/elf2dmp/addrspace.c index 8a76069cb5..81295a1153 100644 --- a/contrib/elf2dmp/addrspace.c +++ b/contrib/elf2dmp/addrspace.c @@ -11,9 +11,10 @@ static struct pa_block *pa_space_find_block(struct pa_space *ps, uint64_t pa) { size_t i; + for (i = 0; i < ps->block_nr; i++) { if (ps->block[i].paddr <= pa && - pa <= ps->block[i].paddr + ps->block[i].size) { + pa < ps->block[i].paddr + ps->block[i].size) { return ps->block + i; } } @@ -21,7 +22,7 @@ static struct pa_block *pa_space_find_block(struct pa_space *ps, uint64_t pa) return NULL; } -static uint8_t *pa_space_resolve(struct pa_space *ps, uint64_t pa) +static void *pa_space_resolve(struct pa_space *ps, uint64_t pa) { struct pa_block *block = pa_space_find_block(ps, pa); @@ -32,7 +33,44 @@ static uint8_t *pa_space_resolve(struct pa_space *ps, uint64_t pa) return block->addr + (pa - block->paddr); } -int pa_space_create(struct pa_space *ps, QEMU_Elf *qemu_elf) +static bool pa_space_read64(struct pa_space *ps, uint64_t pa, uint64_t *value) +{ + uint64_t *resolved = pa_space_resolve(ps, pa); + + if (!resolved) { + return false; + } + + *value = *resolved; + + return true; +} + +static void pa_block_align(struct pa_block *b) +{ + uint64_t low_align = ((b->paddr - 1) | ELF2DMP_PAGE_MASK) + 1 - b->paddr; + uint64_t high_align = (b->paddr + b->size) & ELF2DMP_PAGE_MASK; + + if (low_align == 0 && high_align == 0) { + return; + } + + if (low_align + high_align < b->size) { + printf("Block 0x%"PRIx64"+:0x%"PRIx64" will be aligned to " + "0x%"PRIx64"+:0x%"PRIx64"\n", b->paddr, b->size, + b->paddr + low_align, b->size - low_align - high_align); + b->size -= low_align + high_align; + } else { + printf("Block 0x%"PRIx64"+:0x%"PRIx64" is too small to align\n", + b->paddr, b->size); + b->size = 0; + } + + b->addr += low_align; + b->paddr += low_align; +} + +void pa_space_create(struct pa_space *ps, QEMU_Elf *qemu_elf) { Elf64_Half phdr_nr = elf_getphdrnum(qemu_elf->map); Elf64_Phdr *phdr = elf64_getphdr(qemu_elf->map); @@ -47,29 +85,28 @@ int pa_space_create(struct pa_space *ps, QEMU_Elf *qemu_elf) } } - ps->block = malloc(sizeof(*ps->block) * ps->block_nr); - if (!ps->block) { - return 1; - } + ps->block = g_new(struct pa_block, ps->block_nr); for (i = 0; i < phdr_nr; i++) { - if (phdr[i].p_type == PT_LOAD) { + if (phdr[i].p_type == PT_LOAD && phdr[i].p_offset < qemu_elf->size) { ps->block[block_i] = (struct pa_block) { .addr = (uint8_t *)qemu_elf->map + phdr[i].p_offset, .paddr = phdr[i].p_paddr, - .size = phdr[i].p_filesz, + .size = MIN(phdr[i].p_filesz, + qemu_elf->size - phdr[i].p_offset), }; - block_i++; + pa_block_align(&ps->block[block_i]); + block_i = ps->block[block_i].size ? (block_i + 1) : block_i; } } - return 0; + ps->block_nr = block_i; } void pa_space_destroy(struct pa_space *ps) { ps->block_nr = 0; - free(ps->block); + g_free(ps->block); } void va_space_set_dtb(struct va_space *vs, uint64_t dtb) @@ -83,19 +120,20 @@ void va_space_create(struct va_space *vs, struct pa_space *ps, uint64_t dtb) va_space_set_dtb(vs, dtb); } -static uint64_t get_pml4e(struct va_space *vs, uint64_t va) +static bool get_pml4e(struct va_space *vs, uint64_t va, uint64_t *value) { uint64_t pa = (vs->dtb & 0xffffffffff000) | ((va & 0xff8000000000) >> 36); - return *(uint64_t *)pa_space_resolve(vs->ps, pa); + return pa_space_read64(vs->ps, pa, value); } -static uint64_t get_pdpi(struct va_space *vs, uint64_t va, uint64_t pml4e) +static bool get_pdpi(struct va_space *vs, uint64_t va, uint64_t pml4e, + uint64_t *value) { uint64_t pdpte_paddr = (pml4e & 0xffffffffff000) | ((va & 0x7FC0000000) >> 27); - return *(uint64_t *)pa_space_resolve(vs->ps, pdpte_paddr); + return pa_space_read64(vs->ps, pdpte_paddr, value); } static uint64_t pde_index(uint64_t va) @@ -108,11 +146,12 @@ static uint64_t pdba_base(uint64_t pdpe) return pdpe & 0xFFFFFFFFFF000; } -static uint64_t get_pgd(struct va_space *vs, uint64_t va, uint64_t pdpe) +static bool get_pgd(struct va_space *vs, uint64_t va, uint64_t pdpe, + uint64_t *value) { uint64_t pgd_entry = pdba_base(pdpe) + pde_index(va) * 8; - return *(uint64_t *)pa_space_resolve(vs->ps, pgd_entry); + return pa_space_read64(vs->ps, pgd_entry, value); } static uint64_t pte_index(uint64_t va) @@ -125,11 +164,12 @@ static uint64_t ptba_base(uint64_t pde) return pde & 0xFFFFFFFFFF000; } -static uint64_t get_pte(struct va_space *vs, uint64_t va, uint64_t pgd) +static bool get_pte(struct va_space *vs, uint64_t va, uint64_t pgd, + uint64_t *value) { uint64_t pgd_val = ptba_base(pgd) + pte_index(va) * 8; - return *(uint64_t *)pa_space_resolve(vs->ps, pgd_val); + return pa_space_read64(vs->ps, pgd_val, value); } static uint64_t get_paddr(uint64_t va, uint64_t pte) @@ -161,13 +201,11 @@ static uint64_t va_space_va2pa(struct va_space *vs, uint64_t va) { uint64_t pml4e, pdpe, pgd, pte; - pml4e = get_pml4e(vs, va); - if (!is_present(pml4e)) { + if (!get_pml4e(vs, va, &pml4e) || !is_present(pml4e)) { return INVALID_PA; } - pdpe = get_pdpi(vs, va, pml4e); - if (!is_present(pdpe)) { + if (!get_pdpi(vs, va, pml4e, &pdpe) || !is_present(pdpe)) { return INVALID_PA; } @@ -175,8 +213,7 @@ static uint64_t va_space_va2pa(struct va_space *vs, uint64_t va) return get_1GB_paddr(va, pdpe); } - pgd = get_pgd(vs, va, pdpe); - if (!is_present(pgd)) { + if (!get_pgd(vs, va, pdpe, &pgd) || !is_present(pgd)) { return INVALID_PA; } @@ -184,8 +221,7 @@ static uint64_t va_space_va2pa(struct va_space *vs, uint64_t va) return get_2MB_paddr(va, pgd); } - pte = get_pte(vs, va, pgd); - if (!is_present(pte)) { + if (!get_pte(vs, va, pgd, &pte) || !is_present(pte)) { return INVALID_PA; } @@ -203,19 +239,19 @@ void *va_space_resolve(struct va_space *vs, uint64_t va) return pa_space_resolve(vs->ps, pa); } -int va_space_rw(struct va_space *vs, uint64_t addr, - void *buf, size_t size, int is_write) +bool va_space_rw(struct va_space *vs, uint64_t addr, + void *buf, size_t size, int is_write) { while (size) { - uint64_t page = addr & PFN_MASK; - size_t s = (page + PAGE_SIZE) - addr; + uint64_t page = addr & ELF2DMP_PFN_MASK; + size_t s = (page + ELF2DMP_PAGE_SIZE) - addr; void *ptr; s = (s > size) ? size : s; ptr = va_space_resolve(vs, addr); if (!ptr) { - return 1; + return false; } if (is_write) { @@ -229,5 +265,5 @@ int va_space_rw(struct va_space *vs, uint64_t addr, addr += s; } - return 0; + return true; } diff --git a/contrib/elf2dmp/addrspace.h b/contrib/elf2dmp/addrspace.h index d87f6a18c6..2ad30a9da4 100644 --- a/contrib/elf2dmp/addrspace.h +++ b/contrib/elf2dmp/addrspace.h @@ -10,9 +10,10 @@ #include "qemu_elf.h" -#define PAGE_BITS 12 -#define PAGE_SIZE (1ULL << PAGE_BITS) -#define PFN_MASK (~(PAGE_SIZE - 1)) +#define ELF2DMP_PAGE_BITS 12 +#define ELF2DMP_PAGE_SIZE (1ULL << ELF2DMP_PAGE_BITS) +#define ELF2DMP_PAGE_MASK (ELF2DMP_PAGE_SIZE - 1) +#define ELF2DMP_PFN_MASK (~(ELF2DMP_PAGE_SIZE - 1)) #define INVALID_PA UINT64_MAX @@ -32,13 +33,13 @@ struct va_space { struct pa_space *ps; }; -int pa_space_create(struct pa_space *ps, QEMU_Elf *qemu_elf); +void pa_space_create(struct pa_space *ps, QEMU_Elf *qemu_elf); void pa_space_destroy(struct pa_space *ps); void va_space_create(struct va_space *vs, struct pa_space *ps, uint64_t dtb); void va_space_set_dtb(struct va_space *vs, uint64_t dtb); void *va_space_resolve(struct va_space *vs, uint64_t va); -int va_space_rw(struct va_space *vs, uint64_t addr, - void *buf, size_t size, int is_write); +bool va_space_rw(struct va_space *vs, uint64_t addr, + void *buf, size_t size, int is_write); #endif /* ADDRSPACE_H */ diff --git a/contrib/elf2dmp/download.c b/contrib/elf2dmp/download.c index d09e607431..21306b3fd4 100644 --- a/contrib/elf2dmp/download.c +++ b/contrib/elf2dmp/download.c @@ -9,39 +9,35 @@ #include <curl/curl.h> #include "download.h" -int download_url(const char *name, const char *url) +bool download_url(const char *name, const char *url) { - int err = 0; + bool success = false; FILE *file; CURL *curl = curl_easy_init(); if (!curl) { - return 1; + return false; } file = fopen(name, "wb"); if (!file) { - err = 1; goto out_curl; } - curl_easy_setopt(curl, CURLOPT_URL, url); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, file); - curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); - curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0); - - if (curl_easy_perform(curl) != CURLE_OK) { - err = 1; - fclose(file); + if (curl_easy_setopt(curl, CURLOPT_URL, url) != CURLE_OK + || curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL) != CURLE_OK + || curl_easy_setopt(curl, CURLOPT_WRITEDATA, file) != CURLE_OK + || curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1) != CURLE_OK + || curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0) != CURLE_OK + || curl_easy_perform(curl) != CURLE_OK) { unlink(name); - goto out_curl; + fclose(file); + } else { + success = !fclose(file); } - err = fclose(file); - out_curl: curl_easy_cleanup(curl); - return err; + return success; } diff --git a/contrib/elf2dmp/download.h b/contrib/elf2dmp/download.h index 5c274925f7..f65adb5d08 100644 --- a/contrib/elf2dmp/download.h +++ b/contrib/elf2dmp/download.h @@ -8,6 +8,6 @@ #ifndef DOWNLOAD_H #define DOWNLOAD_H -int download_url(const char *name, const char *url); +bool download_url(const char *name, const char *url); #endif /* DOWNLOAD_H */ diff --git a/contrib/elf2dmp/kdbg.h b/contrib/elf2dmp/kdbg.h index 851b57c321..002e3d0cd5 100644 --- a/contrib/elf2dmp/kdbg.h +++ b/contrib/elf2dmp/kdbg.h @@ -25,11 +25,15 @@ typedef struct DBGKD_GET_VERSION64 { uint64_t DebuggerDataList; } DBGKD_GET_VERSION64; +#ifndef _WIN32 +typedef struct LIST_ENTRY64 { + struct LIST_ENTRY64 *Flink; + struct LIST_ENTRY64 *Blink; +} LIST_ENTRY64; +#endif + typedef struct DBGKD_DEBUG_DATA_HEADER64 { - struct LIST_ENTRY64 { - struct LIST_ENTRY64 *Flink; - struct LIST_ENTRY64 *Blink; - } List; + LIST_ENTRY64 List; uint32_t OwnerTag; uint32_t Size; } DBGKD_DEBUG_DATA_HEADER64; diff --git a/contrib/elf2dmp/main.c b/contrib/elf2dmp/main.c index 7115b0d6d0..d046a72ae6 100644 --- a/contrib/elf2dmp/main.c +++ b/contrib/elf2dmp/main.c @@ -6,6 +6,8 @@ */ #include "qemu/osdep.h" +#include "qemu/bitops.h" + #include "err.h" #include "addrspace.h" #include "pe.h" @@ -16,8 +18,10 @@ #define SYM_URL_BASE "https://msdl.microsoft.com/download/symbols/" #define PDB_NAME "ntkrnlmp.pdb" +#define PE_NAME "ntoskrnl.exe" #define INITIAL_MXCSR 0x1f80 +#define MAX_NUMBER_OF_RUNS 42 typedef struct idt_desc { uint16_t offset1; /* offset bits 0..15 */ @@ -41,12 +45,8 @@ static const uint64_t SharedUserData = 0xfffff78000000000; #define KUSD_OFFSET_PRODUCT_TYPE 0x264 #define SYM_RESOLVE(base, r, s) ((s = pdb_resolve(base, r, #s)),\ - s ? printf(#s" = 0x%016lx\n", s) : eprintf("Failed to resolve "#s"\n"), s) - -static uint64_t rol(uint64_t x, uint64_t y) -{ - return (x << y) | (x >> (64 - y)); -} + s ? printf(#s" = 0x%016"PRIx64"\n", s) :\ + eprintf("Failed to resolve "#s"\n"), s) /* * Decoding algorithm can be found in Volatility project @@ -60,7 +60,7 @@ static void kdbg_decode(uint64_t *dst, uint64_t *src, size_t size, uint64_t block; block = src[i]; - block = rol(block ^ kwn, (uint8_t)kwn); + block = rol64(block ^ kwn, kwn); block = __builtin_bswap64(block ^ kdbe) ^ kwa; dst[i] = block; } @@ -75,9 +75,9 @@ static KDDEBUGGER_DATA64 *get_kdbg(uint64_t KernBase, struct pdb_reader *pdb, bool decode = false; uint64_t kwn, kwa, KdpDataBlockEncoded; - if (va_space_rw(vs, - KdDebuggerDataBlock + offsetof(KDDEBUGGER_DATA64, Header), - &kdbg_hdr, sizeof(kdbg_hdr), 0)) { + if (!va_space_rw(vs, + KdDebuggerDataBlock + offsetof(KDDEBUGGER_DATA64, Header), + &kdbg_hdr, sizeof(kdbg_hdr), 0)) { eprintf("Failed to extract KDBG header\n"); return NULL; } @@ -93,13 +93,13 @@ static KDDEBUGGER_DATA64 *get_kdbg(uint64_t KernBase, struct pdb_reader *pdb, return NULL; } - if (va_space_rw(vs, KiWaitNever, &kwn, sizeof(kwn), 0) || - va_space_rw(vs, KiWaitAlways, &kwa, sizeof(kwa), 0)) { + if (!va_space_rw(vs, KiWaitNever, &kwn, sizeof(kwn), 0) || + !va_space_rw(vs, KiWaitAlways, &kwa, sizeof(kwa), 0)) { return NULL; } - printf("[KiWaitNever] = 0x%016lx\n", kwn); - printf("[KiWaitAlways] = 0x%016lx\n", kwa); + printf("[KiWaitNever] = 0x%016"PRIx64"\n", kwn); + printf("[KiWaitAlways] = 0x%016"PRIx64"\n", kwa); /* * If KDBG header can be decoded, KDBG size is available @@ -116,13 +116,11 @@ static KDDEBUGGER_DATA64 *get_kdbg(uint64_t KernBase, struct pdb_reader *pdb, } } - kdbg = malloc(kdbg_hdr.Size); - if (!kdbg) { - return NULL; - } + kdbg = g_malloc(kdbg_hdr.Size); - if (va_space_rw(vs, KdDebuggerDataBlock, kdbg, kdbg_hdr.Size, 0)) { + if (!va_space_rw(vs, KdDebuggerDataBlock, kdbg, kdbg_hdr.Size, 0)) { eprintf("Failed to extract entire KDBG\n"); + g_free(kdbg); return NULL; } @@ -139,10 +137,10 @@ static KDDEBUGGER_DATA64 *get_kdbg(uint64_t KernBase, struct pdb_reader *pdb, return kdbg; } -static void win_context_init_from_qemu_cpu_state(WinContext *ctx, +static void win_context_init_from_qemu_cpu_state(WinContext64 *ctx, QEMUCPUState *s) { - WinContext win_ctx = (WinContext){ + WinContext64 win_ctx = (WinContext64){ .ContextFlags = WIN_CTX_X64 | WIN_CTX_INT | WIN_CTX_SEG | WIN_CTX_CTL, .MxCsr = INITIAL_MXCSR, @@ -184,13 +182,13 @@ static void win_context_init_from_qemu_cpu_state(WinContext *ctx, * Finds paging-structure hierarchy base, * if previously set doesn't give access to kernel structures */ -static int fix_dtb(struct va_space *vs, QEMU_Elf *qe) +static bool fix_dtb(struct va_space *vs, QEMU_Elf *qe) { /* * Firstly, test previously set DTB. */ if (va_space_resolve(vs, SharedUserData)) { - return 0; + return true; } /* @@ -202,9 +200,9 @@ static int fix_dtb(struct va_space *vs, QEMU_Elf *qe) if (is_system(s)) { va_space_set_dtb(vs, s->cr[3]); - printf("DTB 0x%016lx has been found from CPU #%zu" + printf("DTB 0x%016"PRIx64" has been found from CPU #%zu" " as system task CR3\n", vs->dtb, i); - return !(va_space_resolve(vs, SharedUserData)); + return va_space_resolve(vs, SharedUserData); } } @@ -218,21 +216,58 @@ static int fix_dtb(struct va_space *vs, QEMU_Elf *qe) uint64_t *cr3 = va_space_resolve(vs, Prcb + 0x7000); if (!cr3) { - return 1; + return false; } va_space_set_dtb(vs, *cr3); - printf("DirectoryTableBase = 0x%016lx has been found from CPU #0" + printf("DirectoryTableBase = 0x%016"PRIx64" has been found from CPU #0" " as interrupt handling CR3\n", vs->dtb); - return !(va_space_resolve(vs, SharedUserData)); + return va_space_resolve(vs, SharedUserData); } - return 1; + return true; } -static int fill_header(WinDumpHeader64 *hdr, struct pa_space *ps, - struct va_space *vs, uint64_t KdDebuggerDataBlock, - KDDEBUGGER_DATA64 *kdbg, uint64_t KdVersionBlock, int nr_cpus) +static void try_merge_runs(struct pa_space *ps, + WinDumpPhyMemDesc64 *PhysicalMemoryBlock) +{ + unsigned int merge_cnt = 0, run_idx = 0; + + PhysicalMemoryBlock->NumberOfRuns = 0; + + for (size_t idx = 0; idx < ps->block_nr; idx++) { + struct pa_block *blk = ps->block + idx; + struct pa_block *next = blk + 1; + + PhysicalMemoryBlock->NumberOfPages += blk->size / ELF2DMP_PAGE_SIZE; + + if (idx + 1 != ps->block_nr && blk->paddr + blk->size == next->paddr) { + printf("Block #%zu 0x%"PRIx64"+:0x%"PRIx64" and %u previous will be" + " merged\n", idx, blk->paddr, blk->size, merge_cnt); + merge_cnt++; + } else { + struct pa_block *first_merged = blk - merge_cnt; + + printf("Block #%zu 0x%"PRIx64"+:0x%"PRIx64" and %u previous will be" + " merged to 0x%"PRIx64"+:0x%"PRIx64" (run #%u)\n", + idx, blk->paddr, blk->size, merge_cnt, first_merged->paddr, + blk->paddr + blk->size - first_merged->paddr, run_idx); + PhysicalMemoryBlock->Run[run_idx] = (WinDumpPhyMemRun64) { + .BasePage = first_merged->paddr / ELF2DMP_PAGE_SIZE, + .PageCount = (blk->paddr + blk->size - first_merged->paddr) / + ELF2DMP_PAGE_SIZE, + }; + PhysicalMemoryBlock->NumberOfRuns++; + run_idx++; + merge_cnt = 0; + } + } +} + +static bool fill_header(WinDumpHeader64 *hdr, struct pa_space *ps, + struct va_space *vs, uint64_t KdDebuggerDataBlock, + KDDEBUGGER_DATA64 *kdbg, uint64_t KdVersionBlock, + int nr_cpus) { uint32_t *suite_mask = va_space_resolve(vs, SharedUserData + KUSD_OFFSET_SUITE_MASK); @@ -240,18 +275,17 @@ static int fill_header(WinDumpHeader64 *hdr, struct pa_space *ps, KUSD_OFFSET_PRODUCT_TYPE); DBGKD_GET_VERSION64 kvb; WinDumpHeader64 h; - size_t i; - QEMU_BUILD_BUG_ON(KUSD_OFFSET_SUITE_MASK >= PAGE_SIZE); - QEMU_BUILD_BUG_ON(KUSD_OFFSET_PRODUCT_TYPE >= PAGE_SIZE); + QEMU_BUILD_BUG_ON(KUSD_OFFSET_SUITE_MASK >= ELF2DMP_PAGE_SIZE); + QEMU_BUILD_BUG_ON(KUSD_OFFSET_PRODUCT_TYPE >= ELF2DMP_PAGE_SIZE); if (!suite_mask || !product_type) { - return 1; + return false; } - if (va_space_rw(vs, KdVersionBlock, &kvb, sizeof(kvb), 0)) { + if (!va_space_rw(vs, KdVersionBlock, &kvb, sizeof(kvb), 0)) { eprintf("Failed to extract KdVersionBlock\n"); - return 1; + return false; } h = (WinDumpHeader64) { @@ -278,64 +312,116 @@ static int fill_header(WinDumpHeader64 *hdr, struct pa_space *ps, .RequiredDumpSpace = sizeof(h), }; - for (i = 0; i < ps->block_nr; i++) { - h.PhysicalMemoryBlock.NumberOfPages += ps->block[i].size / PAGE_SIZE; - h.PhysicalMemoryBlock.Run[i] = (WinDumpPhyMemRun64) { - .BasePage = ps->block[i].paddr / PAGE_SIZE, - .PageCount = ps->block[i].size / PAGE_SIZE, - }; + if (h.PhysicalMemoryBlock.NumberOfRuns <= MAX_NUMBER_OF_RUNS) { + for (size_t idx = 0; idx < ps->block_nr; idx++) { + h.PhysicalMemoryBlock.NumberOfPages += + ps->block[idx].size / ELF2DMP_PAGE_SIZE; + h.PhysicalMemoryBlock.Run[idx] = (WinDumpPhyMemRun64) { + .BasePage = ps->block[idx].paddr / ELF2DMP_PAGE_SIZE, + .PageCount = ps->block[idx].size / ELF2DMP_PAGE_SIZE, + }; + } + } else { + try_merge_runs(ps, &h.PhysicalMemoryBlock); } - h.RequiredDumpSpace += h.PhysicalMemoryBlock.NumberOfPages << PAGE_BITS; + h.RequiredDumpSpace += + h.PhysicalMemoryBlock.NumberOfPages << ELF2DMP_PAGE_BITS; *hdr = h; - return 0; + return true; } -static int fill_context(KDDEBUGGER_DATA64 *kdbg, - struct va_space *vs, QEMU_Elf *qe) +/* + * fill_context() continues even if it fails to fill contexts of some CPUs. + * A dump may still contain valuable information even if it lacks contexts of + * some CPUs due to dump corruption or a failure before starting CPUs. + */ +static void fill_context(KDDEBUGGER_DATA64 *kdbg, + struct va_space *vs, QEMU_Elf *qe) { - int i; + int i; + for (i = 0; i < qe->state_nr; i++) { uint64_t Prcb; uint64_t Context; - WinContext ctx; + WinContext64 ctx; QEMUCPUState *s = qe->state[i]; - if (va_space_rw(vs, kdbg->KiProcessorBlock + sizeof(Prcb) * i, - &Prcb, sizeof(Prcb), 0)) { + if (!va_space_rw(vs, kdbg->KiProcessorBlock + sizeof(Prcb) * i, + &Prcb, sizeof(Prcb), 0)) { eprintf("Failed to read CPU #%d PRCB location\n", i); - return 1; + continue; + } + + if (!Prcb) { + eprintf("Context for CPU #%d is missing\n", i); + continue; } - if (va_space_rw(vs, Prcb + kdbg->OffsetPrcbContext, - &Context, sizeof(Context), 0)) { + if (!va_space_rw(vs, Prcb + kdbg->OffsetPrcbContext, + &Context, sizeof(Context), 0)) { eprintf("Failed to read CPU #%d ContextFrame location\n", i); - return 1; + continue; } printf("Filling context for CPU #%d...\n", i); win_context_init_from_qemu_cpu_state(&ctx, s); - if (va_space_rw(vs, Context, &ctx, sizeof(ctx), 1)) { + if (!va_space_rw(vs, Context, &ctx, sizeof(ctx), 1)) { eprintf("Failed to fill CPU #%d context\n", i); - return 1; + continue; } } +} - return 0; +static bool pe_get_data_dir_entry(uint64_t base, void *start_addr, int idx, + void *entry, size_t size, struct va_space *vs) +{ + const char e_magic[2] = "MZ"; + const char Signature[4] = "PE\0\0"; + IMAGE_DOS_HEADER *dos_hdr = start_addr; + IMAGE_NT_HEADERS64 nt_hdrs; + IMAGE_FILE_HEADER *file_hdr = &nt_hdrs.FileHeader; + IMAGE_OPTIONAL_HEADER64 *opt_hdr = &nt_hdrs.OptionalHeader; + IMAGE_DATA_DIRECTORY *data_dir = nt_hdrs.OptionalHeader.DataDirectory; + + QEMU_BUILD_BUG_ON(sizeof(*dos_hdr) >= ELF2DMP_PAGE_SIZE); + + if (memcmp(&dos_hdr->e_magic, e_magic, sizeof(e_magic))) { + return false; + } + + if (!va_space_rw(vs, base + dos_hdr->e_lfanew, + &nt_hdrs, sizeof(nt_hdrs), 0)) { + return false; + } + + if (memcmp(&nt_hdrs.Signature, Signature, sizeof(Signature)) || + file_hdr->Machine != 0x8664 || opt_hdr->Magic != 0x020b) { + return false; + } + + if (!va_space_rw(vs, base + data_dir[idx].VirtualAddress, entry, size, 0)) { + return false; + } + + printf("Data directory entry #%d: RVA = 0x%08"PRIx32"\n", idx, + (uint32_t)data_dir[idx].VirtualAddress); + + return true; } -static int write_dump(struct pa_space *ps, - WinDumpHeader64 *hdr, const char *name) +static bool write_dump(struct pa_space *ps, + WinDumpHeader64 *hdr, const char *name) { FILE *dmp_file = fopen(name, "wb"); size_t i; if (!dmp_file) { eprintf("Failed to open output file \'%s\'\n", name); - return 1; + return false; } printf("Writing header to file...\n"); @@ -343,118 +429,86 @@ static int write_dump(struct pa_space *ps, if (fwrite(hdr, sizeof(*hdr), 1, dmp_file) != 1) { eprintf("Failed to write dump header\n"); fclose(dmp_file); - return 1; + return false; } for (i = 0; i < ps->block_nr; i++) { struct pa_block *b = &ps->block[i]; - printf("Writing block #%zu/%zu to file...\n", i, ps->block_nr); + printf("Writing block #%zu/%zu of %"PRIu64" bytes to file...\n", i, + ps->block_nr, b->size); if (fwrite(b->addr, b->size, 1, dmp_file) != 1) { - eprintf("Failed to write dump header\n"); + eprintf("Failed to write block\n"); fclose(dmp_file); - return 1; + return false; } } - return fclose(dmp_file); + return !fclose(dmp_file); } -static int pe_get_pdb_symstore_hash(uint64_t base, void *start_addr, - char *hash, struct va_space *vs) +static bool pe_check_pdb_name(uint64_t base, void *start_addr, + struct va_space *vs, OMFSignatureRSDS *rsds) { - const char e_magic[2] = "MZ"; - const char Signature[4] = "PE\0\0"; const char sign_rsds[4] = "RSDS"; - IMAGE_DOS_HEADER *dos_hdr = start_addr; - IMAGE_NT_HEADERS64 nt_hdrs; - IMAGE_FILE_HEADER *file_hdr = &nt_hdrs.FileHeader; - IMAGE_OPTIONAL_HEADER64 *opt_hdr = &nt_hdrs.OptionalHeader; - IMAGE_DATA_DIRECTORY *data_dir = nt_hdrs.OptionalHeader.DataDirectory; IMAGE_DEBUG_DIRECTORY debug_dir; - OMFSignatureRSDS rsds; - char *pdb_name; - size_t pdb_name_sz; - size_t i; + char pdb_name[sizeof(PDB_NAME)]; - QEMU_BUILD_BUG_ON(sizeof(*dos_hdr) >= PAGE_SIZE); - - if (memcmp(&dos_hdr->e_magic, e_magic, sizeof(e_magic))) { - return 1; - } - - if (va_space_rw(vs, base + dos_hdr->e_lfanew, - &nt_hdrs, sizeof(nt_hdrs), 0)) { - return 1; - } - - if (memcmp(&nt_hdrs.Signature, Signature, sizeof(Signature)) || - file_hdr->Machine != 0x8664 || opt_hdr->Magic != 0x020b) { - return 1; - } - - printf("Debug Directory RVA = 0x%016x\n", - data_dir[IMAGE_FILE_DEBUG_DIRECTORY].VirtualAddress); - - if (va_space_rw(vs, - base + data_dir[IMAGE_FILE_DEBUG_DIRECTORY].VirtualAddress, - &debug_dir, sizeof(debug_dir), 0)) { - return 1; + if (!pe_get_data_dir_entry(base, start_addr, IMAGE_FILE_DEBUG_DIRECTORY, + &debug_dir, sizeof(debug_dir), vs)) { + eprintf("Failed to get Debug Directory\n"); + return false; } if (debug_dir.Type != IMAGE_DEBUG_TYPE_CODEVIEW) { - return 1; + eprintf("Debug Directory type is not CodeView\n"); + return false; } - if (va_space_rw(vs, - base + debug_dir.AddressOfRawData, - &rsds, sizeof(rsds), 0)) { - return 1; + if (!va_space_rw(vs, base + debug_dir.AddressOfRawData, + rsds, sizeof(*rsds), 0)) { + eprintf("Failed to resolve OMFSignatureRSDS\n"); + return false; } - printf("CodeView signature is \'%.4s\'\n", rsds.Signature); - - if (memcmp(&rsds.Signature, sign_rsds, sizeof(sign_rsds))) { - return 1; + if (memcmp(&rsds->Signature, sign_rsds, sizeof(sign_rsds))) { + eprintf("CodeView signature is \'%.4s\', \'%.4s\' expected\n", + rsds->Signature, sign_rsds); + return false; } - pdb_name_sz = debug_dir.SizeOfData - sizeof(rsds); - pdb_name = malloc(pdb_name_sz); - if (!pdb_name) { - return 1; + if (debug_dir.SizeOfData - sizeof(*rsds) != sizeof(PDB_NAME)) { + eprintf("PDB name size doesn't match\n"); + return false; } - if (va_space_rw(vs, base + debug_dir.AddressOfRawData + - offsetof(OMFSignatureRSDS, name), pdb_name, pdb_name_sz, 0)) { - free(pdb_name); - return 1; + if (!va_space_rw(vs, base + debug_dir.AddressOfRawData + + offsetof(OMFSignatureRSDS, name), + pdb_name, sizeof(PDB_NAME), 0)) { + eprintf("Failed to resolve PDB name\n"); + return false; } printf("PDB name is \'%s\', \'%s\' expected\n", pdb_name, PDB_NAME); - if (strcmp(pdb_name, PDB_NAME)) { - eprintf("Unexpected PDB name, it seems the kernel isn't found\n"); - free(pdb_name); - return 1; - } - - free(pdb_name); + return !strcmp(pdb_name, PDB_NAME); +} - sprintf(hash, "%.08x%.04x%.04x%.02x%.02x", rsds.guid.a, rsds.guid.b, - rsds.guid.c, rsds.guid.d[0], rsds.guid.d[1]); +static void pe_get_pdb_symstore_hash(OMFSignatureRSDS *rsds, char *hash) +{ + sprintf(hash, "%.08x%.04x%.04x%.02x%.02x", rsds->guid.a, rsds->guid.b, + rsds->guid.c, rsds->guid.d[0], rsds->guid.d[1]); hash += 20; - for (i = 0; i < 6; i++, hash += 2) { - sprintf(hash, "%.02x", rsds.guid.e[i]); + for (unsigned int i = 0; i < 6; i++, hash += 2) { + sprintf(hash, "%.02x", rsds->guid.e[i]); } - sprintf(hash, "%.01x", rsds.age); - - return 0; + sprintf(hash, "%.01x", rsds->age); } int main(int argc, char *argv[]) { - int err = 0; + int err = 1; QEMU_Elf qemu_elf; struct pa_space ps; struct va_space vs; @@ -470,119 +524,112 @@ int main(int argc, char *argv[]) uint64_t KdDebuggerDataBlock; KDDEBUGGER_DATA64 *kdbg; uint64_t KdVersionBlock; + bool kernel_found = false; + OMFSignatureRSDS rsds; if (argc != 3) { eprintf("usage:\n\t%s elf_file dmp_file\n", argv[0]); return 1; } - if (QEMU_Elf_init(&qemu_elf, argv[1])) { + if (!QEMU_Elf_init(&qemu_elf, argv[1])) { eprintf("Failed to initialize QEMU ELF dump\n"); return 1; } - if (pa_space_create(&ps, &qemu_elf)) { - eprintf("Failed to initialize physical address space\n"); - err = 1; - goto out_elf; - } + pa_space_create(&ps, &qemu_elf); state = qemu_elf.state[0]; - printf("CPU #0 CR3 is 0x%016lx\n", state->cr[3]); + printf("CPU #0 CR3 is 0x%016"PRIx64"\n", state->cr[3]); va_space_create(&vs, &ps, state->cr[3]); - if (fix_dtb(&vs, &qemu_elf)) { + if (!fix_dtb(&vs, &qemu_elf)) { eprintf("Failed to find paging base\n"); - err = 1; - goto out_elf; + goto out_ps; } - printf("CPU #0 IDT is at 0x%016lx\n", state->idt.base); + printf("CPU #0 IDT is at 0x%016"PRIx64"\n", state->idt.base); - if (va_space_rw(&vs, state->idt.base, - &first_idt_desc, sizeof(first_idt_desc), 0)) { + if (!va_space_rw(&vs, state->idt.base, + &first_idt_desc, sizeof(first_idt_desc), 0)) { eprintf("Failed to get CPU #0 IDT[0]\n"); - err = 1; goto out_ps; } - printf("CPU #0 IDT[0] -> 0x%016lx\n", idt_desc_addr(first_idt_desc)); + printf("CPU #0 IDT[0] -> 0x%016"PRIx64"\n", idt_desc_addr(first_idt_desc)); - KernBase = idt_desc_addr(first_idt_desc) & ~(PAGE_SIZE - 1); - printf("Searching kernel downwards from 0x%16lx...\n", KernBase); + KernBase = idt_desc_addr(first_idt_desc) & ~(ELF2DMP_PAGE_SIZE - 1); + printf("Searching kernel downwards from 0x%016"PRIx64"...\n", KernBase); - for (; KernBase >= 0xfffff78000000000; KernBase -= PAGE_SIZE) { + for (; KernBase >= 0xfffff78000000000; KernBase -= ELF2DMP_PAGE_SIZE) { nt_start_addr = va_space_resolve(&vs, KernBase); if (!nt_start_addr) { continue; } if (*(uint16_t *)nt_start_addr == 0x5a4d) { /* MZ */ - break; + printf("Checking candidate KernBase = 0x%016"PRIx64"\n", KernBase); + if (pe_check_pdb_name(KernBase, nt_start_addr, &vs, &rsds)) { + kernel_found = true; + break; + } } } - printf("KernBase = 0x%16lx, signature is \'%.2s\'\n", KernBase, - (char *)nt_start_addr); - - if (pe_get_pdb_symstore_hash(KernBase, nt_start_addr, pdb_hash, &vs)) { - eprintf("Failed to get PDB symbol store hash\n"); - err = 1; + if (!kernel_found) { + eprintf("Failed to find NT kernel image\n"); goto out_ps; } + printf("KernBase = 0x%016"PRIx64", signature is \'%.2s\'\n", KernBase, + (char *)nt_start_addr); + + pe_get_pdb_symstore_hash(&rsds, pdb_hash); + sprintf(pdb_url, "%s%s/%s/%s", SYM_URL_BASE, PDB_NAME, pdb_hash, PDB_NAME); printf("PDB URL is %s\n", pdb_url); - if (download_url(PDB_NAME, pdb_url)) { + if (!download_url(PDB_NAME, pdb_url)) { eprintf("Failed to download PDB file\n"); - err = 1; goto out_ps; } - if (pdb_init_from_file(PDB_NAME, &pdb)) { + if (!pdb_init_from_file(PDB_NAME, &pdb)) { eprintf("Failed to initialize PDB reader\n"); - err = 1; goto out_pdb_file; } if (!SYM_RESOLVE(KernBase, &pdb, KdDebuggerDataBlock) || !SYM_RESOLVE(KernBase, &pdb, KdVersionBlock)) { - err = 1; goto out_pdb; } kdbg = get_kdbg(KernBase, &pdb, &vs, KdDebuggerDataBlock); if (!kdbg) { - err = 1; goto out_pdb; } - if (fill_header(&header, &ps, &vs, KdDebuggerDataBlock, kdbg, - KdVersionBlock, qemu_elf.state_nr)) { - err = 1; - goto out_pdb; + if (!fill_header(&header, &ps, &vs, KdDebuggerDataBlock, kdbg, + KdVersionBlock, qemu_elf.state_nr)) { + goto out_kdbg; } - if (fill_context(kdbg, &vs, &qemu_elf)) { - err = 1; - goto out_pdb; - } + fill_context(kdbg, &vs, &qemu_elf); - if (write_dump(&ps, &header, argv[2])) { + if (!write_dump(&ps, &header, argv[2])) { eprintf("Failed to save dump\n"); - err = 1; goto out_kdbg; } + err = 0; + out_kdbg: - free(kdbg); + g_free(kdbg); out_pdb: pdb_exit(&pdb); out_pdb_file: unlink(PDB_NAME); out_ps: pa_space_destroy(&ps); -out_elf: QEMU_Elf_exit(&qemu_elf); return err; diff --git a/contrib/elf2dmp/meson.build b/contrib/elf2dmp/meson.build new file mode 100644 index 0000000000..6707d43c4f --- /dev/null +++ b/contrib/elf2dmp/meson.build @@ -0,0 +1,5 @@ +if curl.found() + executable('elf2dmp', files('main.c', 'addrspace.c', 'download.c', 'pdb.c', 'qemu_elf.c'), genh, + dependencies: [glib, curl], + install: true) +endif diff --git a/contrib/elf2dmp/pdb.c b/contrib/elf2dmp/pdb.c index bcb01b414f..492aca4434 100644 --- a/contrib/elf2dmp/pdb.c +++ b/contrib/elf2dmp/pdb.c @@ -19,11 +19,17 @@ */ #include "qemu/osdep.h" +#include "qemu/bswap.h" + #include "pdb.h" #include "err.h" static uint32_t pdb_get_file_size(const struct pdb_reader *r, unsigned idx) { + if (idx >= r->ds.toc->num_files) { + return 0; + } + return r->ds.toc->file_size[idx]; } @@ -66,7 +72,7 @@ uint64_t pdb_find_public_v3_symbol(struct pdb_reader *r, const char *name) uint32_t sect_rva = segment->dword[1]; uint64_t rva = sect_rva + sym->public_v3.offset; - printf("%s: 0x%016x(%d:\'%.8s\') + 0x%08x = 0x%09lx\n", name, + printf("%s: 0x%016x(%d:\'%.8s\') + 0x%08x = 0x%09"PRIx64"\n", name, sect_rva, sym->public_v3.segment, ((char *)segment - 8), sym->public_v3.offset, rva); return rva; @@ -89,18 +95,18 @@ uint64_t pdb_resolve(uint64_t img_base, struct pdb_reader *r, const char *name) static void pdb_reader_ds_exit(struct pdb_reader *r) { - free(r->ds.toc); + g_free(r->ds.toc); } static void pdb_exit_symbols(struct pdb_reader *r) { - free(r->modimage); - free(r->symbols); + g_free(r->modimage); + g_free(r->symbols); } static void pdb_exit_segments(struct pdb_reader *r) { - free(r->segs); + g_free(r->segs); } static void *pdb_ds_read(const PDB_DS_HEADER *header, @@ -115,10 +121,7 @@ static void *pdb_ds_read(const PDB_DS_HEADER *header, nBlocks = (size + header->block_size - 1) / header->block_size; - buffer = malloc(nBlocks * header->block_size); - if (!buffer) { - return NULL; - } + buffer = g_malloc(nBlocks * header->block_size); for (i = 0; i < nBlocks; i++) { memcpy(buffer + i * header->block_size, (const char *)header + @@ -156,167 +159,145 @@ static void *pdb_ds_read_file(struct pdb_reader* r, uint32_t file_number) return pdb_ds_read(r->ds.header, block_list, file_size[file_number]); } -static int pdb_init_segments(struct pdb_reader *r) +static bool pdb_init_segments(struct pdb_reader *r) { - char *segs; - unsigned stream_idx = r->sidx.segments; + unsigned stream_idx = r->segments; - segs = pdb_ds_read_file(r, stream_idx); - if (!segs) { - return 1; + r->segs = pdb_ds_read_file(r, stream_idx); + if (!r->segs) { + return false; } - r->segs = segs; r->segs_size = pdb_get_file_size(r, stream_idx); + if (!r->segs_size) { + return false; + } - return 0; + return true; } -static int pdb_init_symbols(struct pdb_reader *r) +static bool pdb_init_symbols(struct pdb_reader *r) { - int err = 0; PDB_SYMBOLS *symbols; - PDB_STREAM_INDEXES *sidx = &r->sidx; - - memset(sidx, -1, sizeof(*sidx)); symbols = pdb_ds_read_file(r, 3); if (!symbols) { - return 1; + return false; } r->symbols = symbols; - if (symbols->stream_index_size != sizeof(PDB_STREAM_INDEXES)) { - err = 1; - goto out_symbols; - } - - memcpy(sidx, (const char *)symbols + sizeof(PDB_SYMBOLS) + + r->segments = lduw_le_p((const char *)symbols + sizeof(PDB_SYMBOLS) + symbols->module_size + symbols->offset_size + symbols->hash_size + symbols->srcmodule_size + - symbols->pdbimport_size + symbols->unknown2_size, sizeof(*sidx)); + symbols->pdbimport_size + symbols->unknown2_size + + offsetof(PDB_STREAM_INDEXES, segments)); /* Read global symbol table */ r->modimage = pdb_ds_read_file(r, symbols->gsym_file); if (!r->modimage) { - err = 1; goto out_symbols; } - return 0; + return true; out_symbols: - free(symbols); + g_free(symbols); - return err; + return false; } -static int pdb_reader_ds_init(struct pdb_reader *r, PDB_DS_HEADER *hdr) +static bool pdb_reader_ds_init(struct pdb_reader *r, PDB_DS_HEADER *hdr) { + if (hdr->block_size == 0) { + return false; + } + memset(r->file_used, 0, sizeof(r->file_used)); r->ds.header = hdr; r->ds.toc = pdb_ds_read(hdr, (uint32_t *)((uint8_t *)hdr + hdr->toc_page * hdr->block_size), hdr->toc_size); if (!r->ds.toc) { - return 1; + return false; } - return 0; + return true; } -static int pdb_reader_init(struct pdb_reader *r, void *data) +static bool pdb_reader_init(struct pdb_reader *r, void *data) { - int err = 0; const char pdb7[] = "Microsoft C/C++ MSF 7.00"; if (memcmp(data, pdb7, sizeof(pdb7) - 1)) { - return 1; + return false; } - if (pdb_reader_ds_init(r, data)) { - return 1; + if (!pdb_reader_ds_init(r, data)) { + return false; } r->ds.root = pdb_ds_read_file(r, 1); if (!r->ds.root) { - err = 1; goto out_ds; } - if (pdb_init_symbols(r)) { - err = 1; + if (!pdb_init_symbols(r)) { goto out_root; } - if (pdb_init_segments(r)) { - err = 1; + if (!pdb_init_segments(r)) { goto out_sym; } - return 0; + return true; out_sym: pdb_exit_symbols(r); out_root: - free(r->ds.root); + g_free(r->ds.root); out_ds: pdb_reader_ds_exit(r); - return err; + return false; } static void pdb_reader_exit(struct pdb_reader *r) { pdb_exit_segments(r); pdb_exit_symbols(r); - free(r->ds.root); + g_free(r->ds.root); pdb_reader_ds_exit(r); } -int pdb_init_from_file(const char *name, struct pdb_reader *reader) +bool pdb_init_from_file(const char *name, struct pdb_reader *reader) { - int err = 0; - int fd; + GError *gerr = NULL; void *map; - struct stat st; - fd = open(name, O_RDONLY, 0); - if (fd == -1) { - eprintf("Failed to open PDB file \'%s\'\n", name); - return 1; + reader->gmf = g_mapped_file_new(name, TRUE, &gerr); + if (gerr) { + eprintf("Failed to map PDB file \'%s\'\n", name); + g_error_free(gerr); + return false; } - reader->fd = fd; - - fstat(fd, &st); - reader->file_size = st.st_size; - map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); - if (map == MAP_FAILED) { - eprintf("Failed to map PDB file\n"); - err = 1; - goto out_fd; - } - - if (pdb_reader_init(reader, map)) { - err = 1; + reader->file_size = g_mapped_file_get_length(reader->gmf); + map = g_mapped_file_get_contents(reader->gmf); + if (!pdb_reader_init(reader, map)) { goto out_unmap; } - return 0; + return true; out_unmap: - munmap(map, st.st_size); -out_fd: - close(fd); + g_mapped_file_unref(reader->gmf); - return err; + return false; } void pdb_exit(struct pdb_reader *reader) { - munmap(reader->ds.header, reader->file_size); - close(reader->fd); + g_mapped_file_unref(reader->gmf); pdb_reader_exit(reader); } diff --git a/contrib/elf2dmp/pdb.h b/contrib/elf2dmp/pdb.h index a3a3cac2c1..feddf1862f 100644 --- a/contrib/elf2dmp/pdb.h +++ b/contrib/elf2dmp/pdb.h @@ -9,12 +9,14 @@ #define PDB_H +#ifndef _WIN32 typedef struct GUID { unsigned int Data1; unsigned short Data2; unsigned short Data3; unsigned char Data4[8]; } GUID; +#endif struct PDB_FILE { uint32_t size; @@ -216,7 +218,7 @@ typedef struct pdb_seg { #define IMAGE_FILE_MACHINE_AMD64 0x8664 struct pdb_reader { - int fd; + GMappedFile *gmf; size_t file_size; struct { PDB_DS_HEADER *header; @@ -225,13 +227,13 @@ struct pdb_reader { } ds; uint32_t file_used[1024]; PDB_SYMBOLS *symbols; - PDB_STREAM_INDEXES sidx; + uint16_t segments; uint8_t *modimage; char *segs; size_t segs_size; }; -int pdb_init_from_file(const char *name, struct pdb_reader *reader); +bool pdb_init_from_file(const char *name, struct pdb_reader *reader); void pdb_exit(struct pdb_reader *reader); uint64_t pdb_resolve(uint64_t img_base, struct pdb_reader *r, const char *name); uint64_t pdb_find_public_v3_symbol(struct pdb_reader *reader, const char *name); diff --git a/contrib/elf2dmp/pe.h b/contrib/elf2dmp/pe.h index dafb26afbb..71126af1ac 100644 --- a/contrib/elf2dmp/pe.h +++ b/contrib/elf2dmp/pe.h @@ -9,6 +9,7 @@ #define PE_H +#ifndef _WIN32 typedef struct IMAGE_DOS_HEADER { uint16_t e_magic; /* 0x00: MZ Header signature */ uint16_t e_cblp; /* 0x02: Bytes on last page of file */ @@ -32,75 +33,91 @@ typedef struct IMAGE_DOS_HEADER { } __attribute__ ((packed)) IMAGE_DOS_HEADER; typedef struct IMAGE_FILE_HEADER { - uint16_t Machine; - uint16_t NumberOfSections; - uint32_t TimeDateStamp; - uint32_t PointerToSymbolTable; - uint32_t NumberOfSymbols; - uint16_t SizeOfOptionalHeader; - uint16_t Characteristics; + uint16_t Machine; + uint16_t NumberOfSections; + uint32_t TimeDateStamp; + uint32_t PointerToSymbolTable; + uint32_t NumberOfSymbols; + uint16_t SizeOfOptionalHeader; + uint16_t Characteristics; } __attribute__ ((packed)) IMAGE_FILE_HEADER; typedef struct IMAGE_DATA_DIRECTORY { - uint32_t VirtualAddress; - uint32_t Size; + uint32_t VirtualAddress; + uint32_t Size; } __attribute__ ((packed)) IMAGE_DATA_DIRECTORY; #define IMAGE_NUMBEROF_DIRECTORY_ENTRIES 16 typedef struct IMAGE_OPTIONAL_HEADER64 { - uint16_t Magic; /* 0x20b */ - uint8_t MajorLinkerVersion; - uint8_t MinorLinkerVersion; - uint32_t SizeOfCode; - uint32_t SizeOfInitializedData; - uint32_t SizeOfUninitializedData; - uint32_t AddressOfEntryPoint; - uint32_t BaseOfCode; - uint64_t ImageBase; - uint32_t SectionAlignment; - uint32_t FileAlignment; - uint16_t MajorOperatingSystemVersion; - uint16_t MinorOperatingSystemVersion; - uint16_t MajorImageVersion; - uint16_t MinorImageVersion; - uint16_t MajorSubsystemVersion; - uint16_t MinorSubsystemVersion; - uint32_t Win32VersionValue; - uint32_t SizeOfImage; - uint32_t SizeOfHeaders; - uint32_t CheckSum; - uint16_t Subsystem; - uint16_t DllCharacteristics; - uint64_t SizeOfStackReserve; - uint64_t SizeOfStackCommit; - uint64_t SizeOfHeapReserve; - uint64_t SizeOfHeapCommit; - uint32_t LoaderFlags; - uint32_t NumberOfRvaAndSizes; - IMAGE_DATA_DIRECTORY DataDirectory[IMAGE_NUMBEROF_DIRECTORY_ENTRIES]; + uint16_t Magic; /* 0x20b */ + uint8_t MajorLinkerVersion; + uint8_t MinorLinkerVersion; + uint32_t SizeOfCode; + uint32_t SizeOfInitializedData; + uint32_t SizeOfUninitializedData; + uint32_t AddressOfEntryPoint; + uint32_t BaseOfCode; + uint64_t ImageBase; + uint32_t SectionAlignment; + uint32_t FileAlignment; + uint16_t MajorOperatingSystemVersion; + uint16_t MinorOperatingSystemVersion; + uint16_t MajorImageVersion; + uint16_t MinorImageVersion; + uint16_t MajorSubsystemVersion; + uint16_t MinorSubsystemVersion; + uint32_t Win32VersionValue; + uint32_t SizeOfImage; + uint32_t SizeOfHeaders; + uint32_t CheckSum; + uint16_t Subsystem; + uint16_t DllCharacteristics; + uint64_t SizeOfStackReserve; + uint64_t SizeOfStackCommit; + uint64_t SizeOfHeapReserve; + uint64_t SizeOfHeapCommit; + uint32_t LoaderFlags; + uint32_t NumberOfRvaAndSizes; + IMAGE_DATA_DIRECTORY DataDirectory[IMAGE_NUMBEROF_DIRECTORY_ENTRIES]; } __attribute__ ((packed)) IMAGE_OPTIONAL_HEADER64; typedef struct IMAGE_NT_HEADERS64 { - uint32_t Signature; - IMAGE_FILE_HEADER FileHeader; - IMAGE_OPTIONAL_HEADER64 OptionalHeader; + uint32_t Signature; + IMAGE_FILE_HEADER FileHeader; + IMAGE_OPTIONAL_HEADER64 OptionalHeader; } __attribute__ ((packed)) IMAGE_NT_HEADERS64; -#define IMAGE_FILE_DEBUG_DIRECTORY 6 +typedef struct IMAGE_EXPORT_DIRECTORY { + uint32_t Characteristics; + uint32_t TimeDateStamp; + uint16_t MajorVersion; + uint16_t MinorVersion; + uint32_t Name; + uint32_t Base; + uint32_t NumberOfFunctions; + uint32_t NumberOfNames; + uint32_t AddressOfFunctions; + uint32_t AddressOfNames; + uint32_t AddressOfNameOrdinals; +} __attribute__ ((packed)) IMAGE_EXPORT_DIRECTORY; typedef struct IMAGE_DEBUG_DIRECTORY { - uint32_t Characteristics; - uint32_t TimeDateStamp; - uint16_t MajorVersion; - uint16_t MinorVersion; - uint32_t Type; - uint32_t SizeOfData; - uint32_t AddressOfRawData; - uint32_t PointerToRawData; + uint32_t Characteristics; + uint32_t TimeDateStamp; + uint16_t MajorVersion; + uint16_t MinorVersion; + uint32_t Type; + uint32_t SizeOfData; + uint32_t AddressOfRawData; + uint32_t PointerToRawData; } __attribute__ ((packed)) IMAGE_DEBUG_DIRECTORY; #define IMAGE_DEBUG_TYPE_CODEVIEW 2 +#endif + +#define IMAGE_FILE_EXPORT_DIRECTORY 0 +#define IMAGE_FILE_DEBUG_DIRECTORY 6 typedef struct guid_t { uint32_t a; diff --git a/contrib/elf2dmp/qemu_elf.c b/contrib/elf2dmp/qemu_elf.c index e9c0d2534a..c9bad6e82c 100644 --- a/contrib/elf2dmp/qemu_elf.c +++ b/contrib/elf2dmp/qemu_elf.c @@ -6,6 +6,7 @@ */ #include "qemu/osdep.h" +#include "qemu/host-utils.h" #include "err.h" #include "qemu_elf.h" @@ -15,36 +16,11 @@ #define ROUND_UP(n, d) (((n) + (d) - 1) & -(0 ? (n) : (d))) #endif -#ifndef DIV_ROUND_UP -#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) -#endif - -#define ELF_NOTE_SIZE(hdr_size, name_size, desc_size) \ - ((DIV_ROUND_UP((hdr_size), 4) + \ - DIV_ROUND_UP((name_size), 4) + \ - DIV_ROUND_UP((desc_size), 4)) * 4) - int is_system(QEMUCPUState *s) { return s->gs.base >> 63; } -static char *nhdr_get_name(Elf64_Nhdr *nhdr) -{ - return (char *)nhdr + ROUND_UP(sizeof(*nhdr), 4); -} - -static void *nhdr_get_desc(Elf64_Nhdr *nhdr) -{ - return nhdr_get_name(nhdr) + ROUND_UP(nhdr->n_namesz, 4); -} - -static Elf64_Nhdr *nhdr_get_next(Elf64_Nhdr *nhdr) -{ - return (void *)((uint8_t *)nhdr + ELF_NOTE_SIZE(sizeof(*nhdr), - nhdr->n_namesz, nhdr->n_descsz)); -} - Elf64_Phdr *elf64_getphdr(void *map) { Elf64_Ehdr *ehdr = map; @@ -60,105 +36,235 @@ Elf64_Half elf_getphdrnum(void *map) return ehdr->e_phnum; } -static int init_states(QEMU_Elf *qe) +static bool advance_note_offset(uint64_t *offsetp, uint64_t size, uint64_t end) +{ + uint64_t offset = *offsetp; + + if (uadd64_overflow(offset, size, &offset) || offset > UINT64_MAX - 3) { + return false; + } + + offset = ROUND_UP(offset, 4); + + if (offset > end) { + return false; + } + + *offsetp = offset; + + return true; +} + +static bool init_states(QEMU_Elf *qe) { Elf64_Phdr *phdr = elf64_getphdr(qe->map); - Elf64_Nhdr *start = (void *)((uint8_t *)qe->map + phdr[0].p_offset); - Elf64_Nhdr *end = (void *)((uint8_t *)start + phdr[0].p_memsz); Elf64_Nhdr *nhdr; - size_t cpu_nr = 0; + GPtrArray *states; + QEMUCPUState *state; + uint32_t state_size; + uint64_t offset; + uint64_t end_offset; + char *name; if (phdr[0].p_type != PT_NOTE) { eprintf("Failed to find PT_NOTE\n"); - return 1; + return false; } qe->has_kernel_gs_base = 1; + offset = phdr[0].p_offset; + states = g_ptr_array_new(); + + if (uadd64_overflow(offset, phdr[0].p_memsz, &end_offset) || + end_offset > qe->size) { + end_offset = qe->size; + } - for (nhdr = start; nhdr < end; nhdr = nhdr_get_next(nhdr)) { - if (!strcmp(nhdr_get_name(nhdr), QEMU_NOTE_NAME)) { - QEMUCPUState *state = nhdr_get_desc(nhdr); + while (offset < end_offset) { + nhdr = (void *)((uint8_t *)qe->map + offset); + + if (!advance_note_offset(&offset, sizeof(*nhdr), end_offset)) { + break; + } + + name = (char *)qe->map + offset; + + if (!advance_note_offset(&offset, nhdr->n_namesz, end_offset)) { + break; + } - if (state->size < sizeof(*state)) { - eprintf("CPU #%zu: QEMU CPU state size %u doesn't match\n", - cpu_nr, state->size); + state = (void *)((uint8_t *)qe->map + offset); + + if (!advance_note_offset(&offset, nhdr->n_descsz, end_offset)) { + break; + } + + if (!strcmp(name, QEMU_NOTE_NAME) && + nhdr->n_descsz >= offsetof(QEMUCPUState, kernel_gs_base)) { + state_size = MIN(state->size, nhdr->n_descsz); + + if (state_size < sizeof(*state)) { + eprintf("CPU #%u: QEMU CPU state size %u doesn't match\n", + states->len, state_size); /* * We assume either every QEMU CPU state has KERNEL_GS_BASE or * no one has. */ qe->has_kernel_gs_base = 0; } - cpu_nr++; + g_ptr_array_add(states, state); } } - printf("%zu CPU states has been found\n", cpu_nr); + printf("%u CPU states has been found\n", states->len); + + qe->state_nr = states->len; + qe->state = (void *)g_ptr_array_free(states, FALSE); - qe->state = malloc(sizeof(*qe->state) * cpu_nr); - if (!qe->state) { - return 1; + return true; +} + +static void exit_states(QEMU_Elf *qe) +{ + g_free(qe->state); +} + +static bool check_ehdr(QEMU_Elf *qe) +{ + Elf64_Ehdr *ehdr = qe->map; + uint64_t phendoff; + + if (sizeof(Elf64_Ehdr) > qe->size) { + eprintf("Invalid input dump file size\n"); + return false; } - cpu_nr = 0; + if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG)) { + eprintf("Invalid ELF signature, input file is not ELF\n"); + return false; + } - for (nhdr = start; nhdr < end; nhdr = nhdr_get_next(nhdr)) { - if (!strcmp(nhdr_get_name(nhdr), QEMU_NOTE_NAME)) { - qe->state[cpu_nr] = nhdr_get_desc(nhdr); - cpu_nr++; - } + if (ehdr->e_ident[EI_CLASS] != ELFCLASS64 || + ehdr->e_ident[EI_DATA] != ELFDATA2LSB) { + eprintf("Invalid ELF class or byte order, must be 64-bit LE\n"); + return false; } - qe->state_nr = cpu_nr; + if (ehdr->e_ident[EI_VERSION] != EV_CURRENT) { + eprintf("Invalid ELF version\n"); + return false; + } - return 0; -} + if (ehdr->e_machine != EM_X86_64) { + eprintf("Invalid input dump architecture, only x86_64 is supported\n"); + return false; + } -static void exit_states(QEMU_Elf *qe) -{ - free(qe->state); + if (ehdr->e_type != ET_CORE) { + eprintf("Invalid ELF type, must be core file\n"); + return false; + } + + /* + * ELF dump file must contain one PT_NOTE and at least one PT_LOAD to + * restore physical address space. + */ + if (ehdr->e_phnum < 2) { + eprintf("Invalid number of ELF program headers\n"); + return false; + } + + if (umul64_overflow(ehdr->e_phnum, sizeof(Elf64_Phdr), &phendoff) || + uadd64_overflow(phendoff, ehdr->e_phoff, &phendoff) || + phendoff > qe->size) { + eprintf("phdrs do not fit in file\n"); + return false; + } + + return true; } -int QEMU_Elf_init(QEMU_Elf *qe, const char *filename) +static bool QEMU_Elf_map(QEMU_Elf *qe, const char *filename) { - int err = 0; +#ifdef CONFIG_LINUX struct stat st; + int fd; - qe->fd = open(filename, O_RDONLY, 0); - if (qe->fd == -1) { + printf("Using Linux mmap\n"); + + fd = open(filename, O_RDONLY, 0); + if (fd == -1) { eprintf("Failed to open ELF dump file \'%s\'\n", filename); - return 1; + return false; } - fstat(qe->fd, &st); + if (fstat(fd, &st)) { + eprintf("Failed to get size of ELF dump file\n"); + close(fd); + return false; + } qe->size = st.st_size; qe->map = mmap(NULL, qe->size, PROT_READ | PROT_WRITE, - MAP_PRIVATE, qe->fd, 0); + MAP_PRIVATE | MAP_NORESERVE, fd, 0); if (qe->map == MAP_FAILED) { eprintf("Failed to map ELF file\n"); - err = 1; - goto out_fd; + close(fd); + return false; } - if (init_states(qe)) { - eprintf("Failed to extract QEMU CPU states\n"); - err = 1; - goto out_unmap; + close(fd); +#else + GError *gerr = NULL; + + printf("Using GLib mmap\n"); + + qe->gmf = g_mapped_file_new(filename, TRUE, &gerr); + if (gerr) { + eprintf("Failed to map ELF dump file \'%s\'\n", filename); + g_error_free(gerr); + return false; } - return 0; + qe->map = g_mapped_file_get_contents(qe->gmf); + qe->size = g_mapped_file_get_length(qe->gmf); +#endif -out_unmap: + return true; +} + +static void QEMU_Elf_unmap(QEMU_Elf *qe) +{ +#ifdef CONFIG_LINUX munmap(qe->map, qe->size); -out_fd: - close(qe->fd); +#else + g_mapped_file_unref(qe->gmf); +#endif +} - return err; +bool QEMU_Elf_init(QEMU_Elf *qe, const char *filename) +{ + if (!QEMU_Elf_map(qe, filename)) { + return false; + } + + if (!check_ehdr(qe)) { + eprintf("Input file has the wrong format\n"); + QEMU_Elf_unmap(qe); + return false; + } + + if (!init_states(qe)) { + eprintf("Failed to extract QEMU CPU states\n"); + QEMU_Elf_unmap(qe); + return false; + } + + return true; } void QEMU_Elf_exit(QEMU_Elf *qe) { exit_states(qe); - munmap(qe->map, qe->size); - close(qe->fd); + QEMU_Elf_unmap(qe); } diff --git a/contrib/elf2dmp/qemu_elf.h b/contrib/elf2dmp/qemu_elf.h index 86e6e688fb..adc50238b4 100644 --- a/contrib/elf2dmp/qemu_elf.h +++ b/contrib/elf2dmp/qemu_elf.h @@ -2,13 +2,12 @@ * Copyright (c) 2018 Virtuozzo International GmbH * * This work is licensed under the terms of the GNU GPL, version 2 or later. - * */ -#ifndef QEMU_ELF_H -#define QEMU_ELF_H +#ifndef ELF2DMP_QEMU_ELF_H +#define ELF2DMP_QEMU_ELF_H -#include <elf.h> +#include "elf.h" typedef struct QEMUCPUSegment { uint32_t selector; @@ -33,7 +32,9 @@ typedef struct QEMUCPUState { int is_system(QEMUCPUState *s); typedef struct QEMU_Elf { - int fd; +#ifndef CONFIG_LINUX + GMappedFile *gmf; +#endif size_t size; void *map; QEMUCPUState **state; @@ -41,10 +42,10 @@ typedef struct QEMU_Elf { int has_kernel_gs_base; } QEMU_Elf; -int QEMU_Elf_init(QEMU_Elf *qe, const char *filename); +bool QEMU_Elf_init(QEMU_Elf *qe, const char *filename); void QEMU_Elf_exit(QEMU_Elf *qe); Elf64_Phdr *elf64_getphdr(void *map); Elf64_Half elf_getphdrnum(void *map); -#endif /* QEMU_ELF_H */ +#endif /* ELF2DMP_QEMU_ELF_H */ diff --git a/contrib/gitdm/aliases b/contrib/gitdm/aliases index 07fd3391a5..e26b00a71d 100644 --- a/contrib/gitdm/aliases +++ b/contrib/gitdm/aliases @@ -1,6 +1,22 @@ # -# This is the email aliases file, mapping secondary addresses -# onto a single, canonical address. Duplicates some info from .mailmap +# This is the email aliases file, mapping secondary addresses onto a +# single, canonical address. It duplicates some info from .mailmap so +# if you are adding something here also consider if the .mailmap needs +# updating. +# +# If you just want to avoid gitdm complaining about author fields +# which are actually email addresses with the message: +# +# "...is an author name, probably not what you want" +# +# you can just apply --use-mailmap to you git-log command, e.g: +# +# git log --use-mailmap --numstat --since "last 2 years" | $GITDM +# +# however that will have the effect of squashing multiple addresses to +# a canonical address which will distort the stats of those who +# contribute in both personal and professional capacities from +# different addresses. # # weird commits @@ -15,6 +31,14 @@ pbrook@c046a42c-6fe2-441c-8c8c-71466251a162 paul@codesourcery.com ths@c046a42c-6fe2-441c-8c8c-71466251a162 ths@networkno.de malc@c046a42c-6fe2-441c-8c8c-71466251a162 av1474@comtv.ru +# canonical emails +liq3ea@163.com liq3ea@gmail.com + +# some broken DCO tags +yuval.shaia.ml.gmail.com yuval.shaia.ml@gmail.com +jasowang jasowang@redhat.com +nicta.com.au peter.chubb@nicta.com.au + # There is also a: # (no author) <(no author)@c046a42c-6fe2-441c-8c8c-71466251a162> # for the cvs2svn initialization commit e63c3dc74bf. diff --git a/contrib/gitdm/domain-map b/contrib/gitdm/domain-map index 0ab41ee27a..bf1dce03fd 100644 --- a/contrib/gitdm/domain-map +++ b/contrib/gitdm/domain-map @@ -4,18 +4,55 @@ # This maps email domains to nice easy to read company names # +linux.alibaba.com Alibaba +amazon.com Amazon +amazon.co.uk Amazon +amazon.de Amazon amd.com AMD +aspeedtech.com ASPEED Technology Inc. +baidu.com Baidu +bytedance.com ByteDance +cestc.cn Cestc +cmss.chinamobile.com China Mobile +citrix.com Citrix +crudebyte.com Crudebyte +chinatelecom.cn China Telecom +daynix.com Daynix +eldorado.org.br Instituto de Pesquisas Eldorado +fb.com Facebook +fujitsu.com Fujitsu +google.com Google greensocs.com GreenSocs +hisilicon.com Huawei +huawei.com Huawei ibm.com IBM igalia.com Igalia +intel.com Intel linaro.org Linaro +loongson.cn Loongson Technology +lwn.net LWN +microsoft.com Microsoft +mvista.com MontaVista nokia.com Nokia +nuviainc.com NUVIA +nvidia.com NVIDIA oracle.com Oracle proxmox.com Proxmox +quicinc.com Qualcomm Innovation Center redhat.com Red Hat +rev.ng rev.ng Labs +rivosinc.com Rivos Inc +rt-rk.com RT-RK +samsung.com Samsung siemens.com Siemens sifive.com SiFive +suse.com SUSE suse.de SUSE +syrmia.com SYRMIA +ventanamicro.com Ventana Micro Systems virtuozzo.com Virtuozzo +vrull.eu VRULL wdc.com Western Digital -xilinx.com Xilinx +windriver.com Wind River +yadro.com YADRO +yandex-team.ru Yandex diff --git a/contrib/gitdm/filetypes.txt b/contrib/gitdm/filetypes.txt index 15d6f803b9..b1d01c0992 100644 --- a/contrib/gitdm/filetypes.txt +++ b/contrib/gitdm/filetypes.txt @@ -4,7 +4,7 @@ # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or -# (at your option any later version. +# (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -12,8 +12,7 @@ # GNU Library General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# along with this program. If not, see <https://www.gnu.org/licenses/>. # # Authors : Gregorio Robles <grex@gsyc.escet.urjc.es> # Authors : GermĂ¡n PĂ³o-Caamaño <gpoo@gnome.org> @@ -22,7 +21,7 @@ # in the gitdm sample-config directory. # # This file contains associations parameters regarding filetypes -# (documentation, develompent, multimedia, images...) +# (documentation, development, multimedia, images...) # # format: # filetype <type> <regex> [<comment>] @@ -34,7 +33,7 @@ # If there is an filetype which is not in order but has values, it will # be added at the end. # -order build,tests,code,documentation,devel-doc,blobs +order build,interface,tests,code,documentation,devel-doc,blobs # # @@ -42,7 +41,7 @@ order build,tests,code,documentation,devel-doc,blobs # (most common languages first # filetype code \.c$ # C -filetype code \.inc.c$ # C +filetype code \.c.inc$ # C filetype code \.C$ # C++ filetype code \.cpp$ # C++ filetype code \.c\+\+$ # C++ @@ -59,8 +58,8 @@ filetype code \.s$ # Assembly filetype code \.S$ # Assembly filetype code \.asm$ # Assembly filetype code \.awk$ # awk -filetype code ^common$ # script fragements -filetype code ^common.*$ # script fragements +filetype code ^common$ # script fragments +filetype code ^common.*$ # script fragments filetype code (qom|qmp)-\w+$ # python script fragments # diff --git a/contrib/gitdm/group-map-academics b/contrib/gitdm/group-map-academics index 08f9d81d13..082458e1bd 100644 --- a/contrib/gitdm/group-map-academics +++ b/contrib/gitdm/group-map-academics @@ -12,3 +12,16 @@ ispras.ru # Columbia University cs.columbia.edu cota@braap.org + +uni-paderborn.de +edu +edu.cn + +# Boston University +bu.edu + +# Institute of Software Chinese Academy of Sciences +iscas.ac.cn + +# UniversitĂ© Grenoble Alpes +univ-grenoble-alpes.fr diff --git a/contrib/gitdm/group-map-alibaba b/contrib/gitdm/group-map-alibaba new file mode 100644 index 0000000000..4c34446d34 --- /dev/null +++ b/contrib/gitdm/group-map-alibaba @@ -0,0 +1,7 @@ +# +# Alibaba contributors including its subsidiaries +# + +# c-sky.com, now part of T-Head, wholly-owned entity of Alibaba Group +ren_guo@c-sky.com +zhiwei_liu@c-sky.com diff --git a/contrib/gitdm/group-map-amd b/contrib/gitdm/group-map-amd new file mode 100644 index 0000000000..bda4239a8a --- /dev/null +++ b/contrib/gitdm/group-map-amd @@ -0,0 +1,8 @@ +# AMD acquired Xilinx and contributors have been slowly updating emails + +edgar.iglesias@xilinx.com +fnu.vikram@xilinx.com +francisco.iglesias@xilinx.com +sai.pavan.boddu@xilinx.com +stefano.stabellini@xilinx.com +tong.ho@xilinx.com diff --git a/contrib/gitdm/group-map-facebook b/contrib/gitdm/group-map-facebook new file mode 100644 index 0000000000..38589f8fb9 --- /dev/null +++ b/contrib/gitdm/group-map-facebook @@ -0,0 +1,5 @@ +# +# Some Facebook contributors also occasionally use personal email addresses. +# + +peter@pjd.dev diff --git a/contrib/gitdm/group-map-ibm b/contrib/gitdm/group-map-ibm index 22727319b3..24d8dc1b86 100644 --- a/contrib/gitdm/group-map-ibm +++ b/contrib/gitdm/group-map-ibm @@ -11,3 +11,5 @@ groug@kaod.org jcfaracco@gmail.com joel@jms.id.au sjitindarsingh@gmail.com +tommusta@gmail.com +idan.horowitz@gmail.com diff --git a/contrib/gitdm/group-map-individuals b/contrib/gitdm/group-map-individuals index afdbe7d460..d7116f5444 100644 --- a/contrib/gitdm/group-map-individuals +++ b/contrib/gitdm/group-map-individuals @@ -2,9 +2,43 @@ # Individual and personal contributors # # This is simply to allow prolific developers with no company -# affiliations to be grouped together in the summary stats. +# affiliations (or non-company related personal work) to be grouped +# together in the summary stats. # f4bug@amsat.org mjt@tls.msk.ru mark.cave-ayland@ilande.co.uk +rth@twiddle.net +noring@nocrew.org +samuel.thibault@ens-lyon.org +aurelien@aurel32.net +balaton@eik.bme.hu +e.emanuelegiuseppe@gmail.com +andrew.smirnov@gmail.com +sw@weilnetz.de +deller@gmx.de +fthain@telegraphics.com.au +vr_qemu@t-online.de +nieklinnenbank@gmail.com +devnexen@gmail.com +pauldzim@gmail.com +ani@anisinha.ca +sundeep.lkml@gmail.com +mrolnik@gmail.com +huth@tuxfamily.org +jhogan@kernel.org +atar4qemu@gmail.com +minwoo.im.dev@gmail.com +bmeng.cn@gmail.com +liq3ea@gmail.com +chetan4windows@gmail.com +akihiko.odaki@gmail.com +paul@nowt.org +git@xen0n.name +simon@simonsafar.com +research_trasio@irq.a4lg.com +shentey@gmail.com +bmeng@tinylab.org +strahinja.p.jankovic@gmail.com +Jason@zx2c4.com diff --git a/contrib/gitdm/group-map-interns b/contrib/gitdm/group-map-interns new file mode 100644 index 0000000000..fe33a3231e --- /dev/null +++ b/contrib/gitdm/group-map-interns @@ -0,0 +1,13 @@ +# +# Group together everyone working as an intern via one of the various +# outreach programs. +# + +# GSoC 2020 Virtual FIDO/U2F security key +cesar.belley@lse.epita.fr + +# GSoC 2020 TCG performance +ahmedkhaledkaraman@gmail.com + +# GSoC 2021 TCG plugins +ma.mandourr@gmail.com diff --git a/contrib/gitdm/group-map-janustech b/contrib/gitdm/group-map-janustech new file mode 100644 index 0000000000..4ae7cc24f2 --- /dev/null +++ b/contrib/gitdm/group-map-janustech @@ -0,0 +1,5 @@ +# +# Janus Technologies contributors using non-corporate email +# + +marcel.apfelbaum@gmail.com diff --git a/contrib/gitdm/group-map-netflix b/contrib/gitdm/group-map-netflix new file mode 100644 index 0000000000..468f95dcb2 --- /dev/null +++ b/contrib/gitdm/group-map-netflix @@ -0,0 +1,5 @@ +# +# Netflix contributors using their personal emails +# + +imp@bsdimp.com diff --git a/contrib/gitdm/group-map-redhat b/contrib/gitdm/group-map-redhat index 6d05c6b54f..02507b7b53 100644 --- a/contrib/gitdm/group-map-redhat +++ b/contrib/gitdm/group-map-redhat @@ -5,3 +5,5 @@ david@gibson.dropbear.id.au laurent@vivier.eu pjp@fedoraproject.org +armbru@pond.sub.org +nirsof@gmail.com diff --git a/contrib/gitdm/group-map-robots b/contrib/gitdm/group-map-robots new file mode 100644 index 0000000000..ffd956c2eb --- /dev/null +++ b/contrib/gitdm/group-map-robots @@ -0,0 +1,7 @@ +# +# There are various automatic robots that occasionally scan and report +# bugs. Let's group them together here. +# + +# Euler Robot +euler.robot@huawei.com diff --git a/contrib/gitdm/group-map-wavecomp b/contrib/gitdm/group-map-wavecomp index 2801a966b6..c5c57f0eaf 100644 --- a/contrib/gitdm/group-map-wavecomp +++ b/contrib/gitdm/group-map-wavecomp @@ -5,13 +5,25 @@ aleksandar.markovic@imgtec.com aleksandar.markovic@mips.com +alex.smith@imgtec.com +andrew.bennett@imgtec.com amarkovic@wavecomp.com arikalo@wavecomp.com +chris@mips.com dnikolic@wavecomp.com +ericj@mips.com +goran.ferenc@imgtec.com +james.cowgill@mips.com +james.hogan@imgtec.com james.hogan@mips.com leon.alrae@imgtec.com +matt.redfearn@imgtec.com matthew.fortune@mips.com +miodrag.dinic@imgtec.com +paul@archlinuxmips.org paul.burton@imgtec.com +petar.jovanovic@imgtec.com +petarj@mips.com pburton@wavecomp.com smarkovic@wavecomp.com yongbok.kim@imgtec.com diff --git a/contrib/ivshmem-client/Makefile.objs b/contrib/ivshmem-client/Makefile.objs deleted file mode 100644 index bfab2d20dd..0000000000 --- a/contrib/ivshmem-client/Makefile.objs +++ /dev/null @@ -1 +0,0 @@ -ivshmem-client-obj-y = ivshmem-client.o main.o diff --git a/contrib/ivshmem-client/ivshmem-client.c b/contrib/ivshmem-client/ivshmem-client.c index 44ae3646e1..182c79d27c 100644 --- a/contrib/ivshmem-client/ivshmem-client.c +++ b/contrib/ivshmem-client/ivshmem-client.c @@ -10,7 +10,6 @@ #include <sys/socket.h> #include <sys/un.h> -#include "qemu-common.h" #include "qemu/queue.h" #include "ivshmem-client.h" @@ -179,7 +178,7 @@ ivshmem_client_init(IvshmemClient *client, const char *unix_sock_path, int ivshmem_client_connect(IvshmemClient *client) { - struct sockaddr_un sun; + struct sockaddr_un s_un; int fd, ret; int64_t tmp; @@ -193,16 +192,16 @@ ivshmem_client_connect(IvshmemClient *client) return -1; } - sun.sun_family = AF_UNIX; - ret = snprintf(sun.sun_path, sizeof(sun.sun_path), "%s", + s_un.sun_family = AF_UNIX; + ret = snprintf(s_un.sun_path, sizeof(s_un.sun_path), "%s", client->unix_sock_path); - if (ret < 0 || ret >= sizeof(sun.sun_path)) { + if (ret < 0 || ret >= sizeof(s_un.sun_path)) { IVSHMEM_CLIENT_DEBUG(client, "could not copy unix socket path\n"); goto err_close; } - if (connect(client->sock_fd, (struct sockaddr *)&sun, sizeof(sun)) < 0) { - IVSHMEM_CLIENT_DEBUG(client, "cannot connect to %s: %s\n", sun.sun_path, + if (connect(client->sock_fd, (struct sockaddr *)&s_un, sizeof(s_un)) < 0) { + IVSHMEM_CLIENT_DEBUG(client, "cannot connect to %s: %s\n", s_un.sun_path, strerror(errno)); goto err_close; } diff --git a/contrib/ivshmem-client/ivshmem-client.h b/contrib/ivshmem-client/ivshmem-client.h index fe3cc4a03d..fc45a38060 100644 --- a/contrib/ivshmem-client/ivshmem-client.h +++ b/contrib/ivshmem-client/ivshmem-client.h @@ -174,7 +174,7 @@ int ivshmem_client_notify_all_vects(const IvshmemClient *client, const IvshmemClientPeer *peer); /** - * Broadcat a notification to all vectors of all peers + * Broadcast a notification to all vectors of all peers * * @client: The ivshmem client * diff --git a/contrib/ivshmem-client/main.c b/contrib/ivshmem-client/main.c index 33ae1daa15..21f38f3fec 100644 --- a/contrib/ivshmem-client/main.c +++ b/contrib/ivshmem-client/main.c @@ -7,7 +7,6 @@ */ #include "qemu/osdep.h" -#include "qemu-common.h" #include "ivshmem-client.h" diff --git a/contrib/ivshmem-client/meson.build b/contrib/ivshmem-client/meson.build new file mode 100644 index 0000000000..3c8b09af4b --- /dev/null +++ b/contrib/ivshmem-client/meson.build @@ -0,0 +1,4 @@ +executable('ivshmem-client', files('ivshmem-client.c', 'main.c'), genh, + dependencies: glib, + build_by_default: host_os == 'linux', + install: false) diff --git a/contrib/ivshmem-server/Makefile.objs b/contrib/ivshmem-server/Makefile.objs deleted file mode 100644 index c060dd3698..0000000000 --- a/contrib/ivshmem-server/Makefile.objs +++ /dev/null @@ -1 +0,0 @@ -ivshmem-server-obj-y = ivshmem-server.o main.o diff --git a/contrib/ivshmem-server/ivshmem-server.c b/contrib/ivshmem-server/ivshmem-server.c index e2f295bd43..2f3c7320a6 100644 --- a/contrib/ivshmem-server/ivshmem-server.c +++ b/contrib/ivshmem-server/ivshmem-server.c @@ -6,7 +6,6 @@ * top-level directory. */ #include "qemu/osdep.h" -#include "qemu-common.h" #include "qemu/host-utils.h" #include "qemu/sockets.h" @@ -147,7 +146,7 @@ ivshmem_server_handle_new_conn(IvshmemServer *server) return -1; } - qemu_set_nonblock(newfd); + qemu_socket_set_nonblock(newfd); IVSHMEM_SERVER_DEBUG(server, "accept()=%d\n", newfd); /* allocate new structure for this peer */ @@ -289,7 +288,7 @@ ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path, int ivshmem_server_start(IvshmemServer *server) { - struct sockaddr_un sun; + struct sockaddr_un s_un; int shm_fd, sock_fd, ret; /* open shm file */ @@ -328,15 +327,15 @@ ivshmem_server_start(IvshmemServer *server) goto err_close_shm; } - sun.sun_family = AF_UNIX; - ret = snprintf(sun.sun_path, sizeof(sun.sun_path), "%s", + s_un.sun_family = AF_UNIX; + ret = snprintf(s_un.sun_path, sizeof(s_un.sun_path), "%s", server->unix_sock_path); - if (ret < 0 || ret >= sizeof(sun.sun_path)) { + if (ret < 0 || ret >= sizeof(s_un.sun_path)) { IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n"); goto err_close_sock; } - if (bind(sock_fd, (struct sockaddr *)&sun, sizeof(sun)) < 0) { - IVSHMEM_SERVER_DEBUG(server, "cannot connect to %s: %s\n", sun.sun_path, + if (bind(sock_fd, (struct sockaddr *)&s_un, sizeof(s_un)) < 0) { + IVSHMEM_SERVER_DEBUG(server, "cannot connect to %s: %s\n", s_un.sun_path, strerror(errno)); goto err_close_sock; } @@ -354,6 +353,9 @@ ivshmem_server_start(IvshmemServer *server) err_close_sock: close(sock_fd); err_close_shm: + if (server->use_shm_open) { + shm_unlink(server->shm_path); + } close(shm_fd); return -1; } @@ -371,6 +373,9 @@ ivshmem_server_close(IvshmemServer *server) } unlink(server->unix_sock_path); + if (server->use_shm_open) { + shm_unlink(server->shm_path); + } close(server->sock_fd); close(server->shm_fd); server->sock_fd = -1; diff --git a/contrib/ivshmem-server/main.c b/contrib/ivshmem-server/main.c index 197c79c57e..5901f17707 100644 --- a/contrib/ivshmem-server/main.c +++ b/contrib/ivshmem-server/main.c @@ -17,7 +17,7 @@ #define IVSHMEM_SERVER_DEFAULT_PID_FILE "/var/run/ivshmem-server.pid" #define IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH "/tmp/ivshmem_socket" #define IVSHMEM_SERVER_DEFAULT_SHM_PATH "ivshmem" -#define IVSHMEM_SERVER_DEFAULT_SHM_SIZE (4*1024*1024) +#define IVSHMEM_SERVER_DEFAULT_SHM_SIZE (4 * 1024 * 1024) #define IVSHMEM_SERVER_DEFAULT_N_VECTORS 1 /* used to quit on signal SIGTERM */ @@ -69,7 +69,7 @@ static void ivshmem_server_parse_args(IvshmemServerArgs *args, int argc, char *argv[]) { int c; - unsigned long long v; + uint64_t v; Error *err = NULL; while ((c = getopt(argc, argv, "hvFp:S:m:M:l:n:")) != -1) { @@ -103,8 +103,8 @@ ivshmem_server_parse_args(IvshmemServerArgs *args, int argc, char *argv[]) break; case 'l': /* shm size */ - parse_option_size("shm_size", optarg, &args->shm_size, &err); - if (err) { + if (!parse_option_size("shm_size", optarg, &args->shm_size, + &err)) { error_report_err(err); ivshmem_server_help(argv[0]); exit(1); @@ -112,7 +112,7 @@ ivshmem_server_parse_args(IvshmemServerArgs *args, int argc, char *argv[]) break; case 'n': /* number of vectors */ - if (parse_uint_full(optarg, &v, 0) < 0) { + if (parse_uint_full(optarg, 0, &v) < 0) { fprintf(stderr, "cannot parse n_vectors\n"); ivshmem_server_help(argv[0]); exit(1); @@ -223,8 +223,9 @@ main(int argc, char *argv[]) sa_quit.sa_handler = ivshmem_server_quit_cb; sa_quit.sa_flags = 0; if (sigemptyset(&sa_quit.sa_mask) == -1 || - sigaction(SIGTERM, &sa_quit, 0) == -1) { - perror("failed to add SIGTERM handler; sigaction"); + sigaction(SIGTERM, &sa_quit, 0) == -1 || + sigaction(SIGINT, &sa_quit, 0) == -1) { + perror("failed to add signal handler; sigaction"); goto err; } diff --git a/contrib/ivshmem-server/meson.build b/contrib/ivshmem-server/meson.build new file mode 100644 index 0000000000..1c8fea6594 --- /dev/null +++ b/contrib/ivshmem-server/meson.build @@ -0,0 +1,4 @@ +executable('ivshmem-server', files('ivshmem-server.c', 'main.c'), genh, + dependencies: [qemuutil, rt], + build_by_default: host_os == 'linux', + install: false) diff --git a/contrib/libvhost-user/Makefile.objs b/contrib/libvhost-user/Makefile.objs deleted file mode 100644 index ef3778edd4..0000000000 --- a/contrib/libvhost-user/Makefile.objs +++ /dev/null @@ -1 +0,0 @@ -libvhost-user-obj-y += libvhost-user.o libvhost-user-glib.o diff --git a/contrib/libvhost-user/libvhost-user-glib.c b/contrib/libvhost-user/libvhost-user-glib.c deleted file mode 100644 index 545f089587..0000000000 --- a/contrib/libvhost-user/libvhost-user-glib.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Vhost User library - * - * Copyright (c) 2016 Nutanix Inc. All rights reserved. - * Copyright (c) 2017 Red Hat, Inc. - * - * Authors: - * Marc-AndrĂ© Lureau <mlureau@redhat.com> - * Felipe Franciosi <felipe@nutanix.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or - * later. See the COPYING file in the top-level directory. - */ - -#include "qemu/osdep.h" - -#include "libvhost-user-glib.h" - -/* glib event loop integration for libvhost-user and misc callbacks */ - -G_STATIC_ASSERT((int)G_IO_IN == (int)VU_WATCH_IN); -G_STATIC_ASSERT((int)G_IO_OUT == (int)VU_WATCH_OUT); -G_STATIC_ASSERT((int)G_IO_PRI == (int)VU_WATCH_PRI); -G_STATIC_ASSERT((int)G_IO_ERR == (int)VU_WATCH_ERR); -G_STATIC_ASSERT((int)G_IO_HUP == (int)VU_WATCH_HUP); - -typedef struct VugSrc { - GSource parent; - VuDev *dev; - GPollFD gfd; -} VugSrc; - -static gboolean -vug_src_prepare(GSource *gsrc, gint *timeout) -{ - g_assert(timeout); - - *timeout = -1; - return FALSE; -} - -static gboolean -vug_src_check(GSource *gsrc) -{ - VugSrc *src = (VugSrc *)gsrc; - - g_assert(src); - - return src->gfd.revents & src->gfd.events; -} - -static gboolean -vug_src_dispatch(GSource *gsrc, GSourceFunc cb, gpointer data) -{ - VugSrc *src = (VugSrc *)gsrc; - - g_assert(src); - - ((vu_watch_cb)cb)(src->dev, src->gfd.revents, data); - - return G_SOURCE_CONTINUE; -} - -static GSourceFuncs vug_src_funcs = { - vug_src_prepare, - vug_src_check, - vug_src_dispatch, - NULL -}; - -static GSource * -vug_source_new(VuDev *dev, int fd, GIOCondition cond, - vu_watch_cb vu_cb, gpointer data) -{ - GSource *gsrc; - VugSrc *src; - guint id; - - g_assert(dev); - g_assert(fd >= 0); - g_assert(vu_cb); - - gsrc = g_source_new(&vug_src_funcs, sizeof(VugSrc)); - g_source_set_callback(gsrc, (GSourceFunc)vu_cb, data, NULL); - src = (VugSrc *)gsrc; - src->dev = dev; - src->gfd.fd = fd; - src->gfd.events = cond; - - g_source_add_poll(gsrc, &src->gfd); - id = g_source_attach(gsrc, NULL); - g_assert(id); - g_source_unref(gsrc); - - return gsrc; -} - -static void -set_watch(VuDev *vu_dev, int fd, int vu_evt, vu_watch_cb cb, void *pvt) -{ - GSource *src; - VugDev *dev; - - g_assert(vu_dev); - g_assert(fd >= 0); - g_assert(cb); - - dev = container_of(vu_dev, VugDev, parent); - src = vug_source_new(vu_dev, fd, vu_evt, cb, pvt); - g_hash_table_replace(dev->fdmap, GINT_TO_POINTER(fd), src); -} - -static void -remove_watch(VuDev *vu_dev, int fd) -{ - VugDev *dev; - - g_assert(vu_dev); - g_assert(fd >= 0); - - dev = container_of(vu_dev, VugDev, parent); - g_hash_table_remove(dev->fdmap, GINT_TO_POINTER(fd)); -} - - -static void vug_watch(VuDev *dev, int condition, void *data) -{ - if (!vu_dispatch(dev) != 0) { - dev->panic(dev, "Error processing vhost message"); - } -} - -void -vug_init(VugDev *dev, int socket, - vu_panic_cb panic, const VuDevIface *iface) -{ - g_assert(dev); - g_assert(iface); - - vu_init(&dev->parent, socket, panic, set_watch, remove_watch, iface); - dev->fdmap = g_hash_table_new_full(NULL, NULL, NULL, - (GDestroyNotify) g_source_destroy); - - dev->src = vug_source_new(&dev->parent, socket, G_IO_IN, vug_watch, NULL); -} - -void -vug_deinit(VugDev *dev) -{ - g_assert(dev); - - g_hash_table_unref(dev->fdmap); - g_source_unref(dev->src); -} diff --git a/contrib/libvhost-user/libvhost-user-glib.h b/contrib/libvhost-user/libvhost-user-glib.h deleted file mode 100644 index 6b2110b94c..0000000000 --- a/contrib/libvhost-user/libvhost-user-glib.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Vhost User library - * - * Copyright (c) 2016 Nutanix Inc. All rights reserved. - * Copyright (c) 2017 Red Hat, Inc. - * - * Authors: - * Marc-AndrĂ© Lureau <mlureau@redhat.com> - * Felipe Franciosi <felipe@nutanix.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or - * later. See the COPYING file in the top-level directory. - */ - -#ifndef LIBVHOST_USER_GLIB_H -#define LIBVHOST_USER_GLIB_H - -#include <glib.h> -#include "libvhost-user.h" - -typedef struct VugDev { - VuDev parent; - - GHashTable *fdmap; /* fd -> gsource */ - GSource *src; -} VugDev; - -void vug_init(VugDev *dev, int socket, - vu_panic_cb panic, const VuDevIface *iface); -void vug_deinit(VugDev *dev); - -#endif /* LIBVHOST_USER_GLIB_H */ diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c deleted file mode 100644 index a6b46cdc03..0000000000 --- a/contrib/libvhost-user/libvhost-user.c +++ /dev/null @@ -1,2110 +0,0 @@ -/* - * Vhost User library - * - * Copyright IBM, Corp. 2007 - * Copyright (c) 2016 Red Hat, Inc. - * - * Authors: - * Anthony Liguori <aliguori@us.ibm.com> - * Marc-AndrĂ© Lureau <mlureau@redhat.com> - * Victor Kaplansky <victork@redhat.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or - * later. See the COPYING file in the top-level directory. - */ - -/* this code avoids GLib dependency */ -#include <stdlib.h> -#include <stdio.h> -#include <unistd.h> -#include <stdarg.h> -#include <errno.h> -#include <string.h> -#include <assert.h> -#include <inttypes.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <sys/eventfd.h> -#include <sys/mman.h> -#include "qemu/compiler.h" - -#if defined(__linux__) -#include <sys/syscall.h> -#include <fcntl.h> -#include <sys/ioctl.h> -#include <linux/vhost.h> - -#ifdef __NR_userfaultfd -#include <linux/userfaultfd.h> -#endif - -#endif - -#include "qemu/atomic.h" - -#include "libvhost-user.h" - -/* usually provided by GLib */ -#ifndef MIN -#define MIN(x, y) ({ \ - typeof(x) _min1 = (x); \ - typeof(y) _min2 = (y); \ - (void) (&_min1 == &_min2); \ - _min1 < _min2 ? _min1 : _min2; }) -#endif - -#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64) - -/* The version of the protocol we support */ -#define VHOST_USER_VERSION 1 -#define LIBVHOST_USER_DEBUG 0 - -#define DPRINT(...) \ - do { \ - if (LIBVHOST_USER_DEBUG) { \ - fprintf(stderr, __VA_ARGS__); \ - } \ - } while (0) - -static const char * -vu_request_to_string(unsigned int req) -{ -#define REQ(req) [req] = #req - static const char *vu_request_str[] = { - REQ(VHOST_USER_NONE), - REQ(VHOST_USER_GET_FEATURES), - REQ(VHOST_USER_SET_FEATURES), - REQ(VHOST_USER_SET_OWNER), - REQ(VHOST_USER_RESET_OWNER), - REQ(VHOST_USER_SET_MEM_TABLE), - REQ(VHOST_USER_SET_LOG_BASE), - REQ(VHOST_USER_SET_LOG_FD), - REQ(VHOST_USER_SET_VRING_NUM), - REQ(VHOST_USER_SET_VRING_ADDR), - REQ(VHOST_USER_SET_VRING_BASE), - REQ(VHOST_USER_GET_VRING_BASE), - REQ(VHOST_USER_SET_VRING_KICK), - REQ(VHOST_USER_SET_VRING_CALL), - REQ(VHOST_USER_SET_VRING_ERR), - REQ(VHOST_USER_GET_PROTOCOL_FEATURES), - REQ(VHOST_USER_SET_PROTOCOL_FEATURES), - REQ(VHOST_USER_GET_QUEUE_NUM), - REQ(VHOST_USER_SET_VRING_ENABLE), - REQ(VHOST_USER_SEND_RARP), - REQ(VHOST_USER_NET_SET_MTU), - REQ(VHOST_USER_SET_SLAVE_REQ_FD), - REQ(VHOST_USER_IOTLB_MSG), - REQ(VHOST_USER_SET_VRING_ENDIAN), - REQ(VHOST_USER_GET_CONFIG), - REQ(VHOST_USER_SET_CONFIG), - REQ(VHOST_USER_POSTCOPY_ADVISE), - REQ(VHOST_USER_POSTCOPY_LISTEN), - REQ(VHOST_USER_POSTCOPY_END), - REQ(VHOST_USER_MAX), - }; -#undef REQ - - if (req < VHOST_USER_MAX) { - return vu_request_str[req]; - } else { - return "unknown"; - } -} - -static void -vu_panic(VuDev *dev, const char *msg, ...) -{ - char *buf = NULL; - va_list ap; - - va_start(ap, msg); - if (vasprintf(&buf, msg, ap) < 0) { - buf = NULL; - } - va_end(ap); - - dev->broken = true; - dev->panic(dev, buf); - free(buf); - - /* FIXME: find a way to call virtio_error? */ -} - -/* Translate guest physical address to our virtual address. */ -void * -vu_gpa_to_va(VuDev *dev, uint64_t *plen, uint64_t guest_addr) -{ - int i; - - if (*plen == 0) { - return NULL; - } - - /* Find matching memory region. */ - for (i = 0; i < dev->nregions; i++) { - VuDevRegion *r = &dev->regions[i]; - - if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) { - if ((guest_addr + *plen) > (r->gpa + r->size)) { - *plen = r->gpa + r->size - guest_addr; - } - return (void *)(uintptr_t) - guest_addr - r->gpa + r->mmap_addr + r->mmap_offset; - } - } - - return NULL; -} - -/* Translate qemu virtual address to our virtual address. */ -static void * -qva_to_va(VuDev *dev, uint64_t qemu_addr) -{ - int i; - - /* Find matching memory region. */ - for (i = 0; i < dev->nregions; i++) { - VuDevRegion *r = &dev->regions[i]; - - if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) { - return (void *)(uintptr_t) - qemu_addr - r->qva + r->mmap_addr + r->mmap_offset; - } - } - - return NULL; -} - -static void -vmsg_close_fds(VhostUserMsg *vmsg) -{ - int i; - - for (i = 0; i < vmsg->fd_num; i++) { - close(vmsg->fds[i]); - } -} - -/* A test to see if we have userfault available */ -static bool -have_userfault(void) -{ -#if defined(__linux__) && defined(__NR_userfaultfd) &&\ - defined(UFFD_FEATURE_MISSING_SHMEM) &&\ - defined(UFFD_FEATURE_MISSING_HUGETLBFS) - /* Now test the kernel we're running on really has the features */ - int ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); - struct uffdio_api api_struct; - if (ufd < 0) { - return false; - } - - api_struct.api = UFFD_API; - api_struct.features = UFFD_FEATURE_MISSING_SHMEM | - UFFD_FEATURE_MISSING_HUGETLBFS; - if (ioctl(ufd, UFFDIO_API, &api_struct)) { - close(ufd); - return false; - } - close(ufd); - return true; - -#else - return false; -#endif -} - -static bool -vu_message_read(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) -{ - char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { }; - struct iovec iov = { - .iov_base = (char *)vmsg, - .iov_len = VHOST_USER_HDR_SIZE, - }; - struct msghdr msg = { - .msg_iov = &iov, - .msg_iovlen = 1, - .msg_control = control, - .msg_controllen = sizeof(control), - }; - size_t fd_size; - struct cmsghdr *cmsg; - int rc; - - do { - rc = recvmsg(conn_fd, &msg, 0); - } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); - - if (rc < 0) { - vu_panic(dev, "Error while recvmsg: %s", strerror(errno)); - return false; - } - - vmsg->fd_num = 0; - for (cmsg = CMSG_FIRSTHDR(&msg); - cmsg != NULL; - cmsg = CMSG_NXTHDR(&msg, cmsg)) - { - if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) { - fd_size = cmsg->cmsg_len - CMSG_LEN(0); - vmsg->fd_num = fd_size / sizeof(int); - memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size); - break; - } - } - - if (vmsg->size > sizeof(vmsg->payload)) { - vu_panic(dev, - "Error: too big message request: %d, size: vmsg->size: %u, " - "while sizeof(vmsg->payload) = %zu\n", - vmsg->request, vmsg->size, sizeof(vmsg->payload)); - goto fail; - } - - if (vmsg->size) { - do { - rc = read(conn_fd, &vmsg->payload, vmsg->size); - } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); - - if (rc <= 0) { - vu_panic(dev, "Error while reading: %s", strerror(errno)); - goto fail; - } - - assert(rc == vmsg->size); - } - - return true; - -fail: - vmsg_close_fds(vmsg); - - return false; -} - -static bool -vu_message_write(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) -{ - int rc; - uint8_t *p = (uint8_t *)vmsg; - char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { }; - struct iovec iov = { - .iov_base = (char *)vmsg, - .iov_len = VHOST_USER_HDR_SIZE, - }; - struct msghdr msg = { - .msg_iov = &iov, - .msg_iovlen = 1, - .msg_control = control, - }; - struct cmsghdr *cmsg; - - memset(control, 0, sizeof(control)); - assert(vmsg->fd_num <= VHOST_MEMORY_MAX_NREGIONS); - if (vmsg->fd_num > 0) { - size_t fdsize = vmsg->fd_num * sizeof(int); - msg.msg_controllen = CMSG_SPACE(fdsize); - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_len = CMSG_LEN(fdsize); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - memcpy(CMSG_DATA(cmsg), vmsg->fds, fdsize); - } else { - msg.msg_controllen = 0; - } - - do { - rc = sendmsg(conn_fd, &msg, 0); - } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); - - if (vmsg->size) { - do { - if (vmsg->data) { - rc = write(conn_fd, vmsg->data, vmsg->size); - } else { - rc = write(conn_fd, p + VHOST_USER_HDR_SIZE, vmsg->size); - } - } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); - } - - if (rc <= 0) { - vu_panic(dev, "Error while writing: %s", strerror(errno)); - return false; - } - - return true; -} - -static bool -vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) -{ - /* Set the version in the flags when sending the reply */ - vmsg->flags &= ~VHOST_USER_VERSION_MASK; - vmsg->flags |= VHOST_USER_VERSION; - vmsg->flags |= VHOST_USER_REPLY_MASK; - - return vu_message_write(dev, conn_fd, vmsg); -} - -static bool -vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg) -{ - VhostUserMsg msg_reply; - - if ((vmsg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) { - return true; - } - - if (!vu_message_read(dev, dev->slave_fd, &msg_reply)) { - return false; - } - - if (msg_reply.request != vmsg->request) { - DPRINT("Received unexpected msg type. Expected %d received %d", - vmsg->request, msg_reply.request); - return false; - } - - return msg_reply.payload.u64 == 0; -} - -/* Kick the log_call_fd if required. */ -static void -vu_log_kick(VuDev *dev) -{ - if (dev->log_call_fd != -1) { - DPRINT("Kicking the QEMU's log...\n"); - if (eventfd_write(dev->log_call_fd, 1) < 0) { - vu_panic(dev, "Error writing eventfd: %s", strerror(errno)); - } - } -} - -static void -vu_log_page(uint8_t *log_table, uint64_t page) -{ - DPRINT("Logged dirty guest page: %"PRId64"\n", page); - atomic_or(&log_table[page / 8], 1 << (page % 8)); -} - -static void -vu_log_write(VuDev *dev, uint64_t address, uint64_t length) -{ - uint64_t page; - - if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) || - !dev->log_table || !length) { - return; - } - - assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8)); - - page = address / VHOST_LOG_PAGE; - while (page * VHOST_LOG_PAGE < address + length) { - vu_log_page(dev->log_table, page); - page += VHOST_LOG_PAGE; - } - - vu_log_kick(dev); -} - -static void -vu_kick_cb(VuDev *dev, int condition, void *data) -{ - int index = (intptr_t)data; - VuVirtq *vq = &dev->vq[index]; - int sock = vq->kick_fd; - eventfd_t kick_data; - ssize_t rc; - - rc = eventfd_read(sock, &kick_data); - if (rc == -1) { - vu_panic(dev, "kick eventfd_read(): %s", strerror(errno)); - dev->remove_watch(dev, dev->vq[index].kick_fd); - } else { - DPRINT("Got kick_data: %016"PRIx64" handler:%p idx:%d\n", - kick_data, vq->handler, index); - if (vq->handler) { - vq->handler(dev, index); - } - } -} - -static bool -vu_get_features_exec(VuDev *dev, VhostUserMsg *vmsg) -{ - vmsg->payload.u64 = - 1ULL << VHOST_F_LOG_ALL | - 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; - - if (dev->iface->get_features) { - vmsg->payload.u64 |= dev->iface->get_features(dev); - } - - vmsg->size = sizeof(vmsg->payload.u64); - vmsg->fd_num = 0; - - DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64); - - return true; -} - -static void -vu_set_enable_all_rings(VuDev *dev, bool enabled) -{ - int i; - - for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) { - dev->vq[i].enable = enabled; - } -} - -static bool -vu_set_features_exec(VuDev *dev, VhostUserMsg *vmsg) -{ - DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); - - dev->features = vmsg->payload.u64; - - if (!(dev->features & VHOST_USER_F_PROTOCOL_FEATURES)) { - vu_set_enable_all_rings(dev, true); - } - - if (dev->iface->set_features) { - dev->iface->set_features(dev, dev->features); - } - - return false; -} - -static bool -vu_set_owner_exec(VuDev *dev, VhostUserMsg *vmsg) -{ - return false; -} - -static void -vu_close_log(VuDev *dev) -{ - if (dev->log_table) { - if (munmap(dev->log_table, dev->log_size) != 0) { - perror("close log munmap() error"); - } - - dev->log_table = NULL; - } - if (dev->log_call_fd != -1) { - close(dev->log_call_fd); - dev->log_call_fd = -1; - } -} - -static bool -vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg) -{ - vu_set_enable_all_rings(dev, false); - - return false; -} - -static bool -vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) -{ - int i; - VhostUserMemory *memory = &vmsg->payload.memory; - dev->nregions = memory->nregions; - - DPRINT("Nregions: %d\n", memory->nregions); - for (i = 0; i < dev->nregions; i++) { - void *mmap_addr; - VhostUserMemoryRegion *msg_region = &memory->regions[i]; - VuDevRegion *dev_region = &dev->regions[i]; - - DPRINT("Region %d\n", i); - DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n", - msg_region->guest_phys_addr); - DPRINT(" memory_size: 0x%016"PRIx64"\n", - msg_region->memory_size); - DPRINT(" userspace_addr 0x%016"PRIx64"\n", - msg_region->userspace_addr); - DPRINT(" mmap_offset 0x%016"PRIx64"\n", - msg_region->mmap_offset); - - dev_region->gpa = msg_region->guest_phys_addr; - dev_region->size = msg_region->memory_size; - dev_region->qva = msg_region->userspace_addr; - dev_region->mmap_offset = msg_region->mmap_offset; - - /* We don't use offset argument of mmap() since the - * mapped address has to be page aligned, and we use huge - * pages. - * In postcopy we're using PROT_NONE here to catch anyone - * accessing it before we userfault - */ - mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset, - PROT_NONE, MAP_SHARED, - vmsg->fds[i], 0); - - if (mmap_addr == MAP_FAILED) { - vu_panic(dev, "region mmap error: %s", strerror(errno)); - } else { - dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr; - DPRINT(" mmap_addr: 0x%016"PRIx64"\n", - dev_region->mmap_addr); - } - - /* Return the address to QEMU so that it can translate the ufd - * fault addresses back. - */ - msg_region->userspace_addr = (uintptr_t)(mmap_addr + - dev_region->mmap_offset); - close(vmsg->fds[i]); - } - - /* Send the message back to qemu with the addresses filled in */ - vmsg->fd_num = 0; - if (!vu_send_reply(dev, dev->sock, vmsg)) { - vu_panic(dev, "failed to respond to set-mem-table for postcopy"); - return false; - } - - /* Wait for QEMU to confirm that it's registered the handler for the - * faults. - */ - if (!vu_message_read(dev, dev->sock, vmsg) || - vmsg->size != sizeof(vmsg->payload.u64) || - vmsg->payload.u64 != 0) { - vu_panic(dev, "failed to receive valid ack for postcopy set-mem-table"); - return false; - } - - /* OK, now we can go and register the memory and generate faults */ - for (i = 0; i < dev->nregions; i++) { - VuDevRegion *dev_region = &dev->regions[i]; - int ret; -#ifdef UFFDIO_REGISTER - /* We should already have an open ufd. Mark each memory - * range as ufd. - * Discard any mapping we have here; note I can't use MADV_REMOVE - * or fallocate to make the hole since I don't want to lose - * data that's already arrived in the shared process. - * TODO: How to do hugepage - */ - ret = madvise((void *)dev_region->mmap_addr, - dev_region->size + dev_region->mmap_offset, - MADV_DONTNEED); - if (ret) { - fprintf(stderr, - "%s: Failed to madvise(DONTNEED) region %d: %s\n", - __func__, i, strerror(errno)); - } - /* Turn off transparent hugepages so we dont get lose wakeups - * in neighbouring pages. - * TODO: Turn this backon later. - */ - ret = madvise((void *)dev_region->mmap_addr, - dev_region->size + dev_region->mmap_offset, - MADV_NOHUGEPAGE); - if (ret) { - /* Note: This can happen legally on kernels that are configured - * without madvise'able hugepages - */ - fprintf(stderr, - "%s: Failed to madvise(NOHUGEPAGE) region %d: %s\n", - __func__, i, strerror(errno)); - } - struct uffdio_register reg_struct; - reg_struct.range.start = (uintptr_t)dev_region->mmap_addr; - reg_struct.range.len = dev_region->size + dev_region->mmap_offset; - reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; - - if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER, ®_struct)) { - vu_panic(dev, "%s: Failed to userfault region %d " - "@%p + size:%zx offset: %zx: (ufd=%d)%s\n", - __func__, i, - dev_region->mmap_addr, - dev_region->size, dev_region->mmap_offset, - dev->postcopy_ufd, strerror(errno)); - return false; - } - if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) { - vu_panic(dev, "%s Region (%d) doesn't support COPY", - __func__, i); - return false; - } - DPRINT("%s: region %d: Registered userfault for %llx + %llx\n", - __func__, i, reg_struct.range.start, reg_struct.range.len); - /* Now it's registered we can let the client at it */ - if (mprotect((void *)dev_region->mmap_addr, - dev_region->size + dev_region->mmap_offset, - PROT_READ | PROT_WRITE)) { - vu_panic(dev, "failed to mprotect region %d for postcopy (%s)", - i, strerror(errno)); - return false; - } - /* TODO: Stash 'zero' support flags somewhere */ -#endif - } - - return false; -} - -static bool -vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg) -{ - int i; - VhostUserMemory *memory = &vmsg->payload.memory; - - for (i = 0; i < dev->nregions; i++) { - VuDevRegion *r = &dev->regions[i]; - void *m = (void *) (uintptr_t) r->mmap_addr; - - if (m) { - munmap(m, r->size + r->mmap_offset); - } - } - dev->nregions = memory->nregions; - - if (dev->postcopy_listening) { - return vu_set_mem_table_exec_postcopy(dev, vmsg); - } - - DPRINT("Nregions: %d\n", memory->nregions); - for (i = 0; i < dev->nregions; i++) { - void *mmap_addr; - VhostUserMemoryRegion *msg_region = &memory->regions[i]; - VuDevRegion *dev_region = &dev->regions[i]; - - DPRINT("Region %d\n", i); - DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n", - msg_region->guest_phys_addr); - DPRINT(" memory_size: 0x%016"PRIx64"\n", - msg_region->memory_size); - DPRINT(" userspace_addr 0x%016"PRIx64"\n", - msg_region->userspace_addr); - DPRINT(" mmap_offset 0x%016"PRIx64"\n", - msg_region->mmap_offset); - - dev_region->gpa = msg_region->guest_phys_addr; - dev_region->size = msg_region->memory_size; - dev_region->qva = msg_region->userspace_addr; - dev_region->mmap_offset = msg_region->mmap_offset; - - /* We don't use offset argument of mmap() since the - * mapped address has to be page aligned, and we use huge - * pages. */ - mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset, - PROT_READ | PROT_WRITE, MAP_SHARED, - vmsg->fds[i], 0); - - if (mmap_addr == MAP_FAILED) { - vu_panic(dev, "region mmap error: %s", strerror(errno)); - } else { - dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr; - DPRINT(" mmap_addr: 0x%016"PRIx64"\n", - dev_region->mmap_addr); - } - - close(vmsg->fds[i]); - } - - return false; -} - -static bool -vu_set_log_base_exec(VuDev *dev, VhostUserMsg *vmsg) -{ - int fd; - uint64_t log_mmap_size, log_mmap_offset; - void *rc; - - if (vmsg->fd_num != 1 || - vmsg->size != sizeof(vmsg->payload.log)) { - vu_panic(dev, "Invalid log_base message"); - return true; - } - - fd = vmsg->fds[0]; - log_mmap_offset = vmsg->payload.log.mmap_offset; - log_mmap_size = vmsg->payload.log.mmap_size; - DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset); - DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size); - - rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, - log_mmap_offset); - close(fd); - if (rc == MAP_FAILED) { - perror("log mmap error"); - } - - if (dev->log_table) { - munmap(dev->log_table, dev->log_size); - } - dev->log_table = rc; - dev->log_size = log_mmap_size; - - vmsg->size = sizeof(vmsg->payload.u64); - vmsg->fd_num = 0; - - return true; -} - -static bool -vu_set_log_fd_exec(VuDev *dev, VhostUserMsg *vmsg) -{ - if (vmsg->fd_num != 1) { - vu_panic(dev, "Invalid log_fd message"); - return false; - } - - if (dev->log_call_fd != -1) { - close(dev->log_call_fd); - } - dev->log_call_fd = vmsg->fds[0]; - DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]); - - return false; -} - -static bool -vu_set_vring_num_exec(VuDev *dev, VhostUserMsg *vmsg) -{ - unsigned int index = vmsg->payload.state.index; - unsigned int num = vmsg->payload.state.num; - - DPRINT("State.index: %d\n", index); - DPRINT("State.num: %d\n", num); - dev->vq[index].vring.num = num; - - return false; -} - -static bool -vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg) -{ - struct vhost_vring_addr *vra = &vmsg->payload.addr; - unsigned int index = vra->index; - VuVirtq *vq = &dev->vq[index]; - - DPRINT("vhost_vring_addr:\n"); - DPRINT(" index: %d\n", vra->index); - DPRINT(" flags: %d\n", vra->flags); - DPRINT(" desc_user_addr: 0x%016llx\n", vra->desc_user_addr); - DPRINT(" used_user_addr: 0x%016llx\n", vra->used_user_addr); - DPRINT(" avail_user_addr: 0x%016llx\n", vra->avail_user_addr); - DPRINT(" log_guest_addr: 0x%016llx\n", vra->log_guest_addr); - - vq->vring.flags = vra->flags; - vq->vring.desc = qva_to_va(dev, vra->desc_user_addr); - vq->vring.used = qva_to_va(dev, vra->used_user_addr); - vq->vring.avail = qva_to_va(dev, vra->avail_user_addr); - vq->vring.log_guest_addr = vra->log_guest_addr; - - DPRINT("Setting virtq addresses:\n"); - DPRINT(" vring_desc at %p\n", vq->vring.desc); - DPRINT(" vring_used at %p\n", vq->vring.used); - DPRINT(" vring_avail at %p\n", vq->vring.avail); - - if (!(vq->vring.desc && vq->vring.used && vq->vring.avail)) { - vu_panic(dev, "Invalid vring_addr message"); - return false; - } - - vq->used_idx = vq->vring.used->idx; - - if (vq->last_avail_idx != vq->used_idx) { - bool resume = dev->iface->queue_is_processed_in_order && - dev->iface->queue_is_processed_in_order(dev, index); - - DPRINT("Last avail index != used index: %u != %u%s\n", - vq->last_avail_idx, vq->used_idx, - resume ? ", resuming" : ""); - - if (resume) { - vq->shadow_avail_idx = vq->last_avail_idx = vq->used_idx; - } - } - - return false; -} - -static bool -vu_set_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg) -{ - unsigned int index = vmsg->payload.state.index; - unsigned int num = vmsg->payload.state.num; - - DPRINT("State.index: %d\n", index); - DPRINT("State.num: %d\n", num); - dev->vq[index].shadow_avail_idx = dev->vq[index].last_avail_idx = num; - - return false; -} - -static bool -vu_get_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg) -{ - unsigned int index = vmsg->payload.state.index; - - DPRINT("State.index: %d\n", index); - vmsg->payload.state.num = dev->vq[index].last_avail_idx; - vmsg->size = sizeof(vmsg->payload.state); - - dev->vq[index].started = false; - if (dev->iface->queue_set_started) { - dev->iface->queue_set_started(dev, index, false); - } - - if (dev->vq[index].call_fd != -1) { - close(dev->vq[index].call_fd); - dev->vq[index].call_fd = -1; - } - if (dev->vq[index].kick_fd != -1) { - dev->remove_watch(dev, dev->vq[index].kick_fd); - close(dev->vq[index].kick_fd); - dev->vq[index].kick_fd = -1; - } - - return true; -} - -static bool -vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg) -{ - int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; - - if (index >= VHOST_MAX_NR_VIRTQUEUE) { - vmsg_close_fds(vmsg); - vu_panic(dev, "Invalid queue index: %u", index); - return false; - } - - if (vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK || - vmsg->fd_num != 1) { - vmsg_close_fds(vmsg); - vu_panic(dev, "Invalid fds in request: %d", vmsg->request); - return false; - } - - return true; -} - -static bool -vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg) -{ - int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; - - DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); - - if (!vu_check_queue_msg_file(dev, vmsg)) { - return false; - } - - if (dev->vq[index].kick_fd != -1) { - dev->remove_watch(dev, dev->vq[index].kick_fd); - close(dev->vq[index].kick_fd); - dev->vq[index].kick_fd = -1; - } - - if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) { - dev->vq[index].kick_fd = vmsg->fds[0]; - DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index); - } - - dev->vq[index].started = true; - if (dev->iface->queue_set_started) { - dev->iface->queue_set_started(dev, index, true); - } - - if (dev->vq[index].kick_fd != -1 && dev->vq[index].handler) { - dev->set_watch(dev, dev->vq[index].kick_fd, VU_WATCH_IN, - vu_kick_cb, (void *)(long)index); - - DPRINT("Waiting for kicks on fd: %d for vq: %d\n", - dev->vq[index].kick_fd, index); - } - - return false; -} - -void vu_set_queue_handler(VuDev *dev, VuVirtq *vq, - vu_queue_handler_cb handler) -{ - int qidx = vq - dev->vq; - - vq->handler = handler; - if (vq->kick_fd >= 0) { - if (handler) { - dev->set_watch(dev, vq->kick_fd, VU_WATCH_IN, - vu_kick_cb, (void *)(long)qidx); - } else { - dev->remove_watch(dev, vq->kick_fd); - } - } -} - -bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, - int size, int offset) -{ - int qidx = vq - dev->vq; - int fd_num = 0; - VhostUserMsg vmsg = { - .request = VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG, - .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, - .size = sizeof(vmsg.payload.area), - .payload.area = { - .u64 = qidx & VHOST_USER_VRING_IDX_MASK, - .size = size, - .offset = offset, - }, - }; - - if (fd == -1) { - vmsg.payload.area.u64 |= VHOST_USER_VRING_NOFD_MASK; - } else { - vmsg.fds[fd_num++] = fd; - } - - vmsg.fd_num = fd_num; - - if ((dev->protocol_features & VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) == 0) { - return false; - } - - if (!vu_message_write(dev, dev->slave_fd, &vmsg)) { - return false; - } - - return vu_process_message_reply(dev, &vmsg); -} - -static bool -vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg) -{ - int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; - - DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); - - if (!vu_check_queue_msg_file(dev, vmsg)) { - return false; - } - - if (dev->vq[index].call_fd != -1) { - close(dev->vq[index].call_fd); - dev->vq[index].call_fd = -1; - } - - if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) { - dev->vq[index].call_fd = vmsg->fds[0]; - } - - DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index); - - return false; -} - -static bool -vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg) -{ - int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; - - DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); - - if (!vu_check_queue_msg_file(dev, vmsg)) { - return false; - } - - if (dev->vq[index].err_fd != -1) { - close(dev->vq[index].err_fd); - dev->vq[index].err_fd = -1; - } - - if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) { - dev->vq[index].err_fd = vmsg->fds[0]; - } - - return false; -} - -static bool -vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg) -{ - uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | - 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ | - 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | - 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD; - - if (have_userfault()) { - features |= 1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT; - } - - if (dev->iface->get_protocol_features) { - features |= dev->iface->get_protocol_features(dev); - } - - vmsg->payload.u64 = features; - vmsg->size = sizeof(vmsg->payload.u64); - vmsg->fd_num = 0; - - return true; -} - -static bool -vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg) -{ - uint64_t features = vmsg->payload.u64; - - DPRINT("u64: 0x%016"PRIx64"\n", features); - - dev->protocol_features = vmsg->payload.u64; - - if (dev->iface->set_protocol_features) { - dev->iface->set_protocol_features(dev, features); - } - - return false; -} - -static bool -vu_get_queue_num_exec(VuDev *dev, VhostUserMsg *vmsg) -{ - DPRINT("Function %s() not implemented yet.\n", __func__); - return false; -} - -static bool -vu_set_vring_enable_exec(VuDev *dev, VhostUserMsg *vmsg) -{ - unsigned int index = vmsg->payload.state.index; - unsigned int enable = vmsg->payload.state.num; - - DPRINT("State.index: %d\n", index); - DPRINT("State.enable: %d\n", enable); - - if (index >= VHOST_MAX_NR_VIRTQUEUE) { - vu_panic(dev, "Invalid vring_enable index: %u", index); - return false; - } - - dev->vq[index].enable = enable; - return false; -} - -static bool -vu_set_slave_req_fd(VuDev *dev, VhostUserMsg *vmsg) -{ - if (vmsg->fd_num != 1) { - vu_panic(dev, "Invalid slave_req_fd message (%d fd's)", vmsg->fd_num); - return false; - } - - if (dev->slave_fd != -1) { - close(dev->slave_fd); - } - dev->slave_fd = vmsg->fds[0]; - DPRINT("Got slave_fd: %d\n", vmsg->fds[0]); - - return false; -} - -static bool -vu_get_config(VuDev *dev, VhostUserMsg *vmsg) -{ - int ret = -1; - - if (dev->iface->get_config) { - ret = dev->iface->get_config(dev, vmsg->payload.config.region, - vmsg->payload.config.size); - } - - if (ret) { - /* resize to zero to indicate an error to master */ - vmsg->size = 0; - } - - return true; -} - -static bool -vu_set_config(VuDev *dev, VhostUserMsg *vmsg) -{ - int ret = -1; - - if (dev->iface->set_config) { - ret = dev->iface->set_config(dev, vmsg->payload.config.region, - vmsg->payload.config.offset, - vmsg->payload.config.size, - vmsg->payload.config.flags); - if (ret) { - vu_panic(dev, "Set virtio configuration space failed"); - } - } - - return false; -} - -static bool -vu_set_postcopy_advise(VuDev *dev, VhostUserMsg *vmsg) -{ - dev->postcopy_ufd = -1; -#ifdef UFFDIO_API - struct uffdio_api api_struct; - - dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); - vmsg->size = 0; -#endif - - if (dev->postcopy_ufd == -1) { - vu_panic(dev, "Userfaultfd not available: %s", strerror(errno)); - goto out; - } - -#ifdef UFFDIO_API - api_struct.api = UFFD_API; - api_struct.features = 0; - if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) { - vu_panic(dev, "Failed UFFDIO_API: %s", strerror(errno)); - close(dev->postcopy_ufd); - dev->postcopy_ufd = -1; - goto out; - } - /* TODO: Stash feature flags somewhere */ -#endif - -out: - /* Return a ufd to the QEMU */ - vmsg->fd_num = 1; - vmsg->fds[0] = dev->postcopy_ufd; - return true; /* = send a reply */ -} - -static bool -vu_set_postcopy_listen(VuDev *dev, VhostUserMsg *vmsg) -{ - vmsg->payload.u64 = -1; - vmsg->size = sizeof(vmsg->payload.u64); - - if (dev->nregions) { - vu_panic(dev, "Regions already registered at postcopy-listen"); - return true; - } - dev->postcopy_listening = true; - - vmsg->flags = VHOST_USER_VERSION | VHOST_USER_REPLY_MASK; - vmsg->payload.u64 = 0; /* Success */ - return true; -} - -static bool -vu_set_postcopy_end(VuDev *dev, VhostUserMsg *vmsg) -{ - DPRINT("%s: Entry\n", __func__); - dev->postcopy_listening = false; - if (dev->postcopy_ufd > 0) { - close(dev->postcopy_ufd); - dev->postcopy_ufd = -1; - DPRINT("%s: Done close\n", __func__); - } - - vmsg->fd_num = 0; - vmsg->payload.u64 = 0; - vmsg->size = sizeof(vmsg->payload.u64); - vmsg->flags = VHOST_USER_VERSION | VHOST_USER_REPLY_MASK; - DPRINT("%s: exit\n", __func__); - return true; -} - -static bool -vu_process_message(VuDev *dev, VhostUserMsg *vmsg) -{ - int do_reply = 0; - - /* Print out generic part of the request. */ - DPRINT("================ Vhost user message ================\n"); - DPRINT("Request: %s (%d)\n", vu_request_to_string(vmsg->request), - vmsg->request); - DPRINT("Flags: 0x%x\n", vmsg->flags); - DPRINT("Size: %d\n", vmsg->size); - - if (vmsg->fd_num) { - int i; - DPRINT("Fds:"); - for (i = 0; i < vmsg->fd_num; i++) { - DPRINT(" %d", vmsg->fds[i]); - } - DPRINT("\n"); - } - - if (dev->iface->process_msg && - dev->iface->process_msg(dev, vmsg, &do_reply)) { - return do_reply; - } - - switch (vmsg->request) { - case VHOST_USER_GET_FEATURES: - return vu_get_features_exec(dev, vmsg); - case VHOST_USER_SET_FEATURES: - return vu_set_features_exec(dev, vmsg); - case VHOST_USER_GET_PROTOCOL_FEATURES: - return vu_get_protocol_features_exec(dev, vmsg); - case VHOST_USER_SET_PROTOCOL_FEATURES: - return vu_set_protocol_features_exec(dev, vmsg); - case VHOST_USER_SET_OWNER: - return vu_set_owner_exec(dev, vmsg); - case VHOST_USER_RESET_OWNER: - return vu_reset_device_exec(dev, vmsg); - case VHOST_USER_SET_MEM_TABLE: - return vu_set_mem_table_exec(dev, vmsg); - case VHOST_USER_SET_LOG_BASE: - return vu_set_log_base_exec(dev, vmsg); - case VHOST_USER_SET_LOG_FD: - return vu_set_log_fd_exec(dev, vmsg); - case VHOST_USER_SET_VRING_NUM: - return vu_set_vring_num_exec(dev, vmsg); - case VHOST_USER_SET_VRING_ADDR: - return vu_set_vring_addr_exec(dev, vmsg); - case VHOST_USER_SET_VRING_BASE: - return vu_set_vring_base_exec(dev, vmsg); - case VHOST_USER_GET_VRING_BASE: - return vu_get_vring_base_exec(dev, vmsg); - case VHOST_USER_SET_VRING_KICK: - return vu_set_vring_kick_exec(dev, vmsg); - case VHOST_USER_SET_VRING_CALL: - return vu_set_vring_call_exec(dev, vmsg); - case VHOST_USER_SET_VRING_ERR: - return vu_set_vring_err_exec(dev, vmsg); - case VHOST_USER_GET_QUEUE_NUM: - return vu_get_queue_num_exec(dev, vmsg); - case VHOST_USER_SET_VRING_ENABLE: - return vu_set_vring_enable_exec(dev, vmsg); - case VHOST_USER_SET_SLAVE_REQ_FD: - return vu_set_slave_req_fd(dev, vmsg); - case VHOST_USER_GET_CONFIG: - return vu_get_config(dev, vmsg); - case VHOST_USER_SET_CONFIG: - return vu_set_config(dev, vmsg); - case VHOST_USER_NONE: - break; - case VHOST_USER_POSTCOPY_ADVISE: - return vu_set_postcopy_advise(dev, vmsg); - case VHOST_USER_POSTCOPY_LISTEN: - return vu_set_postcopy_listen(dev, vmsg); - case VHOST_USER_POSTCOPY_END: - return vu_set_postcopy_end(dev, vmsg); - default: - vmsg_close_fds(vmsg); - vu_panic(dev, "Unhandled request: %d", vmsg->request); - } - - return false; -} - -bool -vu_dispatch(VuDev *dev) -{ - VhostUserMsg vmsg = { 0, }; - int reply_requested; - bool success = false; - - if (!vu_message_read(dev, dev->sock, &vmsg)) { - goto end; - } - - reply_requested = vu_process_message(dev, &vmsg); - if (!reply_requested) { - success = true; - goto end; - } - - if (!vu_send_reply(dev, dev->sock, &vmsg)) { - goto end; - } - - success = true; - -end: - free(vmsg.data); - return success; -} - -void -vu_deinit(VuDev *dev) -{ - int i; - - for (i = 0; i < dev->nregions; i++) { - VuDevRegion *r = &dev->regions[i]; - void *m = (void *) (uintptr_t) r->mmap_addr; - if (m != MAP_FAILED) { - munmap(m, r->size + r->mmap_offset); - } - } - dev->nregions = 0; - - for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) { - VuVirtq *vq = &dev->vq[i]; - - if (vq->call_fd != -1) { - close(vq->call_fd); - vq->call_fd = -1; - } - - if (vq->kick_fd != -1) { - close(vq->kick_fd); - vq->kick_fd = -1; - } - - if (vq->err_fd != -1) { - close(vq->err_fd); - vq->err_fd = -1; - } - } - - - vu_close_log(dev); - if (dev->slave_fd != -1) { - close(dev->slave_fd); - dev->slave_fd = -1; - } - - if (dev->sock != -1) { - close(dev->sock); - } -} - -void -vu_init(VuDev *dev, - int socket, - vu_panic_cb panic, - vu_set_watch_cb set_watch, - vu_remove_watch_cb remove_watch, - const VuDevIface *iface) -{ - int i; - - assert(socket >= 0); - assert(set_watch); - assert(remove_watch); - assert(iface); - assert(panic); - - memset(dev, 0, sizeof(*dev)); - - dev->sock = socket; - dev->panic = panic; - dev->set_watch = set_watch; - dev->remove_watch = remove_watch; - dev->iface = iface; - dev->log_call_fd = -1; - dev->slave_fd = -1; - for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) { - dev->vq[i] = (VuVirtq) { - .call_fd = -1, .kick_fd = -1, .err_fd = -1, - .notification = true, - }; - } -} - -VuVirtq * -vu_get_queue(VuDev *dev, int qidx) -{ - assert(qidx < VHOST_MAX_NR_VIRTQUEUE); - return &dev->vq[qidx]; -} - -bool -vu_queue_enabled(VuDev *dev, VuVirtq *vq) -{ - return vq->enable; -} - -bool -vu_queue_started(const VuDev *dev, const VuVirtq *vq) -{ - return vq->started; -} - -static inline uint16_t -vring_avail_flags(VuVirtq *vq) -{ - return vq->vring.avail->flags; -} - -static inline uint16_t -vring_avail_idx(VuVirtq *vq) -{ - vq->shadow_avail_idx = vq->vring.avail->idx; - - return vq->shadow_avail_idx; -} - -static inline uint16_t -vring_avail_ring(VuVirtq *vq, int i) -{ - return vq->vring.avail->ring[i]; -} - -static inline uint16_t -vring_get_used_event(VuVirtq *vq) -{ - return vring_avail_ring(vq, vq->vring.num); -} - -static int -virtqueue_num_heads(VuDev *dev, VuVirtq *vq, unsigned int idx) -{ - uint16_t num_heads = vring_avail_idx(vq) - idx; - - /* Check it isn't doing very strange things with descriptor numbers. */ - if (num_heads > vq->vring.num) { - vu_panic(dev, "Guest moved used index from %u to %u", - idx, vq->shadow_avail_idx); - return -1; - } - if (num_heads) { - /* On success, callers read a descriptor at vq->last_avail_idx. - * Make sure descriptor read does not bypass avail index read. */ - smp_rmb(); - } - - return num_heads; -} - -static bool -virtqueue_get_head(VuDev *dev, VuVirtq *vq, - unsigned int idx, unsigned int *head) -{ - /* Grab the next descriptor number they're advertising, and increment - * the index we've seen. */ - *head = vring_avail_ring(vq, idx % vq->vring.num); - - /* If their number is silly, that's a fatal mistake. */ - if (*head >= vq->vring.num) { - vu_panic(dev, "Guest says index %u is available", head); - return false; - } - - return true; -} - -static int -virtqueue_read_indirect_desc(VuDev *dev, struct vring_desc *desc, - uint64_t addr, size_t len) -{ - struct vring_desc *ori_desc; - uint64_t read_len; - - if (len > (VIRTQUEUE_MAX_SIZE * sizeof(struct vring_desc))) { - return -1; - } - - if (len == 0) { - return -1; - } - - while (len) { - read_len = len; - ori_desc = vu_gpa_to_va(dev, &read_len, addr); - if (!ori_desc) { - return -1; - } - - memcpy(desc, ori_desc, read_len); - len -= read_len; - addr += read_len; - desc += read_len; - } - - return 0; -} - -enum { - VIRTQUEUE_READ_DESC_ERROR = -1, - VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */ - VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */ -}; - -static int -virtqueue_read_next_desc(VuDev *dev, struct vring_desc *desc, - int i, unsigned int max, unsigned int *next) -{ - /* If this descriptor says it doesn't chain, we're done. */ - if (!(desc[i].flags & VRING_DESC_F_NEXT)) { - return VIRTQUEUE_READ_DESC_DONE; - } - - /* Check they're not leading us off end of descriptors. */ - *next = desc[i].next; - /* Make sure compiler knows to grab that: we don't want it changing! */ - smp_wmb(); - - if (*next >= max) { - vu_panic(dev, "Desc next is %u", next); - return VIRTQUEUE_READ_DESC_ERROR; - } - - return VIRTQUEUE_READ_DESC_MORE; -} - -void -vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes, - unsigned int *out_bytes, - unsigned max_in_bytes, unsigned max_out_bytes) -{ - unsigned int idx; - unsigned int total_bufs, in_total, out_total; - int rc; - - idx = vq->last_avail_idx; - - total_bufs = in_total = out_total = 0; - if (unlikely(dev->broken) || - unlikely(!vq->vring.avail)) { - goto done; - } - - while ((rc = virtqueue_num_heads(dev, vq, idx)) > 0) { - unsigned int max, desc_len, num_bufs, indirect = 0; - uint64_t desc_addr, read_len; - struct vring_desc *desc; - struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE]; - unsigned int i; - - max = vq->vring.num; - num_bufs = total_bufs; - if (!virtqueue_get_head(dev, vq, idx++, &i)) { - goto err; - } - desc = vq->vring.desc; - - if (desc[i].flags & VRING_DESC_F_INDIRECT) { - if (desc[i].len % sizeof(struct vring_desc)) { - vu_panic(dev, "Invalid size for indirect buffer table"); - goto err; - } - - /* If we've got too many, that implies a descriptor loop. */ - if (num_bufs >= max) { - vu_panic(dev, "Looped descriptor"); - goto err; - } - - /* loop over the indirect descriptor table */ - indirect = 1; - desc_addr = desc[i].addr; - desc_len = desc[i].len; - max = desc_len / sizeof(struct vring_desc); - read_len = desc_len; - desc = vu_gpa_to_va(dev, &read_len, desc_addr); - if (unlikely(desc && read_len != desc_len)) { - /* Failed to use zero copy */ - desc = NULL; - if (!virtqueue_read_indirect_desc(dev, desc_buf, - desc_addr, - desc_len)) { - desc = desc_buf; - } - } - if (!desc) { - vu_panic(dev, "Invalid indirect buffer table"); - goto err; - } - num_bufs = i = 0; - } - - do { - /* If we've got too many, that implies a descriptor loop. */ - if (++num_bufs > max) { - vu_panic(dev, "Looped descriptor"); - goto err; - } - - if (desc[i].flags & VRING_DESC_F_WRITE) { - in_total += desc[i].len; - } else { - out_total += desc[i].len; - } - if (in_total >= max_in_bytes && out_total >= max_out_bytes) { - goto done; - } - rc = virtqueue_read_next_desc(dev, desc, i, max, &i); - } while (rc == VIRTQUEUE_READ_DESC_MORE); - - if (rc == VIRTQUEUE_READ_DESC_ERROR) { - goto err; - } - - if (!indirect) { - total_bufs = num_bufs; - } else { - total_bufs++; - } - } - if (rc < 0) { - goto err; - } -done: - if (in_bytes) { - *in_bytes = in_total; - } - if (out_bytes) { - *out_bytes = out_total; - } - return; - -err: - in_total = out_total = 0; - goto done; -} - -bool -vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes, - unsigned int out_bytes) -{ - unsigned int in_total, out_total; - - vu_queue_get_avail_bytes(dev, vq, &in_total, &out_total, - in_bytes, out_bytes); - - return in_bytes <= in_total && out_bytes <= out_total; -} - -/* Fetch avail_idx from VQ memory only when we really need to know if - * guest has added some buffers. */ -bool -vu_queue_empty(VuDev *dev, VuVirtq *vq) -{ - if (unlikely(dev->broken) || - unlikely(!vq->vring.avail)) { - return true; - } - - if (vq->shadow_avail_idx != vq->last_avail_idx) { - return false; - } - - return vring_avail_idx(vq) == vq->last_avail_idx; -} - -static inline -bool has_feature(uint64_t features, unsigned int fbit) -{ - assert(fbit < 64); - return !!(features & (1ULL << fbit)); -} - -static inline -bool vu_has_feature(VuDev *dev, - unsigned int fbit) -{ - return has_feature(dev->features, fbit); -} - -static bool -vring_notify(VuDev *dev, VuVirtq *vq) -{ - uint16_t old, new; - bool v; - - /* We need to expose used array entries before checking used event. */ - smp_mb(); - - /* Always notify when queue is empty (when feature acknowledge) */ - if (vu_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) && - !vq->inuse && vu_queue_empty(dev, vq)) { - return true; - } - - if (!vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { - return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT); - } - - v = vq->signalled_used_valid; - vq->signalled_used_valid = true; - old = vq->signalled_used; - new = vq->signalled_used = vq->used_idx; - return !v || vring_need_event(vring_get_used_event(vq), new, old); -} - -void -vu_queue_notify(VuDev *dev, VuVirtq *vq) -{ - if (unlikely(dev->broken) || - unlikely(!vq->vring.avail)) { - return; - } - - if (!vring_notify(dev, vq)) { - DPRINT("skipped notify...\n"); - return; - } - - if (eventfd_write(vq->call_fd, 1) < 0) { - vu_panic(dev, "Error writing eventfd: %s", strerror(errno)); - } -} - -static inline void -vring_used_flags_set_bit(VuVirtq *vq, int mask) -{ - uint16_t *flags; - - flags = (uint16_t *)((char*)vq->vring.used + - offsetof(struct vring_used, flags)); - *flags |= mask; -} - -static inline void -vring_used_flags_unset_bit(VuVirtq *vq, int mask) -{ - uint16_t *flags; - - flags = (uint16_t *)((char*)vq->vring.used + - offsetof(struct vring_used, flags)); - *flags &= ~mask; -} - -static inline void -vring_set_avail_event(VuVirtq *vq, uint16_t val) -{ - if (!vq->notification) { - return; - } - - *((uint16_t *) &vq->vring.used->ring[vq->vring.num]) = val; -} - -void -vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable) -{ - vq->notification = enable; - if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { - vring_set_avail_event(vq, vring_avail_idx(vq)); - } else if (enable) { - vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY); - } else { - vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY); - } - if (enable) { - /* Expose avail event/used flags before caller checks the avail idx. */ - smp_mb(); - } -} - -static void -virtqueue_map_desc(VuDev *dev, - unsigned int *p_num_sg, struct iovec *iov, - unsigned int max_num_sg, bool is_write, - uint64_t pa, size_t sz) -{ - unsigned num_sg = *p_num_sg; - - assert(num_sg <= max_num_sg); - - if (!sz) { - vu_panic(dev, "virtio: zero sized buffers are not allowed"); - return; - } - - while (sz) { - uint64_t len = sz; - - if (num_sg == max_num_sg) { - vu_panic(dev, "virtio: too many descriptors in indirect table"); - return; - } - - iov[num_sg].iov_base = vu_gpa_to_va(dev, &len, pa); - if (iov[num_sg].iov_base == NULL) { - vu_panic(dev, "virtio: invalid address for buffers"); - return; - } - iov[num_sg].iov_len = len; - num_sg++; - sz -= len; - pa += len; - } - - *p_num_sg = num_sg; -} - -/* Round number down to multiple */ -#define ALIGN_DOWN(n, m) ((n) / (m) * (m)) - -/* Round number up to multiple */ -#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m)) - -static void * -virtqueue_alloc_element(size_t sz, - unsigned out_num, unsigned in_num) -{ - VuVirtqElement *elem; - size_t in_sg_ofs = ALIGN_UP(sz, __alignof__(elem->in_sg[0])); - size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]); - size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]); - - assert(sz >= sizeof(VuVirtqElement)); - elem = malloc(out_sg_end); - elem->out_num = out_num; - elem->in_num = in_num; - elem->in_sg = (void *)elem + in_sg_ofs; - elem->out_sg = (void *)elem + out_sg_ofs; - return elem; -} - -void * -vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz) -{ - unsigned int i, head, max, desc_len; - uint64_t desc_addr, read_len; - VuVirtqElement *elem; - unsigned out_num, in_num; - struct iovec iov[VIRTQUEUE_MAX_SIZE]; - struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE]; - struct vring_desc *desc; - int rc; - - if (unlikely(dev->broken) || - unlikely(!vq->vring.avail)) { - return NULL; - } - - if (vu_queue_empty(dev, vq)) { - return NULL; - } - /* Needed after virtio_queue_empty(), see comment in - * virtqueue_num_heads(). */ - smp_rmb(); - - /* When we start there are none of either input nor output. */ - out_num = in_num = 0; - - max = vq->vring.num; - if (vq->inuse >= vq->vring.num) { - vu_panic(dev, "Virtqueue size exceeded"); - return NULL; - } - - if (!virtqueue_get_head(dev, vq, vq->last_avail_idx++, &head)) { - return NULL; - } - - if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { - vring_set_avail_event(vq, vq->last_avail_idx); - } - - i = head; - desc = vq->vring.desc; - if (desc[i].flags & VRING_DESC_F_INDIRECT) { - if (desc[i].len % sizeof(struct vring_desc)) { - vu_panic(dev, "Invalid size for indirect buffer table"); - } - - /* loop over the indirect descriptor table */ - desc_addr = desc[i].addr; - desc_len = desc[i].len; - max = desc_len / sizeof(struct vring_desc); - read_len = desc_len; - desc = vu_gpa_to_va(dev, &read_len, desc_addr); - if (unlikely(desc && read_len != desc_len)) { - /* Failed to use zero copy */ - desc = NULL; - if (!virtqueue_read_indirect_desc(dev, desc_buf, - desc_addr, - desc_len)) { - desc = desc_buf; - } - } - if (!desc) { - vu_panic(dev, "Invalid indirect buffer table"); - return NULL; - } - i = 0; - } - - /* Collect all the descriptors */ - do { - if (desc[i].flags & VRING_DESC_F_WRITE) { - virtqueue_map_desc(dev, &in_num, iov + out_num, - VIRTQUEUE_MAX_SIZE - out_num, true, - desc[i].addr, desc[i].len); - } else { - if (in_num) { - vu_panic(dev, "Incorrect order for descriptors"); - return NULL; - } - virtqueue_map_desc(dev, &out_num, iov, - VIRTQUEUE_MAX_SIZE, false, - desc[i].addr, desc[i].len); - } - - /* If we've got too many, that implies a descriptor loop. */ - if ((in_num + out_num) > max) { - vu_panic(dev, "Looped descriptor"); - } - rc = virtqueue_read_next_desc(dev, desc, i, max, &i); - } while (rc == VIRTQUEUE_READ_DESC_MORE); - - if (rc == VIRTQUEUE_READ_DESC_ERROR) { - return NULL; - } - - /* Now copy what we have collected and mapped */ - elem = virtqueue_alloc_element(sz, out_num, in_num); - elem->index = head; - for (i = 0; i < out_num; i++) { - elem->out_sg[i] = iov[i]; - } - for (i = 0; i < in_num; i++) { - elem->in_sg[i] = iov[out_num + i]; - } - - vq->inuse++; - - return elem; -} - -bool -vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num) -{ - if (num > vq->inuse) { - return false; - } - vq->last_avail_idx -= num; - vq->inuse -= num; - return true; -} - -static inline -void vring_used_write(VuDev *dev, VuVirtq *vq, - struct vring_used_elem *uelem, int i) -{ - struct vring_used *used = vq->vring.used; - - used->ring[i] = *uelem; - vu_log_write(dev, vq->vring.log_guest_addr + - offsetof(struct vring_used, ring[i]), - sizeof(used->ring[i])); -} - - -static void -vu_log_queue_fill(VuDev *dev, VuVirtq *vq, - const VuVirtqElement *elem, - unsigned int len) -{ - struct vring_desc *desc = vq->vring.desc; - unsigned int i, max, min, desc_len; - uint64_t desc_addr, read_len; - struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE]; - unsigned num_bufs = 0; - - max = vq->vring.num; - i = elem->index; - - if (desc[i].flags & VRING_DESC_F_INDIRECT) { - if (desc[i].len % sizeof(struct vring_desc)) { - vu_panic(dev, "Invalid size for indirect buffer table"); - } - - /* loop over the indirect descriptor table */ - desc_addr = desc[i].addr; - desc_len = desc[i].len; - max = desc_len / sizeof(struct vring_desc); - read_len = desc_len; - desc = vu_gpa_to_va(dev, &read_len, desc_addr); - if (unlikely(desc && read_len != desc_len)) { - /* Failed to use zero copy */ - desc = NULL; - if (!virtqueue_read_indirect_desc(dev, desc_buf, - desc_addr, - desc_len)) { - desc = desc_buf; - } - } - if (!desc) { - vu_panic(dev, "Invalid indirect buffer table"); - return; - } - i = 0; - } - - do { - if (++num_bufs > max) { - vu_panic(dev, "Looped descriptor"); - return; - } - - if (desc[i].flags & VRING_DESC_F_WRITE) { - min = MIN(desc[i].len, len); - vu_log_write(dev, desc[i].addr, min); - len -= min; - } - - } while (len > 0 && - (virtqueue_read_next_desc(dev, desc, i, max, &i) - == VIRTQUEUE_READ_DESC_MORE)); -} - -void -vu_queue_fill(VuDev *dev, VuVirtq *vq, - const VuVirtqElement *elem, - unsigned int len, unsigned int idx) -{ - struct vring_used_elem uelem; - - if (unlikely(dev->broken) || - unlikely(!vq->vring.avail)) { - return; - } - - vu_log_queue_fill(dev, vq, elem, len); - - idx = (idx + vq->used_idx) % vq->vring.num; - - uelem.id = elem->index; - uelem.len = len; - vring_used_write(dev, vq, &uelem, idx); -} - -static inline -void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val) -{ - vq->vring.used->idx = val; - vu_log_write(dev, - vq->vring.log_guest_addr + offsetof(struct vring_used, idx), - sizeof(vq->vring.used->idx)); - - vq->used_idx = val; -} - -void -vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int count) -{ - uint16_t old, new; - - if (unlikely(dev->broken) || - unlikely(!vq->vring.avail)) { - return; - } - - /* Make sure buffer is written before we update index. */ - smp_wmb(); - - old = vq->used_idx; - new = old + count; - vring_used_idx_set(dev, vq, new); - vq->inuse -= count; - if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) { - vq->signalled_used_valid = false; - } -} - -void -vu_queue_push(VuDev *dev, VuVirtq *vq, - const VuVirtqElement *elem, unsigned int len) -{ - vu_queue_fill(dev, vq, elem, len, 0); - vu_queue_flush(dev, vq, 1); -} diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h deleted file mode 100644 index 4aa55b4d2d..0000000000 --- a/contrib/libvhost-user/libvhost-user.h +++ /dev/null @@ -1,539 +0,0 @@ -/* - * Vhost User library - * - * Copyright (c) 2016 Red Hat, Inc. - * - * Authors: - * Victor Kaplansky <victork@redhat.com> - * Marc-AndrĂ© Lureau <mlureau@redhat.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or - * later. See the COPYING file in the top-level directory. - */ - -#ifndef LIBVHOST_USER_H -#define LIBVHOST_USER_H - -#include <stdint.h> -#include <stdbool.h> -#include <stddef.h> -#include <sys/poll.h> -#include <linux/vhost.h> -#include "standard-headers/linux/virtio_ring.h" - -/* Based on qemu/hw/virtio/vhost-user.c */ -#define VHOST_USER_F_PROTOCOL_FEATURES 30 -#define VHOST_LOG_PAGE 4096 - -#define VHOST_MAX_NR_VIRTQUEUE 8 -#define VIRTQUEUE_MAX_SIZE 1024 - -#define VHOST_MEMORY_MAX_NREGIONS 8 - -typedef enum VhostSetConfigType { - VHOST_SET_CONFIG_TYPE_MASTER = 0, - VHOST_SET_CONFIG_TYPE_MIGRATION = 1, -} VhostSetConfigType; - -/* - * Maximum size of virtio device config space - */ -#define VHOST_USER_MAX_CONFIG_SIZE 256 - -enum VhostUserProtocolFeature { - VHOST_USER_PROTOCOL_F_MQ = 0, - VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, - VHOST_USER_PROTOCOL_F_RARP = 2, - VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, - VHOST_USER_PROTOCOL_F_NET_MTU = 4, - VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, - VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, - VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, - VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, - VHOST_USER_PROTOCOL_F_CONFIG = 9, - VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, - - VHOST_USER_PROTOCOL_F_MAX -}; - -#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) - -typedef enum VhostUserRequest { - VHOST_USER_NONE = 0, - VHOST_USER_GET_FEATURES = 1, - VHOST_USER_SET_FEATURES = 2, - VHOST_USER_SET_OWNER = 3, - VHOST_USER_RESET_OWNER = 4, - VHOST_USER_SET_MEM_TABLE = 5, - VHOST_USER_SET_LOG_BASE = 6, - VHOST_USER_SET_LOG_FD = 7, - VHOST_USER_SET_VRING_NUM = 8, - VHOST_USER_SET_VRING_ADDR = 9, - VHOST_USER_SET_VRING_BASE = 10, - VHOST_USER_GET_VRING_BASE = 11, - VHOST_USER_SET_VRING_KICK = 12, - VHOST_USER_SET_VRING_CALL = 13, - VHOST_USER_SET_VRING_ERR = 14, - VHOST_USER_GET_PROTOCOL_FEATURES = 15, - VHOST_USER_SET_PROTOCOL_FEATURES = 16, - VHOST_USER_GET_QUEUE_NUM = 17, - VHOST_USER_SET_VRING_ENABLE = 18, - VHOST_USER_SEND_RARP = 19, - VHOST_USER_NET_SET_MTU = 20, - VHOST_USER_SET_SLAVE_REQ_FD = 21, - VHOST_USER_IOTLB_MSG = 22, - VHOST_USER_SET_VRING_ENDIAN = 23, - VHOST_USER_GET_CONFIG = 24, - VHOST_USER_SET_CONFIG = 25, - VHOST_USER_CREATE_CRYPTO_SESSION = 26, - VHOST_USER_CLOSE_CRYPTO_SESSION = 27, - VHOST_USER_POSTCOPY_ADVISE = 28, - VHOST_USER_POSTCOPY_LISTEN = 29, - VHOST_USER_POSTCOPY_END = 30, - VHOST_USER_MAX -} VhostUserRequest; - -typedef enum VhostUserSlaveRequest { - VHOST_USER_SLAVE_NONE = 0, - VHOST_USER_SLAVE_IOTLB_MSG = 1, - VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, - VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, - VHOST_USER_SLAVE_MAX -} VhostUserSlaveRequest; - -typedef struct VhostUserMemoryRegion { - uint64_t guest_phys_addr; - uint64_t memory_size; - uint64_t userspace_addr; - uint64_t mmap_offset; -} VhostUserMemoryRegion; - -typedef struct VhostUserMemory { - uint32_t nregions; - uint32_t padding; - VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; -} VhostUserMemory; - -typedef struct VhostUserLog { - uint64_t mmap_size; - uint64_t mmap_offset; -} VhostUserLog; - -typedef struct VhostUserConfig { - uint32_t offset; - uint32_t size; - uint32_t flags; - uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; -} VhostUserConfig; - -static VhostUserConfig c __attribute__ ((unused)); -#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ - + sizeof(c.size) \ - + sizeof(c.flags)) - -typedef struct VhostUserVringArea { - uint64_t u64; - uint64_t size; - uint64_t offset; -} VhostUserVringArea; - -#if defined(_WIN32) -# define VU_PACKED __attribute__((gcc_struct, packed)) -#else -# define VU_PACKED __attribute__((packed)) -#endif - -typedef struct VhostUserMsg { - VhostUserRequest request; - -#define VHOST_USER_VERSION_MASK (0x3) -#define VHOST_USER_REPLY_MASK (0x1 << 2) -#define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) - uint32_t flags; - uint32_t size; /* the following payload size */ - - union { -#define VHOST_USER_VRING_IDX_MASK (0xff) -#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) - uint64_t u64; - struct vhost_vring_state state; - struct vhost_vring_addr addr; - VhostUserMemory memory; - VhostUserLog log; - VhostUserConfig config; - VhostUserVringArea area; - } payload; - - int fds[VHOST_MEMORY_MAX_NREGIONS]; - int fd_num; - uint8_t *data; -} VU_PACKED VhostUserMsg; - -typedef struct VuDevRegion { - /* Guest Physical address. */ - uint64_t gpa; - /* Memory region size. */ - uint64_t size; - /* QEMU virtual address (userspace). */ - uint64_t qva; - /* Starting offset in our mmaped space. */ - uint64_t mmap_offset; - /* Start address of mmaped space. */ - uint64_t mmap_addr; -} VuDevRegion; - -typedef struct VuDev VuDev; - -typedef uint64_t (*vu_get_features_cb) (VuDev *dev); -typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features); -typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg, - int *do_reply); -typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started); -typedef bool (*vu_queue_is_processed_in_order_cb) (VuDev *dev, int qidx); -typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len); -typedef int (*vu_set_config_cb) (VuDev *dev, const uint8_t *data, - uint32_t offset, uint32_t size, - uint32_t flags); - -typedef struct VuDevIface { - /* called by VHOST_USER_GET_FEATURES to get the features bitmask */ - vu_get_features_cb get_features; - /* enable vhost implementation features */ - vu_set_features_cb set_features; - /* get the protocol feature bitmask from the underlying vhost - * implementation */ - vu_get_features_cb get_protocol_features; - /* enable protocol features in the underlying vhost implementation. */ - vu_set_features_cb set_protocol_features; - /* process_msg is called for each vhost-user message received */ - /* skip libvhost-user processing if return value != 0 */ - vu_process_msg_cb process_msg; - /* tells when queues can be processed */ - vu_queue_set_started_cb queue_set_started; - /* - * If the queue is processed in order, in which case it will be - * resumed to vring.used->idx. This can help to support resuming - * on unmanaged exit/crash. - */ - vu_queue_is_processed_in_order_cb queue_is_processed_in_order; - /* get the config space of the device */ - vu_get_config_cb get_config; - /* set the config space of the device */ - vu_set_config_cb set_config; -} VuDevIface; - -typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx); - -typedef struct VuRing { - unsigned int num; - struct vring_desc *desc; - struct vring_avail *avail; - struct vring_used *used; - uint64_t log_guest_addr; - uint32_t flags; -} VuRing; - -typedef struct VuVirtq { - VuRing vring; - - /* Next head to pop */ - uint16_t last_avail_idx; - - /* Last avail_idx read from VQ. */ - uint16_t shadow_avail_idx; - - uint16_t used_idx; - - /* Last used index value we have signalled on */ - uint16_t signalled_used; - - /* Last used index value we have signalled on */ - bool signalled_used_valid; - - /* Notification enabled? */ - bool notification; - - int inuse; - - vu_queue_handler_cb handler; - - int call_fd; - int kick_fd; - int err_fd; - unsigned int enable; - bool started; -} VuVirtq; - -enum VuWatchCondtion { - VU_WATCH_IN = POLLIN, - VU_WATCH_OUT = POLLOUT, - VU_WATCH_PRI = POLLPRI, - VU_WATCH_ERR = POLLERR, - VU_WATCH_HUP = POLLHUP, -}; - -typedef void (*vu_panic_cb) (VuDev *dev, const char *err); -typedef void (*vu_watch_cb) (VuDev *dev, int condition, void *data); -typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, int condition, - vu_watch_cb cb, void *data); -typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd); - -struct VuDev { - int sock; - uint32_t nregions; - VuDevRegion regions[VHOST_MEMORY_MAX_NREGIONS]; - VuVirtq vq[VHOST_MAX_NR_VIRTQUEUE]; - int log_call_fd; - int slave_fd; - uint64_t log_size; - uint8_t *log_table; - uint64_t features; - uint64_t protocol_features; - bool broken; - - /* @set_watch: add or update the given fd to the watch set, - * call cb when condition is met */ - vu_set_watch_cb set_watch; - - /* @remove_watch: remove the given fd from the watch set */ - vu_remove_watch_cb remove_watch; - - /* @panic: encountered an unrecoverable error, you may try to - * re-initialize */ - vu_panic_cb panic; - const VuDevIface *iface; - - /* Postcopy data */ - int postcopy_ufd; - bool postcopy_listening; -}; - -typedef struct VuVirtqElement { - unsigned int index; - unsigned int out_num; - unsigned int in_num; - struct iovec *in_sg; - struct iovec *out_sg; -} VuVirtqElement; - -/** - * vu_init: - * @dev: a VuDev context - * @socket: the socket connected to vhost-user master - * @panic: a panic callback - * @set_watch: a set_watch callback - * @remove_watch: a remove_watch callback - * @iface: a VuDevIface structure with vhost-user device callbacks - * - * Intializes a VuDev vhost-user context. - **/ -void vu_init(VuDev *dev, - int socket, - vu_panic_cb panic, - vu_set_watch_cb set_watch, - vu_remove_watch_cb remove_watch, - const VuDevIface *iface); - - -/** - * vu_deinit: - * @dev: a VuDev context - * - * Cleans up the VuDev context - */ -void vu_deinit(VuDev *dev); - -/** - * vu_dispatch: - * @dev: a VuDev context - * - * Process one vhost-user message. - * - * Returns: TRUE on success, FALSE on failure. - */ -bool vu_dispatch(VuDev *dev); - -/** - * vu_gpa_to_va: - * @dev: a VuDev context - * @plen: guest memory size - * @guest_addr: guest address - * - * Translate a guest address to a pointer. Returns NULL on failure. - */ -void *vu_gpa_to_va(VuDev *dev, uint64_t *plen, uint64_t guest_addr); - -/** - * vu_get_queue: - * @dev: a VuDev context - * @qidx: queue index - * - * Returns the queue number @qidx. - */ -VuVirtq *vu_get_queue(VuDev *dev, int qidx); - -/** - * vu_set_queue_handler: - * @dev: a VuDev context - * @vq: a VuVirtq queue - * @handler: the queue handler callback - * - * Set the queue handler. This function may be called several times - * for the same queue. If called with NULL @handler, the handler is - * removed. - */ -void vu_set_queue_handler(VuDev *dev, VuVirtq *vq, - vu_queue_handler_cb handler); - -/** - * vu_set_queue_host_notifier: - * @dev: a VuDev context - * @vq: a VuVirtq queue - * @fd: a file descriptor - * @size: host page size - * @offset: notifier offset in @fd file - * - * Set queue's host notifier. This function may be called several - * times for the same queue. If called with -1 @fd, the notifier - * is removed. - */ -bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, - int size, int offset); - -/** - * vu_queue_set_notification: - * @dev: a VuDev context - * @vq: a VuVirtq queue - * @enable: state - * - * Set whether the queue notifies (via event index or interrupt) - */ -void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable); - -/** - * vu_queue_enabled: - * @dev: a VuDev context - * @vq: a VuVirtq queue - * - * Returns: whether the queue is enabled. - */ -bool vu_queue_enabled(VuDev *dev, VuVirtq *vq); - -/** - * vu_queue_started: - * @dev: a VuDev context - * @vq: a VuVirtq queue - * - * Returns: whether the queue is started. - */ -bool vu_queue_started(const VuDev *dev, const VuVirtq *vq); - -/** - * vu_queue_empty: - * @dev: a VuDev context - * @vq: a VuVirtq queue - * - * Returns: true if the queue is empty or not ready. - */ -bool vu_queue_empty(VuDev *dev, VuVirtq *vq); - -/** - * vu_queue_notify: - * @dev: a VuDev context - * @vq: a VuVirtq queue - * - * Request to notify the queue via callfd (skipped if unnecessary) - */ -void vu_queue_notify(VuDev *dev, VuVirtq *vq); - -/** - * vu_queue_pop: - * @dev: a VuDev context - * @vq: a VuVirtq queue - * @sz: the size of struct to return (must be >= VuVirtqElement) - * - * Returns: a VuVirtqElement filled from the queue or NULL. The - * returned element must be free()-d by the caller. - */ -void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz); - -/** - * vu_queue_rewind: - * @dev: a VuDev context - * @vq: a VuVirtq queue - * @num: number of elements to push back - * - * Pretend that elements weren't popped from the virtqueue. The next - * virtqueue_pop() will refetch the oldest element. - * - * Returns: true on success, false if @num is greater than the number of in use - * elements. - */ -bool vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num); - -/** - * vu_queue_fill: - * @dev: a VuDev context - * @vq: a VuVirtq queue - * @elem: a VuVirtqElement - * @len: length in bytes to write - * @idx: optional offset for the used ring index (0 in general) - * - * Fill the used ring with @elem element. - */ -void vu_queue_fill(VuDev *dev, VuVirtq *vq, - const VuVirtqElement *elem, - unsigned int len, unsigned int idx); - -/** - * vu_queue_push: - * @dev: a VuDev context - * @vq: a VuVirtq queue - * @elem: a VuVirtqElement - * @len: length in bytes to write - * - * Helper that combines vu_queue_fill() with a vu_queue_flush(). - */ -void vu_queue_push(VuDev *dev, VuVirtq *vq, - const VuVirtqElement *elem, unsigned int len); - -/** - * vu_queue_flush: - * @dev: a VuDev context - * @vq: a VuVirtq queue - * @num: number of elements to flush - * - * Mark the last number of elements as done (used.idx is updated by - * num elements). -*/ -void vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int num); - -/** - * vu_queue_get_avail_bytes: - * @dev: a VuDev context - * @vq: a VuVirtq queue - * @in_bytes: in bytes - * @out_bytes: out bytes - * @max_in_bytes: stop counting after max_in_bytes - * @max_out_bytes: stop counting after max_out_bytes - * - * Count the number of available bytes, up to max_in_bytes/max_out_bytes. - */ -void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes, - unsigned int *out_bytes, - unsigned max_in_bytes, unsigned max_out_bytes); - -/** - * vu_queue_avail_bytes: - * @dev: a VuDev context - * @vq: a VuVirtq queue - * @in_bytes: expected in bytes - * @out_bytes: expected out bytes - * - * Returns: true if in_bytes <= in_total && out_bytes <= out_total - */ -bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes, - unsigned int out_bytes); - -#endif /* LIBVHOST_USER_H */ diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile new file mode 100644 index 0000000000..0b64d2c1e3 --- /dev/null +++ b/contrib/plugins/Makefile @@ -0,0 +1,68 @@ +# -*- Mode: makefile -*- +# +# This Makefile example is fairly independent from the main makefile +# so users can take and adapt it for their build. We only really +# include config-host.mak so we don't have to repeat probing for +# programs that the main configure has already done for us. +# + +include config-host.mak + +TOP_SRC_PATH = $(SRC_PATH)/../.. + +VPATH += $(SRC_PATH) + +NAMES := +NAMES += execlog +NAMES += hotblocks +NAMES += hotpages +NAMES += howvec + +# The lockstep example communicates using unix sockets, +# and can't be easily made to work on windows. +ifneq ($(CONFIG_WIN32),y) +NAMES += lockstep +endif + +NAMES += hwprofile +NAMES += cache +NAMES += drcov + +ifeq ($(CONFIG_WIN32),y) +SO_SUFFIX := .dll +LDLIBS += $(shell $(PKG_CONFIG) --libs glib-2.0) +else +SO_SUFFIX := .so +endif + +SONAMES := $(addsuffix $(SO_SUFFIX),$(addprefix lib,$(NAMES))) + +# The main QEMU uses Glib extensively so it's perfectly fine to use it +# in plugins (which many example do). +PLUGIN_CFLAGS := $(shell $(PKG_CONFIG) --cflags glib-2.0) +PLUGIN_CFLAGS += -fPIC -Wall +PLUGIN_CFLAGS += -I$(TOP_SRC_PATH)/include/qemu + +all: $(SONAMES) + +%.o: %.c + $(CC) $(CFLAGS) $(PLUGIN_CFLAGS) -c -o $@ $< + +ifeq ($(CONFIG_WIN32),y) +lib%$(SO_SUFFIX): %.o win32_linker.o ../../plugins/libqemu_plugin_api.a + $(CC) -shared -o $@ $^ $(LDLIBS) +else ifeq ($(CONFIG_DARWIN),y) +lib%$(SO_SUFFIX): %.o + $(CC) -bundle -Wl,-undefined,dynamic_lookup -o $@ $^ $(LDLIBS) +else +lib%$(SO_SUFFIX): %.o + $(CC) -shared -o $@ $^ $(LDLIBS) +endif + + +clean: + rm -f *.o *$(SO_SUFFIX) *.d + rm -Rf .libs + +.PHONY: all clean +.SECONDARY: diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c new file mode 100644 index 0000000000..c5c8ac75a9 --- /dev/null +++ b/contrib/plugins/cache.c @@ -0,0 +1,859 @@ +/* + * Copyright (C) 2021, Mahmoud Mandour <ma.mandourr@gmail.com> + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include <inttypes.h> +#include <stdio.h> +#include <glib.h> + +#include <qemu-plugin.h> + +#define STRTOLL(x) g_ascii_strtoll(x, NULL, 10) + +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; + +static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW; + +static GHashTable *miss_ht; + +static GMutex hashtable_lock; +static GRand *rng; + +static int limit; +static bool sys; + +enum EvictionPolicy { + LRU, + FIFO, + RAND, +}; + +enum EvictionPolicy policy; + +/* + * A CacheSet is a set of cache blocks. A memory block that maps to a set can be + * put in any of the blocks inside the set. The number of block per set is + * called the associativity (assoc). + * + * Each block contains the stored tag and a valid bit. Since this is not + * a functional simulator, the data itself is not stored. We only identify + * whether a block is in the cache or not by searching for its tag. + * + * In order to search for memory data in the cache, the set identifier and tag + * are extracted from the address and the set is probed to see whether a tag + * match occur. + * + * An address is logically divided into three portions: The block offset, + * the set number, and the tag. + * + * The set number is used to identify the set in which the block may exist. + * The tag is compared against all the tags of a set to search for a match. If a + * match is found, then the access is a hit. + * + * The CacheSet also contains bookkeaping information about eviction details. + */ + +typedef struct { + uint64_t tag; + bool valid; +} CacheBlock; + +typedef struct { + CacheBlock *blocks; + uint64_t *lru_priorities; + uint64_t lru_gen_counter; + GQueue *fifo_queue; +} CacheSet; + +typedef struct { + CacheSet *sets; + int num_sets; + int cachesize; + int assoc; + int blksize_shift; + uint64_t set_mask; + uint64_t tag_mask; + uint64_t accesses; + uint64_t misses; +} Cache; + +typedef struct { + char *disas_str; + const char *symbol; + uint64_t addr; + uint64_t l1_dmisses; + uint64_t l1_imisses; + uint64_t l2_misses; +} InsnData; + +void (*update_hit)(Cache *cache, int set, int blk); +void (*update_miss)(Cache *cache, int set, int blk); + +void (*metadata_init)(Cache *cache); +void (*metadata_destroy)(Cache *cache); + +static int cores; +static Cache **l1_dcaches, **l1_icaches; + +static bool use_l2; +static Cache **l2_ucaches; + +static GMutex *l1_dcache_locks; +static GMutex *l1_icache_locks; +static GMutex *l2_ucache_locks; + +static uint64_t l1_dmem_accesses; +static uint64_t l1_imem_accesses; +static uint64_t l1_imisses; +static uint64_t l1_dmisses; + +static uint64_t l2_mem_accesses; +static uint64_t l2_misses; + +static int pow_of_two(int num) +{ + g_assert((num & (num - 1)) == 0); + int ret = 0; + while (num /= 2) { + ret++; + } + return ret; +} + +/* + * LRU evection policy: For each set, a generation counter is maintained + * alongside a priority array. + * + * On each set access, the generation counter is incremented. + * + * On a cache hit: The hit-block is assigned the current generation counter, + * indicating that it is the most recently used block. + * + * On a cache miss: The block with the least priority is searched and replaced + * with the newly-cached block, of which the priority is set to the current + * generation number. + */ + +static void lru_priorities_init(Cache *cache) +{ + int i; + + for (i = 0; i < cache->num_sets; i++) { + cache->sets[i].lru_priorities = g_new0(uint64_t, cache->assoc); + cache->sets[i].lru_gen_counter = 0; + } +} + +static void lru_update_blk(Cache *cache, int set_idx, int blk_idx) +{ + CacheSet *set = &cache->sets[set_idx]; + set->lru_priorities[blk_idx] = cache->sets[set_idx].lru_gen_counter; + set->lru_gen_counter++; +} + +static int lru_get_lru_block(Cache *cache, int set_idx) +{ + int i, min_idx, min_priority; + + min_priority = cache->sets[set_idx].lru_priorities[0]; + min_idx = 0; + + for (i = 1; i < cache->assoc; i++) { + if (cache->sets[set_idx].lru_priorities[i] < min_priority) { + min_priority = cache->sets[set_idx].lru_priorities[i]; + min_idx = i; + } + } + return min_idx; +} + +static void lru_priorities_destroy(Cache *cache) +{ + int i; + + for (i = 0; i < cache->num_sets; i++) { + g_free(cache->sets[i].lru_priorities); + } +} + +/* + * FIFO eviction policy: a FIFO queue is maintained for each CacheSet that + * stores accesses to the cache. + * + * On a compulsory miss: The block index is enqueued to the fifo_queue to + * indicate that it's the latest cached block. + * + * On a conflict miss: The first-in block is removed from the cache and the new + * block is put in its place and enqueued to the FIFO queue. + */ + +static void fifo_init(Cache *cache) +{ + int i; + + for (i = 0; i < cache->num_sets; i++) { + cache->sets[i].fifo_queue = g_queue_new(); + } +} + +static int fifo_get_first_block(Cache *cache, int set) +{ + GQueue *q = cache->sets[set].fifo_queue; + return GPOINTER_TO_INT(g_queue_pop_tail(q)); +} + +static void fifo_update_on_miss(Cache *cache, int set, int blk_idx) +{ + GQueue *q = cache->sets[set].fifo_queue; + g_queue_push_head(q, GINT_TO_POINTER(blk_idx)); +} + +static void fifo_destroy(Cache *cache) +{ + int i; + + for (i = 0; i < cache->num_sets; i++) { + g_queue_free(cache->sets[i].fifo_queue); + } +} + +static inline uint64_t extract_tag(Cache *cache, uint64_t addr) +{ + return addr & cache->tag_mask; +} + +static inline uint64_t extract_set(Cache *cache, uint64_t addr) +{ + return (addr & cache->set_mask) >> cache->blksize_shift; +} + +static const char *cache_config_error(int blksize, int assoc, int cachesize) +{ + if (cachesize % blksize != 0) { + return "cache size must be divisible by block size"; + } else if (cachesize % (blksize * assoc) != 0) { + return "cache size must be divisible by set size (assoc * block size)"; + } else { + return NULL; + } +} + +static bool bad_cache_params(int blksize, int assoc, int cachesize) +{ + return (cachesize % blksize) != 0 || (cachesize % (blksize * assoc) != 0); +} + +static Cache *cache_init(int blksize, int assoc, int cachesize) +{ + Cache *cache; + int i; + uint64_t blk_mask; + + /* + * This function shall not be called directly, and hence expects suitable + * parameters. + */ + g_assert(!bad_cache_params(blksize, assoc, cachesize)); + + cache = g_new(Cache, 1); + cache->assoc = assoc; + cache->cachesize = cachesize; + cache->num_sets = cachesize / (blksize * assoc); + cache->sets = g_new(CacheSet, cache->num_sets); + cache->blksize_shift = pow_of_two(blksize); + cache->accesses = 0; + cache->misses = 0; + + for (i = 0; i < cache->num_sets; i++) { + cache->sets[i].blocks = g_new0(CacheBlock, assoc); + } + + blk_mask = blksize - 1; + cache->set_mask = ((cache->num_sets - 1) << cache->blksize_shift); + cache->tag_mask = ~(cache->set_mask | blk_mask); + + if (metadata_init) { + metadata_init(cache); + } + + return cache; +} + +static Cache **caches_init(int blksize, int assoc, int cachesize) +{ + Cache **caches; + int i; + + if (bad_cache_params(blksize, assoc, cachesize)) { + return NULL; + } + + caches = g_new(Cache *, cores); + + for (i = 0; i < cores; i++) { + caches[i] = cache_init(blksize, assoc, cachesize); + } + + return caches; +} + +static int get_invalid_block(Cache *cache, uint64_t set) +{ + int i; + + for (i = 0; i < cache->assoc; i++) { + if (!cache->sets[set].blocks[i].valid) { + return i; + } + } + + return -1; +} + +static int get_replaced_block(Cache *cache, int set) +{ + switch (policy) { + case RAND: + return g_rand_int_range(rng, 0, cache->assoc); + case LRU: + return lru_get_lru_block(cache, set); + case FIFO: + return fifo_get_first_block(cache, set); + default: + g_assert_not_reached(); + } +} + +static int in_cache(Cache *cache, uint64_t addr) +{ + int i; + uint64_t tag, set; + + tag = extract_tag(cache, addr); + set = extract_set(cache, addr); + + for (i = 0; i < cache->assoc; i++) { + if (cache->sets[set].blocks[i].tag == tag && + cache->sets[set].blocks[i].valid) { + return i; + } + } + + return -1; +} + +/** + * access_cache(): Simulate a cache access + * @cache: The cache under simulation + * @addr: The address of the requested memory location + * + * Returns true if the requested data is hit in the cache and false when missed. + * The cache is updated on miss for the next access. + */ +static bool access_cache(Cache *cache, uint64_t addr) +{ + int hit_blk, replaced_blk; + uint64_t tag, set; + + tag = extract_tag(cache, addr); + set = extract_set(cache, addr); + + hit_blk = in_cache(cache, addr); + if (hit_blk != -1) { + if (update_hit) { + update_hit(cache, set, hit_blk); + } + return true; + } + + replaced_blk = get_invalid_block(cache, set); + + if (replaced_blk == -1) { + replaced_blk = get_replaced_block(cache, set); + } + + if (update_miss) { + update_miss(cache, set, replaced_blk); + } + + cache->sets[set].blocks[replaced_blk].tag = tag; + cache->sets[set].blocks[replaced_blk].valid = true; + + return false; +} + +static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info, + uint64_t vaddr, void *userdata) +{ + uint64_t effective_addr; + struct qemu_plugin_hwaddr *hwaddr; + int cache_idx; + InsnData *insn; + bool hit_in_l1; + + hwaddr = qemu_plugin_get_hwaddr(info, vaddr); + if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) { + return; + } + + effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr; + cache_idx = vcpu_index % cores; + + g_mutex_lock(&l1_dcache_locks[cache_idx]); + hit_in_l1 = access_cache(l1_dcaches[cache_idx], effective_addr); + if (!hit_in_l1) { + insn = userdata; + __atomic_fetch_add(&insn->l1_dmisses, 1, __ATOMIC_SEQ_CST); + l1_dcaches[cache_idx]->misses++; + } + l1_dcaches[cache_idx]->accesses++; + g_mutex_unlock(&l1_dcache_locks[cache_idx]); + + if (hit_in_l1 || !use_l2) { + /* No need to access L2 */ + return; + } + + g_mutex_lock(&l2_ucache_locks[cache_idx]); + if (!access_cache(l2_ucaches[cache_idx], effective_addr)) { + insn = userdata; + __atomic_fetch_add(&insn->l2_misses, 1, __ATOMIC_SEQ_CST); + l2_ucaches[cache_idx]->misses++; + } + l2_ucaches[cache_idx]->accesses++; + g_mutex_unlock(&l2_ucache_locks[cache_idx]); +} + +static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata) +{ + uint64_t insn_addr; + InsnData *insn; + int cache_idx; + bool hit_in_l1; + + insn_addr = ((InsnData *) userdata)->addr; + + cache_idx = vcpu_index % cores; + g_mutex_lock(&l1_icache_locks[cache_idx]); + hit_in_l1 = access_cache(l1_icaches[cache_idx], insn_addr); + if (!hit_in_l1) { + insn = userdata; + __atomic_fetch_add(&insn->l1_imisses, 1, __ATOMIC_SEQ_CST); + l1_icaches[cache_idx]->misses++; + } + l1_icaches[cache_idx]->accesses++; + g_mutex_unlock(&l1_icache_locks[cache_idx]); + + if (hit_in_l1 || !use_l2) { + /* No need to access L2 */ + return; + } + + g_mutex_lock(&l2_ucache_locks[cache_idx]); + if (!access_cache(l2_ucaches[cache_idx], insn_addr)) { + insn = userdata; + __atomic_fetch_add(&insn->l2_misses, 1, __ATOMIC_SEQ_CST); + l2_ucaches[cache_idx]->misses++; + } + l2_ucaches[cache_idx]->accesses++; + g_mutex_unlock(&l2_ucache_locks[cache_idx]); +} + +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) +{ + size_t n_insns; + size_t i; + InsnData *data; + + n_insns = qemu_plugin_tb_n_insns(tb); + for (i = 0; i < n_insns; i++) { + struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i); + uint64_t effective_addr; + + if (sys) { + effective_addr = (uint64_t) qemu_plugin_insn_haddr(insn); + } else { + effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn); + } + + /* + * Instructions might get translated multiple times, we do not create + * new entries for those instructions. Instead, we fetch the same + * entry from the hash table and register it for the callback again. + */ + g_mutex_lock(&hashtable_lock); + data = g_hash_table_lookup(miss_ht, GUINT_TO_POINTER(effective_addr)); + if (data == NULL) { + data = g_new0(InsnData, 1); + data->disas_str = qemu_plugin_insn_disas(insn); + data->symbol = qemu_plugin_insn_symbol(insn); + data->addr = effective_addr; + g_hash_table_insert(miss_ht, GUINT_TO_POINTER(effective_addr), + (gpointer) data); + } + g_mutex_unlock(&hashtable_lock); + + qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem_access, + QEMU_PLUGIN_CB_NO_REGS, + rw, data); + + qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec, + QEMU_PLUGIN_CB_NO_REGS, data); + } +} + +static void insn_free(gpointer data) +{ + InsnData *insn = (InsnData *) data; + g_free(insn->disas_str); + g_free(insn); +} + +static void cache_free(Cache *cache) +{ + for (int i = 0; i < cache->num_sets; i++) { + g_free(cache->sets[i].blocks); + } + + if (metadata_destroy) { + metadata_destroy(cache); + } + + g_free(cache->sets); + g_free(cache); +} + +static void caches_free(Cache **caches) +{ + int i; + + for (i = 0; i < cores; i++) { + cache_free(caches[i]); + } +} + +static void append_stats_line(GString *line, + uint64_t l1_daccess, uint64_t l1_dmisses, + uint64_t l1_iaccess, uint64_t l1_imisses, + uint64_t l2_access, uint64_t l2_misses) +{ + double l1_dmiss_rate = ((double) l1_dmisses) / (l1_daccess) * 100.0; + double l1_imiss_rate = ((double) l1_imisses) / (l1_iaccess) * 100.0; + + g_string_append_printf(line, "%-14" PRIu64 " %-12" PRIu64 " %9.4lf%%" + " %-14" PRIu64 " %-12" PRIu64 " %9.4lf%%", + l1_daccess, + l1_dmisses, + l1_daccess ? l1_dmiss_rate : 0.0, + l1_iaccess, + l1_imisses, + l1_iaccess ? l1_imiss_rate : 0.0); + + if (l2_access && l2_misses) { + double l2_miss_rate = ((double) l2_misses) / (l2_access) * 100.0; + g_string_append_printf(line, + " %-12" PRIu64 " %-11" PRIu64 " %10.4lf%%", + l2_access, + l2_misses, + l2_access ? l2_miss_rate : 0.0); + } + + g_string_append(line, "\n"); +} + +static void sum_stats(void) +{ + int i; + + g_assert(cores > 1); + for (i = 0; i < cores; i++) { + l1_imisses += l1_icaches[i]->misses; + l1_dmisses += l1_dcaches[i]->misses; + l1_imem_accesses += l1_icaches[i]->accesses; + l1_dmem_accesses += l1_dcaches[i]->accesses; + + if (use_l2) { + l2_misses += l2_ucaches[i]->misses; + l2_mem_accesses += l2_ucaches[i]->accesses; + } + } +} + +static int dcmp(gconstpointer a, gconstpointer b) +{ + InsnData *insn_a = (InsnData *) a; + InsnData *insn_b = (InsnData *) b; + + return insn_a->l1_dmisses < insn_b->l1_dmisses ? 1 : -1; +} + +static int icmp(gconstpointer a, gconstpointer b) +{ + InsnData *insn_a = (InsnData *) a; + InsnData *insn_b = (InsnData *) b; + + return insn_a->l1_imisses < insn_b->l1_imisses ? 1 : -1; +} + +static int l2_cmp(gconstpointer a, gconstpointer b) +{ + InsnData *insn_a = (InsnData *) a; + InsnData *insn_b = (InsnData *) b; + + return insn_a->l2_misses < insn_b->l2_misses ? 1 : -1; +} + +static void log_stats(void) +{ + int i; + Cache *icache, *dcache, *l2_cache; + + g_autoptr(GString) rep = g_string_new("core #, data accesses, data misses," + " dmiss rate, insn accesses," + " insn misses, imiss rate"); + + if (use_l2) { + g_string_append(rep, ", l2 accesses, l2 misses, l2 miss rate"); + } + + g_string_append(rep, "\n"); + + for (i = 0; i < cores; i++) { + g_string_append_printf(rep, "%-8d", i); + dcache = l1_dcaches[i]; + icache = l1_icaches[i]; + l2_cache = use_l2 ? l2_ucaches[i] : NULL; + append_stats_line(rep, dcache->accesses, dcache->misses, + icache->accesses, icache->misses, + l2_cache ? l2_cache->accesses : 0, + l2_cache ? l2_cache->misses : 0); + } + + if (cores > 1) { + sum_stats(); + g_string_append_printf(rep, "%-8s", "sum"); + append_stats_line(rep, l1_dmem_accesses, l1_dmisses, + l1_imem_accesses, l1_imisses, + l2_cache ? l2_mem_accesses : 0, l2_cache ? l2_misses : 0); + } + + g_string_append(rep, "\n"); + qemu_plugin_outs(rep->str); +} + +static void log_top_insns(void) +{ + int i; + GList *curr, *miss_insns; + InsnData *insn; + + miss_insns = g_hash_table_get_values(miss_ht); + miss_insns = g_list_sort(miss_insns, dcmp); + g_autoptr(GString) rep = g_string_new(""); + g_string_append_printf(rep, "%s", "address, data misses, instruction\n"); + + for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) { + insn = (InsnData *) curr->data; + g_string_append_printf(rep, "0x%" PRIx64, insn->addr); + if (insn->symbol) { + g_string_append_printf(rep, " (%s)", insn->symbol); + } + g_string_append_printf(rep, ", %" PRId64 ", %s\n", + insn->l1_dmisses, insn->disas_str); + } + + miss_insns = g_list_sort(miss_insns, icmp); + g_string_append_printf(rep, "%s", "\naddress, fetch misses, instruction\n"); + + for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) { + insn = (InsnData *) curr->data; + g_string_append_printf(rep, "0x%" PRIx64, insn->addr); + if (insn->symbol) { + g_string_append_printf(rep, " (%s)", insn->symbol); + } + g_string_append_printf(rep, ", %" PRId64 ", %s\n", + insn->l1_imisses, insn->disas_str); + } + + if (!use_l2) { + goto finish; + } + + miss_insns = g_list_sort(miss_insns, l2_cmp); + g_string_append_printf(rep, "%s", "\naddress, L2 misses, instruction\n"); + + for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) { + insn = (InsnData *) curr->data; + g_string_append_printf(rep, "0x%" PRIx64, insn->addr); + if (insn->symbol) { + g_string_append_printf(rep, " (%s)", insn->symbol); + } + g_string_append_printf(rep, ", %" PRId64 ", %s\n", + insn->l2_misses, insn->disas_str); + } + +finish: + qemu_plugin_outs(rep->str); + g_list_free(miss_insns); +} + +static void plugin_exit(qemu_plugin_id_t id, void *p) +{ + log_stats(); + log_top_insns(); + + caches_free(l1_dcaches); + caches_free(l1_icaches); + + g_free(l1_dcache_locks); + g_free(l1_icache_locks); + + if (use_l2) { + caches_free(l2_ucaches); + g_free(l2_ucache_locks); + } + + g_hash_table_destroy(miss_ht); +} + +static void policy_init(void) +{ + switch (policy) { + case LRU: + update_hit = lru_update_blk; + update_miss = lru_update_blk; + metadata_init = lru_priorities_init; + metadata_destroy = lru_priorities_destroy; + break; + case FIFO: + update_miss = fifo_update_on_miss; + metadata_init = fifo_init; + metadata_destroy = fifo_destroy; + break; + case RAND: + rng = g_rand_new(); + break; + default: + g_assert_not_reached(); + } +} + +QEMU_PLUGIN_EXPORT +int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, + int argc, char **argv) +{ + int i; + int l1_iassoc, l1_iblksize, l1_icachesize; + int l1_dassoc, l1_dblksize, l1_dcachesize; + int l2_assoc, l2_blksize, l2_cachesize; + + limit = 32; + sys = info->system_emulation; + + l1_dassoc = 8; + l1_dblksize = 64; + l1_dcachesize = l1_dblksize * l1_dassoc * 32; + + l1_iassoc = 8; + l1_iblksize = 64; + l1_icachesize = l1_iblksize * l1_iassoc * 32; + + l2_assoc = 16; + l2_blksize = 64; + l2_cachesize = l2_assoc * l2_blksize * 2048; + + policy = LRU; + + cores = sys ? info->system.smp_vcpus : 1; + + for (i = 0; i < argc; i++) { + char *opt = argv[i]; + g_auto(GStrv) tokens = g_strsplit(opt, "=", 2); + + if (g_strcmp0(tokens[0], "iblksize") == 0) { + l1_iblksize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "iassoc") == 0) { + l1_iassoc = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "icachesize") == 0) { + l1_icachesize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "dblksize") == 0) { + l1_dblksize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "dassoc") == 0) { + l1_dassoc = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "dcachesize") == 0) { + l1_dcachesize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "limit") == 0) { + limit = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "cores") == 0) { + cores = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "l2cachesize") == 0) { + use_l2 = true; + l2_cachesize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "l2blksize") == 0) { + use_l2 = true; + l2_blksize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "l2assoc") == 0) { + use_l2 = true; + l2_assoc = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "l2") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &use_l2)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + return -1; + } + } else if (g_strcmp0(tokens[0], "evict") == 0) { + if (g_strcmp0(tokens[1], "rand") == 0) { + policy = RAND; + } else if (g_strcmp0(tokens[1], "lru") == 0) { + policy = LRU; + } else if (g_strcmp0(tokens[1], "fifo") == 0) { + policy = FIFO; + } else { + fprintf(stderr, "invalid eviction policy: %s\n", opt); + return -1; + } + } else { + fprintf(stderr, "option parsing failed: %s\n", opt); + return -1; + } + } + + policy_init(); + + l1_dcaches = caches_init(l1_dblksize, l1_dassoc, l1_dcachesize); + if (!l1_dcaches) { + const char *err = cache_config_error(l1_dblksize, l1_dassoc, l1_dcachesize); + fprintf(stderr, "dcache cannot be constructed from given parameters\n"); + fprintf(stderr, "%s\n", err); + return -1; + } + + l1_icaches = caches_init(l1_iblksize, l1_iassoc, l1_icachesize); + if (!l1_icaches) { + const char *err = cache_config_error(l1_iblksize, l1_iassoc, l1_icachesize); + fprintf(stderr, "icache cannot be constructed from given parameters\n"); + fprintf(stderr, "%s\n", err); + return -1; + } + + l2_ucaches = use_l2 ? caches_init(l2_blksize, l2_assoc, l2_cachesize) : NULL; + if (!l2_ucaches && use_l2) { + const char *err = cache_config_error(l2_blksize, l2_assoc, l2_cachesize); + fprintf(stderr, "L2 cache cannot be constructed from given parameters\n"); + fprintf(stderr, "%s\n", err); + return -1; + } + + l1_dcache_locks = g_new0(GMutex, cores); + l1_icache_locks = g_new0(GMutex, cores); + l2_ucache_locks = use_l2 ? g_new0(GMutex, cores) : NULL; + + qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); + qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); + + miss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL, insn_free); + + return 0; +} diff --git a/contrib/plugins/drcov.c b/contrib/plugins/drcov.c new file mode 100644 index 0000000000..5edc94dcaf --- /dev/null +++ b/contrib/plugins/drcov.c @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2021, Ivanov Arkady <arkadiy.ivanov@ispras.ru> + * + * Drcov - a DynamoRIO-based tool that collects coverage information + * from a binary. Primary goal this script is to have coverage log + * files that work in Lighthouse. + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include <inttypes.h> +#include <assert.h> +#include <stdlib.h> +#include <inttypes.h> +#include <string.h> +#include <unistd.h> +#include <stdio.h> +#include <glib.h> + +#include <qemu-plugin.h> + +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; + +static char header[] = "DRCOV VERSION: 2\n" + "DRCOV FLAVOR: drcov-64\n" + "Module Table: version 2, count 1\n" + "Columns: id, base, end, entry, path\n"; + +static FILE *fp; +static const char *file_name = "file.drcov.trace"; +static GMutex lock; + +typedef struct { + uint32_t start; + uint16_t size; + uint16_t mod_id; + bool exec; +} bb_entry_t; + +/* Translated blocks */ +static GPtrArray *blocks; + +static void printf_header(unsigned long count) +{ + fprintf(fp, "%s", header); + const char *path = qemu_plugin_path_to_binary(); + uint64_t start_code = qemu_plugin_start_code(); + uint64_t end_code = qemu_plugin_end_code(); + uint64_t entry = qemu_plugin_entry_code(); + fprintf(fp, "0, 0x%" PRIx64 ", 0x%" PRIx64 ", 0x%" PRIx64 ", %s\n", + start_code, end_code, entry, path); + fprintf(fp, "BB Table: %ld bbs\n", count); +} + +static void printf_char_array32(uint32_t data) +{ + const uint8_t *bytes = (const uint8_t *)(&data); + fwrite(bytes, sizeof(char), sizeof(data), fp); +} + +static void printf_char_array16(uint16_t data) +{ + const uint8_t *bytes = (const uint8_t *)(&data); + fwrite(bytes, sizeof(char), sizeof(data), fp); +} + + +static void printf_el(gpointer data, gpointer user_data) +{ + bb_entry_t *bb = (bb_entry_t *)data; + if (bb->exec) { + printf_char_array32(bb->start); + printf_char_array16(bb->size); + printf_char_array16(bb->mod_id); + } + g_free(bb); +} + +static void count_block(gpointer data, gpointer user_data) +{ + unsigned long *count = (unsigned long *) user_data; + bb_entry_t *bb = (bb_entry_t *)data; + if (bb->exec) { + *count = *count + 1; + } +} + +static void plugin_exit(qemu_plugin_id_t id, void *p) +{ + unsigned long count = 0; + g_mutex_lock(&lock); + g_ptr_array_foreach(blocks, count_block, &count); + + /* Print function */ + printf_header(count); + g_ptr_array_foreach(blocks, printf_el, NULL); + + /* Clear */ + g_ptr_array_free(blocks, true); + + fclose(fp); + + g_mutex_unlock(&lock); +} + +static void plugin_init(void) +{ + fp = fopen(file_name, "wb"); + blocks = g_ptr_array_sized_new(128); +} + +static void vcpu_tb_exec(unsigned int cpu_index, void *udata) +{ + bb_entry_t *bb = (bb_entry_t *) udata; + + g_mutex_lock(&lock); + bb->exec = true; + g_mutex_unlock(&lock); +} + +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) +{ + uint64_t pc = qemu_plugin_tb_vaddr(tb); + size_t n = qemu_plugin_tb_n_insns(tb); + + g_mutex_lock(&lock); + + bb_entry_t *bb = g_new0(bb_entry_t, 1); + for (int i = 0; i < n; i++) { + bb->size += qemu_plugin_insn_size(qemu_plugin_tb_get_insn(tb, i)); + } + + bb->start = pc; + bb->mod_id = 0; + bb->exec = false; + g_ptr_array_add(blocks, bb); + + g_mutex_unlock(&lock); + qemu_plugin_register_vcpu_tb_exec_cb(tb, vcpu_tb_exec, + QEMU_PLUGIN_CB_NO_REGS, + (void *)bb); + +} + +QEMU_PLUGIN_EXPORT +int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, + int argc, char **argv) +{ + for (int i = 0; i < argc; i++) { + g_auto(GStrv) tokens = g_strsplit(argv[i], "=", 2); + if (g_strcmp0(tokens[0], "filename") == 0) { + file_name = g_strdup(tokens[1]); + } + } + + plugin_init(); + + qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); + qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); + + return 0; +} diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c new file mode 100644 index 0000000000..fab18113d4 --- /dev/null +++ b/contrib/plugins/execlog.c @@ -0,0 +1,482 @@ +/* + * Copyright (C) 2021, Alexandre Iooss <erdnaxe@crans.org> + * + * Log instruction execution with memory access and register changes + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include <glib.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <qemu-plugin.h> + +typedef struct { + struct qemu_plugin_register *handle; + GByteArray *last; + GByteArray *new; + const char *name; +} Register; + +typedef struct CPU { + /* Store last executed instruction on each vCPU as a GString */ + GString *last_exec; + /* Ptr array of Register */ + GPtrArray *registers; +} CPU; + +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; + +static GArray *cpus; +static GRWLock expand_array_lock; + +static GPtrArray *imatches; +static GArray *amatches; +static GPtrArray *rmatches; +static bool disas_assist; +static GMutex add_reg_name_lock; +static GPtrArray *all_reg_names; + +static CPU *get_cpu(int vcpu_index) +{ + CPU *c; + g_rw_lock_reader_lock(&expand_array_lock); + c = &g_array_index(cpus, CPU, vcpu_index); + g_rw_lock_reader_unlock(&expand_array_lock); + + return c; +} + +/** + * Add memory read or write information to current instruction log + */ +static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t info, + uint64_t vaddr, void *udata) +{ + CPU *c = get_cpu(cpu_index); + GString *s = c->last_exec; + + /* Find vCPU in array */ + + /* Indicate type of memory access */ + if (qemu_plugin_mem_is_store(info)) { + g_string_append(s, ", store"); + } else { + g_string_append(s, ", load"); + } + + /* If full system emulation log physical address and device name */ + struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(info, vaddr); + if (hwaddr) { + uint64_t addr = qemu_plugin_hwaddr_phys_addr(hwaddr); + const char *name = qemu_plugin_hwaddr_device_name(hwaddr); + g_string_append_printf(s, ", 0x%08"PRIx64", %s", addr, name); + } else { + g_string_append_printf(s, ", 0x%08"PRIx64, vaddr); + } +} + +/** + * Log instruction execution, outputting the last one. + * + * vcpu_insn_exec() is a copy and paste of vcpu_insn_exec_with_regs() + * without the checking of register values when we've attempted to + * optimise with disas_assist. + */ +static void insn_check_regs(CPU *cpu) +{ + for (int n = 0; n < cpu->registers->len; n++) { + Register *reg = cpu->registers->pdata[n]; + int sz; + + g_byte_array_set_size(reg->new, 0); + sz = qemu_plugin_read_register(reg->handle, reg->new); + g_assert(sz == reg->last->len); + + if (memcmp(reg->last->data, reg->new->data, sz)) { + GByteArray *temp = reg->last; + g_string_append_printf(cpu->last_exec, ", %s -> 0x", reg->name); + /* TODO: handle BE properly */ + for (int i = sz; i >= 0; i--) { + g_string_append_printf(cpu->last_exec, "%02x", + reg->new->data[i]); + } + reg->last = reg->new; + reg->new = temp; + } + } +} + +/* Log last instruction while checking registers */ +static void vcpu_insn_exec_with_regs(unsigned int cpu_index, void *udata) +{ + CPU *cpu = get_cpu(cpu_index); + + /* Print previous instruction in cache */ + if (cpu->last_exec->len) { + if (cpu->registers) { + insn_check_regs(cpu); + } + + qemu_plugin_outs(cpu->last_exec->str); + qemu_plugin_outs("\n"); + } + + /* Store new instruction in cache */ + /* vcpu_mem will add memory access information to last_exec */ + g_string_printf(cpu->last_exec, "%u, ", cpu_index); + g_string_append(cpu->last_exec, (char *)udata); +} + +/* Log last instruction while checking registers, ignore next */ +static void vcpu_insn_exec_only_regs(unsigned int cpu_index, void *udata) +{ + CPU *cpu = get_cpu(cpu_index); + + /* Print previous instruction in cache */ + if (cpu->last_exec->len) { + if (cpu->registers) { + insn_check_regs(cpu); + } + + qemu_plugin_outs(cpu->last_exec->str); + qemu_plugin_outs("\n"); + } + + /* reset */ + cpu->last_exec->len = 0; +} + +/* Log last instruction without checking regs, setup next */ +static void vcpu_insn_exec(unsigned int cpu_index, void *udata) +{ + CPU *cpu = get_cpu(cpu_index); + + /* Print previous instruction in cache */ + if (cpu->last_exec->len) { + qemu_plugin_outs(cpu->last_exec->str); + qemu_plugin_outs("\n"); + } + + /* Store new instruction in cache */ + /* vcpu_mem will add memory access information to last_exec */ + g_string_printf(cpu->last_exec, "%u, ", cpu_index); + g_string_append(cpu->last_exec, (char *)udata); +} + +/** + * On translation block new translation + * + * QEMU convert code by translation block (TB). By hooking here we can then hook + * a callback on each instruction and memory access. + */ +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) +{ + struct qemu_plugin_insn *insn; + bool skip = (imatches || amatches); + bool check_regs_this = rmatches; + bool check_regs_next = false; + + size_t n = qemu_plugin_tb_n_insns(tb); + for (size_t i = 0; i < n; i++) { + char *insn_disas; + uint64_t insn_vaddr; + + /* + * `insn` is shared between translations in QEMU, copy needed data here. + * `output` is never freed as it might be used multiple times during + * the emulation lifetime. + * We only consider the first 32 bits of the instruction, this may be + * a limitation for CISC architectures. + */ + insn = qemu_plugin_tb_get_insn(tb, i); + insn_disas = qemu_plugin_insn_disas(insn); + insn_vaddr = qemu_plugin_insn_vaddr(insn); + + /* + * If we are filtering we better check out if we have any + * hits. The skip "latches" so we can track memory accesses + * after the instruction we care about. Also enable register + * checking on the next instruction. + */ + if (skip && imatches) { + int j; + for (j = 0; j < imatches->len && skip; j++) { + char *m = g_ptr_array_index(imatches, j); + if (g_str_has_prefix(insn_disas, m)) { + skip = false; + check_regs_next = rmatches; + } + } + } + + if (skip && amatches) { + int j; + for (j = 0; j < amatches->len && skip; j++) { + uint64_t v = g_array_index(amatches, uint64_t, j); + if (v == insn_vaddr) { + skip = false; + } + } + } + + /* + * Check the disassembly to see if a register we care about + * will be affected by this instruction. This relies on the + * dissembler doing something sensible for the registers we + * care about. + */ + if (disas_assist && rmatches) { + check_regs_next = false; + gchar *args = g_strstr_len(insn_disas, -1, " "); + for (int n = 0; n < all_reg_names->len; n++) { + gchar *reg = g_ptr_array_index(all_reg_names, n); + if (g_strrstr(args, reg)) { + check_regs_next = true; + skip = false; + } + } + } + + /* + * We now have 3 choices: + * + * - Log insn + * - Log insn while checking registers + * - Don't log this insn but check if last insn changed registers + */ + + if (skip) { + if (check_regs_this) { + qemu_plugin_register_vcpu_insn_exec_cb(insn, + vcpu_insn_exec_only_regs, + QEMU_PLUGIN_CB_R_REGS, + NULL); + } + } else { + uint32_t insn_opcode; + insn_opcode = *((uint32_t *)qemu_plugin_insn_data(insn)); + char *output = g_strdup_printf("0x%"PRIx64", 0x%"PRIx32", \"%s\"", + insn_vaddr, insn_opcode, insn_disas); + + /* Register callback on memory read or write */ + qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem, + QEMU_PLUGIN_CB_NO_REGS, + QEMU_PLUGIN_MEM_RW, NULL); + + /* Register callback on instruction */ + if (check_regs_this) { + qemu_plugin_register_vcpu_insn_exec_cb( + insn, vcpu_insn_exec_with_regs, + QEMU_PLUGIN_CB_R_REGS, + output); + } else { + qemu_plugin_register_vcpu_insn_exec_cb( + insn, vcpu_insn_exec, + QEMU_PLUGIN_CB_NO_REGS, + output); + } + + /* reset skip */ + skip = (imatches || amatches); + } + + /* set regs for next */ + if (disas_assist && rmatches) { + check_regs_this = check_regs_next; + } + + g_free(insn_disas); + } +} + +static Register *init_vcpu_register(qemu_plugin_reg_descriptor *desc) +{ + Register *reg = g_new0(Register, 1); + g_autofree gchar *lower = g_utf8_strdown(desc->name, -1); + int r; + + reg->handle = desc->handle; + reg->name = g_intern_string(lower); + reg->last = g_byte_array_new(); + reg->new = g_byte_array_new(); + + /* read the initial value */ + r = qemu_plugin_read_register(reg->handle, reg->last); + g_assert(r > 0); + return reg; +} + +/* + * g_pattern_match_string has been deprecated in Glib since 2.70 and + * will complain about it if you try to use it. Fortunately the + * signature of both functions is the same making it easy to work + * around. + */ +static inline +gboolean g_pattern_spec_match_string_qemu(GPatternSpec *pspec, + const gchar *string) +{ +#if GLIB_CHECK_VERSION(2, 70, 0) + return g_pattern_spec_match_string(pspec, string); +#else + return g_pattern_match_string(pspec, string); +#endif +}; +#define g_pattern_spec_match_string(p, s) g_pattern_spec_match_string_qemu(p, s) + +static GPtrArray *registers_init(int vcpu_index) +{ + g_autoptr(GPtrArray) registers = g_ptr_array_new(); + g_autoptr(GArray) reg_list = qemu_plugin_get_registers(); + + if (rmatches && reg_list->len) { + /* + * Go through each register in the complete list and + * see if we want to track it. + */ + for (int r = 0; r < reg_list->len; r++) { + qemu_plugin_reg_descriptor *rd = &g_array_index( + reg_list, qemu_plugin_reg_descriptor, r); + for (int p = 0; p < rmatches->len; p++) { + g_autoptr(GPatternSpec) pat = g_pattern_spec_new(rmatches->pdata[p]); + g_autofree gchar *rd_lower = g_utf8_strdown(rd->name, -1); + if (g_pattern_spec_match_string(pat, rd->name) || + g_pattern_spec_match_string(pat, rd_lower)) { + Register *reg = init_vcpu_register(rd); + g_ptr_array_add(registers, reg); + + /* we need a list of regnames at TB translation time */ + if (disas_assist) { + g_mutex_lock(&add_reg_name_lock); + if (!g_ptr_array_find(all_reg_names, reg->name, NULL)) { + g_ptr_array_add(all_reg_names, (gpointer)reg->name); + } + g_mutex_unlock(&add_reg_name_lock); + } + } + } + } + } + + return registers->len ? g_steal_pointer(®isters) : NULL; +} + +/* + * Initialise a new vcpu/thread with: + * - last_exec tracking data + * - list of tracked registers + * - initial value of registers + * + * As we could have multiple threads trying to do this we need to + * serialise the expansion under a lock. + */ +static void vcpu_init(qemu_plugin_id_t id, unsigned int vcpu_index) +{ + CPU *c; + + g_rw_lock_writer_lock(&expand_array_lock); + if (vcpu_index >= cpus->len) { + g_array_set_size(cpus, vcpu_index + 1); + } + g_rw_lock_writer_unlock(&expand_array_lock); + + c = get_cpu(vcpu_index); + c->last_exec = g_string_new(NULL); + c->registers = registers_init(vcpu_index); +} + +/** + * On plugin exit, print last instruction in cache + */ +static void plugin_exit(qemu_plugin_id_t id, void *p) +{ + guint i; + g_rw_lock_reader_lock(&expand_array_lock); + for (i = 0; i < cpus->len; i++) { + CPU *c = get_cpu(i); + if (c->last_exec && c->last_exec->str) { + qemu_plugin_outs(c->last_exec->str); + qemu_plugin_outs("\n"); + } + } + g_rw_lock_reader_unlock(&expand_array_lock); +} + +/* Add a match to the array of matches */ +static void parse_insn_match(char *match) +{ + if (!imatches) { + imatches = g_ptr_array_new(); + } + g_ptr_array_add(imatches, g_strdup(match)); +} + +static void parse_vaddr_match(char *match) +{ + uint64_t v = g_ascii_strtoull(match, NULL, 16); + + if (!amatches) { + amatches = g_array_new(false, true, sizeof(uint64_t)); + } + g_array_append_val(amatches, v); +} + +/* + * We have to wait until vCPUs are started before we can check the + * patterns find anything. + */ +static void add_regpat(char *regpat) +{ + if (!rmatches) { + rmatches = g_ptr_array_new(); + } + g_ptr_array_add(rmatches, g_strdup(regpat)); +} + +/** + * Install the plugin + */ +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, + const qemu_info_t *info, int argc, + char **argv) +{ + /* + * Initialize dynamic array to cache vCPU instruction. In user mode + * we don't know the size before emulation. + */ + cpus = g_array_sized_new(true, true, sizeof(CPU), + info->system_emulation ? info->system.max_vcpus : 1); + + for (int i = 0; i < argc; i++) { + char *opt = argv[i]; + g_auto(GStrv) tokens = g_strsplit(opt, "=", 2); + if (g_strcmp0(tokens[0], "ifilter") == 0) { + parse_insn_match(tokens[1]); + } else if (g_strcmp0(tokens[0], "afilter") == 0) { + parse_vaddr_match(tokens[1]); + } else if (g_strcmp0(tokens[0], "reg") == 0) { + add_regpat(tokens[1]); + } else if (g_strcmp0(tokens[0], "rdisas") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &disas_assist)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + return -1; + } + all_reg_names = g_ptr_array_new(); + } else { + fprintf(stderr, "option parsing failed: %s\n", opt); + return -1; + } + } + + /* Register init, translation block and exit callbacks */ + qemu_plugin_register_vcpu_init_cb(id, vcpu_init); + qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); + qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); + + return 0; +} diff --git a/contrib/plugins/hotblocks.c b/contrib/plugins/hotblocks.c new file mode 100644 index 0000000000..02bc5078bd --- /dev/null +++ b/contrib/plugins/hotblocks.c @@ -0,0 +1,165 @@ +/* + * Copyright (C) 2019, Alex BennĂ©e <alex.bennee@linaro.org> + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include <inttypes.h> +#include <assert.h> +#include <stdlib.h> +#include <inttypes.h> +#include <string.h> +#include <unistd.h> +#include <stdio.h> +#include <glib.h> + +#include <qemu-plugin.h> + +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; + +static bool do_inline; + +/* Plugins need to take care of their own locking */ +static GMutex lock; +static GHashTable *hotblocks; +static guint64 limit = 20; + +/* + * Counting Structure + * + * The internals of the TCG are not exposed to plugins so we can only + * get the starting PC for each block. We cheat this slightly by + * xor'ing the number of instructions to the hash to help + * differentiate. + */ +typedef struct { + uint64_t start_addr; + struct qemu_plugin_scoreboard *exec_count; + int trans_count; + unsigned long insns; +} ExecCount; + +static gint cmp_exec_count(gconstpointer a, gconstpointer b) +{ + ExecCount *ea = (ExecCount *) a; + ExecCount *eb = (ExecCount *) b; + uint64_t count_a = + qemu_plugin_u64_sum(qemu_plugin_scoreboard_u64(ea->exec_count)); + uint64_t count_b = + qemu_plugin_u64_sum(qemu_plugin_scoreboard_u64(eb->exec_count)); + return count_a > count_b ? -1 : 1; +} + +static void exec_count_free(gpointer key, gpointer value, gpointer user_data) +{ + ExecCount *cnt = value; + qemu_plugin_scoreboard_free(cnt->exec_count); +} + +static void plugin_exit(qemu_plugin_id_t id, void *p) +{ + g_autoptr(GString) report = g_string_new("collected "); + GList *counts, *it; + int i; + + g_string_append_printf(report, "%d entries in the hash table\n", + g_hash_table_size(hotblocks)); + counts = g_hash_table_get_values(hotblocks); + it = g_list_sort(counts, cmp_exec_count); + + if (it) { + g_string_append_printf(report, "pc, tcount, icount, ecount\n"); + + for (i = 0; i < limit && it->next; i++, it = it->next) { + ExecCount *rec = (ExecCount *) it->data; + g_string_append_printf( + report, "0x%016"PRIx64", %d, %ld, %"PRId64"\n", + rec->start_addr, rec->trans_count, + rec->insns, + qemu_plugin_u64_sum( + qemu_plugin_scoreboard_u64(rec->exec_count))); + } + + g_list_free(it); + } + + qemu_plugin_outs(report->str); + + g_hash_table_foreach(hotblocks, exec_count_free, NULL); + g_hash_table_destroy(hotblocks); +} + +static void plugin_init(void) +{ + hotblocks = g_hash_table_new(NULL, g_direct_equal); +} + +static void vcpu_tb_exec(unsigned int cpu_index, void *udata) +{ + ExecCount *cnt = (ExecCount *)udata; + qemu_plugin_u64_add(qemu_plugin_scoreboard_u64(cnt->exec_count), + cpu_index, 1); +} + +/* + * When do_inline we ask the plugin to increment the counter for us. + * Otherwise a helper is inserted which calls the vcpu_tb_exec + * callback. + */ +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) +{ + ExecCount *cnt; + uint64_t pc = qemu_plugin_tb_vaddr(tb); + size_t insns = qemu_plugin_tb_n_insns(tb); + uint64_t hash = pc ^ insns; + + g_mutex_lock(&lock); + cnt = (ExecCount *) g_hash_table_lookup(hotblocks, (gconstpointer) hash); + if (cnt) { + cnt->trans_count++; + } else { + cnt = g_new0(ExecCount, 1); + cnt->start_addr = pc; + cnt->trans_count = 1; + cnt->insns = insns; + cnt->exec_count = qemu_plugin_scoreboard_new(sizeof(uint64_t)); + g_hash_table_insert(hotblocks, (gpointer) hash, (gpointer) cnt); + } + + g_mutex_unlock(&lock); + + if (do_inline) { + qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu( + tb, QEMU_PLUGIN_INLINE_ADD_U64, + qemu_plugin_scoreboard_u64(cnt->exec_count), 1); + } else { + qemu_plugin_register_vcpu_tb_exec_cb(tb, vcpu_tb_exec, + QEMU_PLUGIN_CB_NO_REGS, + (void *)cnt); + } +} + +QEMU_PLUGIN_EXPORT +int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, + int argc, char **argv) +{ + for (int i = 0; i < argc; i++) { + char *opt = argv[i]; + g_auto(GStrv) tokens = g_strsplit(opt, "=", 2); + if (g_strcmp0(tokens[0], "inline") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &do_inline)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + return -1; + } + } else { + fprintf(stderr, "option parsing failed: %s\n", opt); + return -1; + } + } + + plugin_init(); + + qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); + qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); + return 0; +} diff --git a/contrib/plugins/hotpages.c b/contrib/plugins/hotpages.c new file mode 100644 index 0000000000..8316ae50c7 --- /dev/null +++ b/contrib/plugins/hotpages.c @@ -0,0 +1,203 @@ +/* + * Copyright (C) 2019, Alex BennĂ©e <alex.bennee@linaro.org> + * + * Hot Pages - show which pages saw the most memory accesses. + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include <inttypes.h> +#include <assert.h> +#include <stdlib.h> +#include <inttypes.h> +#include <string.h> +#include <unistd.h> +#include <stdio.h> +#include <glib.h> + +#include <qemu-plugin.h> + +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +static uint64_t page_size = 4096; +static uint64_t page_mask; +static int limit = 50; +static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW; +static bool track_io; + +enum sort_type { + SORT_RW = 0, + SORT_R, + SORT_W, + SORT_A +}; + +static int sort_by = SORT_RW; + +typedef struct { + uint64_t page_address; + int cpu_read; + int cpu_write; + uint64_t reads; + uint64_t writes; +} PageCounters; + +static GMutex lock; +static GHashTable *pages; + +static gint cmp_access_count(gconstpointer a, gconstpointer b) +{ + PageCounters *ea = (PageCounters *) a; + PageCounters *eb = (PageCounters *) b; + int r; + switch (sort_by) { + case SORT_RW: + r = (ea->reads + ea->writes) > (eb->reads + eb->writes) ? -1 : 1; + break; + case SORT_R: + r = ea->reads > eb->reads ? -1 : 1; + break; + case SORT_W: + r = ea->writes > eb->writes ? -1 : 1; + break; + case SORT_A: + r = ea->page_address > eb->page_address ? -1 : 1; + break; + default: + g_assert_not_reached(); + } + return r; +} + + +static void plugin_exit(qemu_plugin_id_t id, void *p) +{ + g_autoptr(GString) report = g_string_new("Addr, RCPUs, Reads, WCPUs, Writes\n"); + int i; + GList *counts; + + counts = g_hash_table_get_values(pages); + if (counts && g_list_next(counts)) { + GList *it; + + it = g_list_sort(counts, cmp_access_count); + + for (i = 0; i < limit && it->next; i++, it = it->next) { + PageCounters *rec = (PageCounters *) it->data; + g_string_append_printf(report, + "0x%016"PRIx64", 0x%04x, %"PRId64 + ", 0x%04x, %"PRId64"\n", + rec->page_address, + rec->cpu_read, rec->reads, + rec->cpu_write, rec->writes); + } + g_list_free(it); + } + + qemu_plugin_outs(report->str); +} + +static void plugin_init(void) +{ + page_mask = (page_size - 1); + pages = g_hash_table_new(NULL, g_direct_equal); +} + +static void vcpu_haddr(unsigned int cpu_index, qemu_plugin_meminfo_t meminfo, + uint64_t vaddr, void *udata) +{ + struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(meminfo, vaddr); + uint64_t page; + PageCounters *count; + + /* We only get a hwaddr for system emulation */ + if (track_io) { + if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) { + page = vaddr; + } else { + return; + } + } else { + if (hwaddr && !qemu_plugin_hwaddr_is_io(hwaddr)) { + page = (uint64_t) qemu_plugin_hwaddr_phys_addr(hwaddr); + } else { + page = vaddr; + } + } + page &= ~page_mask; + + g_mutex_lock(&lock); + count = (PageCounters *) g_hash_table_lookup(pages, GUINT_TO_POINTER(page)); + + if (!count) { + count = g_new0(PageCounters, 1); + count->page_address = page; + g_hash_table_insert(pages, GUINT_TO_POINTER(page), (gpointer) count); + } + if (qemu_plugin_mem_is_store(meminfo)) { + count->writes++; + count->cpu_write |= (1 << cpu_index); + } else { + count->reads++; + count->cpu_read |= (1 << cpu_index); + } + + g_mutex_unlock(&lock); +} + +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) +{ + size_t n = qemu_plugin_tb_n_insns(tb); + size_t i; + + for (i = 0; i < n; i++) { + struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i); + qemu_plugin_register_vcpu_mem_cb(insn, vcpu_haddr, + QEMU_PLUGIN_CB_NO_REGS, + rw, NULL); + } +} + +QEMU_PLUGIN_EXPORT +int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, + int argc, char **argv) +{ + int i; + + for (i = 0; i < argc; i++) { + char *opt = argv[i]; + g_auto(GStrv) tokens = g_strsplit(opt, "=", -1); + + if (g_strcmp0(tokens[0], "sortby") == 0) { + if (g_strcmp0(tokens[1], "reads") == 0) { + sort_by = SORT_R; + } else if (g_strcmp0(tokens[1], "writes") == 0) { + sort_by = SORT_W; + } else if (g_strcmp0(tokens[1], "address") == 0) { + sort_by = SORT_A; + } else { + fprintf(stderr, "invalid value to sortby: %s\n", tokens[1]); + return -1; + } + } else if (g_strcmp0(tokens[0], "io") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &track_io)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + return -1; + } + } else if (g_strcmp0(tokens[0], "pagesize") == 0) { + page_size = g_ascii_strtoull(tokens[1], NULL, 10); + } else { + fprintf(stderr, "option parsing failed: %s\n", opt); + return -1; + } + } + + plugin_init(); + + qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); + qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); + return 0; +} diff --git a/contrib/plugins/howvec.c b/contrib/plugins/howvec.c new file mode 100644 index 0000000000..94bbc53820 --- /dev/null +++ b/contrib/plugins/howvec.c @@ -0,0 +1,397 @@ +/* + * Copyright (C) 2019, Alex BennĂ©e <alex.bennee@linaro.org> + * + * How vectorised is this code? + * + * Attempt to measure the amount of vectorisation that has been done + * on some code by counting classes of instruction. + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include <inttypes.h> +#include <assert.h> +#include <stdlib.h> +#include <inttypes.h> +#include <string.h> +#include <unistd.h> +#include <stdio.h> +#include <glib.h> + +#include <qemu-plugin.h> + +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +typedef enum { + COUNT_CLASS, + COUNT_INDIVIDUAL, + COUNT_NONE +} CountType; + +static int limit = 50; +static bool do_inline; +static bool verbose; + +static GMutex lock; +static GHashTable *insns; + +typedef struct { + const char *class; + const char *opt; + uint32_t mask; + uint32_t pattern; + CountType what; + qemu_plugin_u64 count; +} InsnClassExecCount; + +typedef struct { + char *insn; + uint32_t opcode; + qemu_plugin_u64 count; + InsnClassExecCount *class; +} InsnExecCount; + +/* + * Matchers for classes of instructions, order is important. + * + * Your most precise match must be before looser matches. If no match + * is found in the table we can create an individual entry. + * + * 31..28 27..24 23..20 19..16 15..12 11..8 7..4 3..0 + */ +static InsnClassExecCount aarch64_insn_classes[] = { + /* "Reserved"" */ + { " UDEF", "udef", 0xffff0000, 0x00000000, COUNT_NONE}, + { " SVE", "sve", 0x1e000000, 0x04000000, COUNT_CLASS}, + { "Reserved", "res", 0x1e000000, 0x00000000, COUNT_CLASS}, + /* Data Processing Immediate */ + { " PCrel addr", "pcrel", 0x1f000000, 0x10000000, COUNT_CLASS}, + { " Add/Sub (imm,tags)", "asit", 0x1f800000, 0x11800000, COUNT_CLASS}, + { " Add/Sub (imm)", "asi", 0x1f000000, 0x11000000, COUNT_CLASS}, + { " Logical (imm)", "logi", 0x1f800000, 0x12000000, COUNT_CLASS}, + { " Move Wide (imm)", "movwi", 0x1f800000, 0x12800000, COUNT_CLASS}, + { " Bitfield", "bitf", 0x1f800000, 0x13000000, COUNT_CLASS}, + { " Extract", "extr", 0x1f800000, 0x13800000, COUNT_CLASS}, + { "Data Proc Imm", "dpri", 0x1c000000, 0x10000000, COUNT_CLASS}, + /* Branches */ + { " Cond Branch (imm)", "cndb", 0xfe000000, 0x54000000, COUNT_CLASS}, + { " Exception Gen", "excp", 0xff000000, 0xd4000000, COUNT_CLASS}, + { " NOP", "nop", 0xffffffff, 0xd503201f, COUNT_NONE}, + { " Hints", "hint", 0xfffff000, 0xd5032000, COUNT_CLASS}, + { " Barriers", "barr", 0xfffff000, 0xd5033000, COUNT_CLASS}, + { " PSTATE", "psta", 0xfff8f000, 0xd5004000, COUNT_CLASS}, + { " System Insn", "sins", 0xffd80000, 0xd5080000, COUNT_CLASS}, + { " System Reg", "sreg", 0xffd00000, 0xd5100000, COUNT_CLASS}, + { " Branch (reg)", "breg", 0xfe000000, 0xd6000000, COUNT_CLASS}, + { " Branch (imm)", "bimm", 0x7c000000, 0x14000000, COUNT_CLASS}, + { " Cmp & Branch", "cmpb", 0x7e000000, 0x34000000, COUNT_CLASS}, + { " Tst & Branch", "tstb", 0x7e000000, 0x36000000, COUNT_CLASS}, + { "Branches", "branch", 0x1c000000, 0x14000000, COUNT_CLASS}, + /* Loads and Stores */ + { " AdvSimd ldstmult", "advlsm", 0xbfbf0000, 0x0c000000, COUNT_CLASS}, + { " AdvSimd ldstmult++", "advlsmp", 0xbfb00000, 0x0c800000, COUNT_CLASS}, + { " AdvSimd ldst", "advlss", 0xbf9f0000, 0x0d000000, COUNT_CLASS}, + { " AdvSimd ldst++", "advlssp", 0xbf800000, 0x0d800000, COUNT_CLASS}, + { " ldst excl", "ldstx", 0x3f000000, 0x08000000, COUNT_CLASS}, + { " Prefetch", "prfm", 0xff000000, 0xd8000000, COUNT_CLASS}, + { " Load Reg (lit)", "ldlit", 0x1b000000, 0x18000000, COUNT_CLASS}, + { " ldst noalloc pair", "ldstnap", 0x3b800000, 0x28000000, COUNT_CLASS}, + { " ldst pair", "ldstp", 0x38000000, 0x28000000, COUNT_CLASS}, + { " ldst reg", "ldstr", 0x3b200000, 0x38000000, COUNT_CLASS}, + { " Atomic ldst", "atomic", 0x3b200c00, 0x38200000, COUNT_CLASS}, + { " ldst reg (reg off)", "ldstro", 0x3b200b00, 0x38200800, COUNT_CLASS}, + { " ldst reg (pac)", "ldstpa", 0x3b200200, 0x38200800, COUNT_CLASS}, + { " ldst reg (imm)", "ldsti", 0x3b000000, 0x39000000, COUNT_CLASS}, + { "Loads & Stores", "ldst", 0x0a000000, 0x08000000, COUNT_CLASS}, + /* Data Processing Register */ + { "Data Proc Reg", "dprr", 0x0e000000, 0x0a000000, COUNT_CLASS}, + /* Scalar FP */ + { "Scalar FP ", "fpsimd", 0x0e000000, 0x0e000000, COUNT_CLASS}, + /* Unclassified */ + { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_CLASS}, +}; + +static InsnClassExecCount sparc32_insn_classes[] = { + { "Call", "call", 0xc0000000, 0x40000000, COUNT_CLASS}, + { "Branch ICond", "bcc", 0xc1c00000, 0x00800000, COUNT_CLASS}, + { "Branch Fcond", "fbcc", 0xc1c00000, 0x01800000, COUNT_CLASS}, + { "SetHi", "sethi", 0xc1c00000, 0x01000000, COUNT_CLASS}, + { "FPU ALU", "fpu", 0xc1f00000, 0x81a00000, COUNT_CLASS}, + { "ALU", "alu", 0xc0000000, 0x80000000, COUNT_CLASS}, + { "Load/Store", "ldst", 0xc0000000, 0xc0000000, COUNT_CLASS}, + /* Unclassified */ + { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL}, +}; + +static InsnClassExecCount sparc64_insn_classes[] = { + { "SetHi & Branches", "op0", 0xc0000000, 0x00000000, COUNT_CLASS}, + { "Call", "op1", 0xc0000000, 0x40000000, COUNT_CLASS}, + { "Arith/Logical/Move", "op2", 0xc0000000, 0x80000000, COUNT_CLASS}, + { "Arith/Logical/Move", "op3", 0xc0000000, 0xc0000000, COUNT_CLASS}, + /* Unclassified */ + { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL}, +}; + +/* Default matcher for currently unclassified architectures */ +static InsnClassExecCount default_insn_classes[] = { + { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL}, +}; + +typedef struct { + const char *qemu_target; + InsnClassExecCount *table; + int table_sz; +} ClassSelector; + +static ClassSelector class_tables[] = { + { "aarch64", aarch64_insn_classes, ARRAY_SIZE(aarch64_insn_classes) }, + { "sparc", sparc32_insn_classes, ARRAY_SIZE(sparc32_insn_classes) }, + { "sparc64", sparc64_insn_classes, ARRAY_SIZE(sparc64_insn_classes) }, + { NULL, default_insn_classes, ARRAY_SIZE(default_insn_classes) }, +}; + +static InsnClassExecCount *class_table; +static int class_table_sz; + +static gint cmp_exec_count(gconstpointer a, gconstpointer b) +{ + InsnExecCount *ea = (InsnExecCount *) a; + InsnExecCount *eb = (InsnExecCount *) b; + uint64_t count_a = qemu_plugin_u64_sum(ea->count); + uint64_t count_b = qemu_plugin_u64_sum(eb->count); + return count_a > count_b ? -1 : 1; +} + +static void free_record(gpointer data) +{ + InsnExecCount *rec = (InsnExecCount *) data; + qemu_plugin_scoreboard_free(rec->count.score); + g_free(rec->insn); + g_free(rec); +} + +static void plugin_exit(qemu_plugin_id_t id, void *p) +{ + g_autoptr(GString) report = g_string_new("Instruction Classes:\n"); + int i; + uint64_t total_count; + GList *counts; + InsnClassExecCount *class = NULL; + + for (i = 0; i < class_table_sz; i++) { + class = &class_table[i]; + switch (class->what) { + case COUNT_CLASS: + total_count = qemu_plugin_u64_sum(class->count); + if (total_count || verbose) { + g_string_append_printf(report, + "Class: %-24s\t(%" PRId64 " hits)\n", + class->class, + total_count); + } + break; + case COUNT_INDIVIDUAL: + g_string_append_printf(report, "Class: %-24s\tcounted individually\n", + class->class); + break; + case COUNT_NONE: + g_string_append_printf(report, "Class: %-24s\tnot counted\n", + class->class); + break; + default: + break; + } + } + + counts = g_hash_table_get_values(insns); + if (counts && g_list_next(counts)) { + g_string_append_printf(report, "Individual Instructions:\n"); + counts = g_list_sort(counts, cmp_exec_count); + + for (i = 0; i < limit && g_list_next(counts); + i++, counts = g_list_next(counts)) { + InsnExecCount *rec = (InsnExecCount *) counts->data; + g_string_append_printf(report, + "Instr: %-24s\t(%" PRId64 " hits)" + "\t(op=0x%08x/%s)\n", + rec->insn, + qemu_plugin_u64_sum(rec->count), + rec->opcode, + rec->class ? + rec->class->class : "un-categorised"); + } + g_list_free(counts); + } + + g_hash_table_destroy(insns); + for (i = 0; i < ARRAY_SIZE(class_tables); i++) { + for (int j = 0; j < class_tables[i].table_sz; ++j) { + qemu_plugin_scoreboard_free(class_tables[i].table[j].count.score); + } + } + + + qemu_plugin_outs(report->str); +} + +static void plugin_init(void) +{ + insns = g_hash_table_new_full(NULL, g_direct_equal, NULL, &free_record); +} + +static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata) +{ + struct qemu_plugin_scoreboard *score = udata; + qemu_plugin_u64_add(qemu_plugin_scoreboard_u64(score), cpu_index, 1); +} + +static struct qemu_plugin_scoreboard *find_counter( + struct qemu_plugin_insn *insn) +{ + int i; + uint64_t *cnt = NULL; + uint32_t opcode; + InsnClassExecCount *class = NULL; + + /* + * We only match the first 32 bits of the instruction which is + * fine for most RISCs but a bit limiting for CISC architectures. + * They would probably benefit from a more tailored plugin. + * However we can fall back to individual instruction counting. + */ + opcode = *((uint32_t *)qemu_plugin_insn_data(insn)); + + for (i = 0; !cnt && i < class_table_sz; i++) { + class = &class_table[i]; + uint32_t masked_bits = opcode & class->mask; + if (masked_bits == class->pattern) { + break; + } + } + + g_assert(class); + + switch (class->what) { + case COUNT_NONE: + return NULL; + case COUNT_CLASS: + return class->count.score; + case COUNT_INDIVIDUAL: + { + InsnExecCount *icount; + + g_mutex_lock(&lock); + icount = (InsnExecCount *) g_hash_table_lookup(insns, + GUINT_TO_POINTER(opcode)); + + if (!icount) { + icount = g_new0(InsnExecCount, 1); + icount->opcode = opcode; + icount->insn = qemu_plugin_insn_disas(insn); + icount->class = class; + struct qemu_plugin_scoreboard *score = + qemu_plugin_scoreboard_new(sizeof(uint64_t)); + icount->count = qemu_plugin_scoreboard_u64(score); + + g_hash_table_insert(insns, GUINT_TO_POINTER(opcode), + (gpointer) icount); + } + g_mutex_unlock(&lock); + + return icount->count.score; + } + default: + g_assert_not_reached(); + } + + return NULL; +} + +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) +{ + size_t n = qemu_plugin_tb_n_insns(tb); + size_t i; + + for (i = 0; i < n; i++) { + struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i); + struct qemu_plugin_scoreboard *cnt = find_counter(insn); + + if (cnt) { + if (do_inline) { + qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu( + insn, QEMU_PLUGIN_INLINE_ADD_U64, + qemu_plugin_scoreboard_u64(cnt), 1); + } else { + qemu_plugin_register_vcpu_insn_exec_cb( + insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_NO_REGS, cnt); + } + } + } +} + +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, + const qemu_info_t *info, + int argc, char **argv) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(class_tables); i++) { + for (int j = 0; j < class_tables[i].table_sz; ++j) { + struct qemu_plugin_scoreboard *score = + qemu_plugin_scoreboard_new(sizeof(uint64_t)); + class_tables[i].table[j].count = qemu_plugin_scoreboard_u64(score); + } + } + + /* Select a class table appropriate to the guest architecture */ + for (i = 0; i < ARRAY_SIZE(class_tables); i++) { + ClassSelector *entry = &class_tables[i]; + if (!entry->qemu_target || + strcmp(entry->qemu_target, info->target_name) == 0) { + class_table = entry->table; + class_table_sz = entry->table_sz; + break; + } + } + + for (i = 0; i < argc; i++) { + char *p = argv[i]; + g_auto(GStrv) tokens = g_strsplit(p, "=", -1); + if (g_strcmp0(tokens[0], "inline") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &do_inline)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", p); + return -1; + } + } else if (g_strcmp0(tokens[0], "verbose") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &verbose)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", p); + return -1; + } + } else if (g_strcmp0(tokens[0], "count") == 0) { + char *value = tokens[1]; + int j; + CountType type = COUNT_INDIVIDUAL; + if (*value == '!') { + type = COUNT_NONE; + value++; + } + for (j = 0; j < class_table_sz; j++) { + if (strcmp(value, class_table[j].opt) == 0) { + class_table[j].what = type; + break; + } + } + } else { + fprintf(stderr, "option parsing failed: %s\n", p); + return -1; + } + } + + plugin_init(); + + qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); + qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); + return 0; +} diff --git a/contrib/plugins/hwprofile.c b/contrib/plugins/hwprofile.c new file mode 100644 index 0000000000..739ac0c66b --- /dev/null +++ b/contrib/plugins/hwprofile.c @@ -0,0 +1,320 @@ +/* + * Copyright (C) 2020, Alex BennĂ©e <alex.bennee@linaro.org> + * + * HW Profile - breakdown access patterns for IO to devices + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include <inttypes.h> +#include <assert.h> +#include <stdlib.h> +#include <inttypes.h> +#include <string.h> +#include <unistd.h> +#include <stdio.h> +#include <glib.h> + +#include <qemu-plugin.h> + +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +typedef struct { + uint64_t cpu_read; + uint64_t cpu_write; + uint64_t reads; + uint64_t writes; +} IOCounts; + +typedef struct { + uint64_t off_or_pc; + IOCounts counts; +} IOLocationCounts; + +typedef struct { + const char *name; + uint64_t base; + IOCounts totals; + GHashTable *detail; +} DeviceCounts; + +static GMutex lock; +static GHashTable *devices; + +/* track the access pattern to a piece of HW */ +static bool pattern; +/* track the source address of access to HW */ +static bool source; +/* track only matched regions of HW */ +static bool check_match; +static gchar **matches; + +static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW; + +static inline bool track_reads(void) +{ + return rw == QEMU_PLUGIN_MEM_RW || rw == QEMU_PLUGIN_MEM_R; +} + +static inline bool track_writes(void) +{ + return rw == QEMU_PLUGIN_MEM_RW || rw == QEMU_PLUGIN_MEM_W; +} + +static void plugin_init(void) +{ + devices = g_hash_table_new(NULL, NULL); +} + +static gint sort_cmp(gconstpointer a, gconstpointer b) +{ + DeviceCounts *ea = (DeviceCounts *) a; + DeviceCounts *eb = (DeviceCounts *) b; + return ea->totals.reads + ea->totals.writes > + eb->totals.reads + eb->totals.writes ? -1 : 1; +} + +static gint sort_loc(gconstpointer a, gconstpointer b) +{ + IOLocationCounts *ea = (IOLocationCounts *) a; + IOLocationCounts *eb = (IOLocationCounts *) b; + return ea->off_or_pc > eb->off_or_pc; +} + +static void fmt_iocount_record(GString *s, IOCounts *rec) +{ + if (track_reads()) { + g_string_append_printf(s, ", %"PRIx64", %"PRId64, + rec->cpu_read, rec->reads); + } + if (track_writes()) { + g_string_append_printf(s, ", %"PRIx64", %"PRId64, + rec->cpu_write, rec->writes); + } +} + +static void fmt_dev_record(GString *s, DeviceCounts *rec) +{ + g_string_append_printf(s, "%s, 0x%"PRIx64, + rec->name, rec->base); + fmt_iocount_record(s, &rec->totals); + g_string_append_c(s, '\n'); +} + +static void plugin_exit(qemu_plugin_id_t id, void *p) +{ + g_autoptr(GString) report = g_string_new(""); + GList *counts; + + if (!(pattern || source)) { + g_string_printf(report, "Device, Address"); + if (track_reads()) { + g_string_append_printf(report, ", RCPUs, Reads"); + } + if (track_writes()) { + g_string_append_printf(report, ", WCPUs, Writes"); + } + g_string_append_c(report, '\n'); + } + + counts = g_hash_table_get_values(devices); + if (counts && g_list_next(counts)) { + GList *it; + + it = g_list_sort(counts, sort_cmp); + + while (it) { + DeviceCounts *rec = (DeviceCounts *) it->data; + if (rec->detail) { + GList *accesses = g_hash_table_get_values(rec->detail); + GList *io_it = g_list_sort(accesses, sort_loc); + const char *prefix = pattern ? "off" : "pc"; + g_string_append_printf(report, "%s @ 0x%"PRIx64"\n", + rec->name, rec->base); + while (io_it) { + IOLocationCounts *loc = (IOLocationCounts *) io_it->data; + g_string_append_printf(report, " %s:%08"PRIx64, + prefix, loc->off_or_pc); + fmt_iocount_record(report, &loc->counts); + g_string_append_c(report, '\n'); + io_it = io_it->next; + } + } else { + fmt_dev_record(report, rec); + } + it = it->next; + }; + g_list_free(it); + } + + qemu_plugin_outs(report->str); +} + +static DeviceCounts *new_count(const char *name, uint64_t base) +{ + DeviceCounts *count = g_new0(DeviceCounts, 1); + count->name = name; + count->base = base; + if (pattern || source) { + count->detail = g_hash_table_new(NULL, NULL); + } + g_hash_table_insert(devices, (gpointer) name, count); + return count; +} + +static IOLocationCounts *new_location(GHashTable *table, uint64_t off_or_pc) +{ + IOLocationCounts *loc = g_new0(IOLocationCounts, 1); + loc->off_or_pc = off_or_pc; + g_hash_table_insert(table, (gpointer) off_or_pc, loc); + return loc; +} + +static void hwprofile_match_hit(DeviceCounts *rec, uint64_t off) +{ + g_autoptr(GString) report = g_string_new("hwprofile: match @ offset"); + g_string_append_printf(report, "%"PRIx64", previous hits\n", off); + fmt_dev_record(report, rec); + qemu_plugin_outs(report->str); +} + +static void inc_count(IOCounts *count, bool is_write, unsigned int cpu_index) +{ + if (is_write) { + count->writes++; + count->cpu_write |= (1 << cpu_index); + } else { + count->reads++; + count->cpu_read |= (1 << cpu_index); + } +} + +static void vcpu_haddr(unsigned int cpu_index, qemu_plugin_meminfo_t meminfo, + uint64_t vaddr, void *udata) +{ + struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(meminfo, vaddr); + + if (!hwaddr || !qemu_plugin_hwaddr_is_io(hwaddr)) { + return; + } else { + const char *name = qemu_plugin_hwaddr_device_name(hwaddr); + uint64_t off = qemu_plugin_hwaddr_phys_addr(hwaddr); + bool is_write = qemu_plugin_mem_is_store(meminfo); + DeviceCounts *counts; + + g_mutex_lock(&lock); + counts = (DeviceCounts *) g_hash_table_lookup(devices, name); + + if (!counts) { + uint64_t base = vaddr - off; + counts = new_count(name, base); + } + + if (check_match) { + if (g_strv_contains((const char * const *)matches, counts->name)) { + hwprofile_match_hit(counts, off); + inc_count(&counts->totals, is_write, cpu_index); + } + } else { + inc_count(&counts->totals, is_write, cpu_index); + } + + /* either track offsets or source of access */ + if (source) { + off = (uint64_t) udata; + } + + if (pattern || source) { + IOLocationCounts *io_count = g_hash_table_lookup(counts->detail, + (gpointer) off); + if (!io_count) { + io_count = new_location(counts->detail, off); + } + inc_count(&io_count->counts, is_write, cpu_index); + } + + g_mutex_unlock(&lock); + } +} + +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) +{ + size_t n = qemu_plugin_tb_n_insns(tb); + size_t i; + + for (i = 0; i < n; i++) { + struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i); + gpointer udata = (gpointer) (source ? qemu_plugin_insn_vaddr(insn) : 0); + qemu_plugin_register_vcpu_mem_cb(insn, vcpu_haddr, + QEMU_PLUGIN_CB_NO_REGS, + rw, udata); + } +} + +QEMU_PLUGIN_EXPORT +int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, + int argc, char **argv) +{ + int i; + g_autoptr(GString) matches_raw = g_string_new(""); + + for (i = 0; i < argc; i++) { + char *opt = argv[i]; + g_auto(GStrv) tokens = g_strsplit(opt, "=", 2); + + if (g_strcmp0(tokens[0], "track") == 0) { + if (g_strcmp0(tokens[1], "read") == 0) { + rw = QEMU_PLUGIN_MEM_R; + } else if (g_strcmp0(tokens[1], "write") == 0) { + rw = QEMU_PLUGIN_MEM_W; + } else { + fprintf(stderr, "invalid value for track: %s\n", tokens[1]); + return -1; + } + } else if (g_strcmp0(tokens[0], "pattern") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &pattern)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + return -1; + } + } else if (g_strcmp0(tokens[0], "source") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &source)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + return -1; + } + } else if (g_strcmp0(tokens[0], "match") == 0) { + check_match = true; + g_string_append_printf(matches_raw, "%s,", tokens[1]); + } else { + fprintf(stderr, "option parsing failed: %s\n", opt); + return -1; + } + } + if (check_match) { + matches = g_strsplit(matches_raw->str, ",", -1); + } + + if (source && pattern) { + fprintf(stderr, "can only currently track either source or pattern.\n"); + return -1; + } + + if (!info->system_emulation) { + fprintf(stderr, "hwprofile: plugin only useful for system emulation\n"); + return -1; + } + + /* Just warn about overflow */ + if (info->system.smp_vcpus > 64 || + info->system.max_vcpus > 64) { + fprintf(stderr, "hwprofile: can only track up to 64 CPUs\n"); + } + + plugin_init(); + + qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); + qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); + return 0; +} diff --git a/contrib/plugins/lockstep.c b/contrib/plugins/lockstep.c new file mode 100644 index 0000000000..237543b43a --- /dev/null +++ b/contrib/plugins/lockstep.c @@ -0,0 +1,372 @@ +/* + * Lockstep Execution Plugin + * + * Allows you to execute two QEMU instances in lockstep and report + * when their execution diverges. This is mainly useful for developers + * who want to see where a change to TCG code generation has + * introduced a subtle and hard to find bug. + * + * Caveats: + * - single-threaded linux-user apps only with non-deterministic syscalls + * - no MTTCG enabled system emulation (icount may help) + * + * While icount makes things more deterministic it doesn't mean a + * particular run may execute the exact same sequence of blocks. An + * asynchronous event (for example X11 graphics update) may cause a + * block to end early and a new partial block to start. This means + * serial only test cases are a better bet. -d nochain may also help. + * + * This code is not thread safe! + * + * Copyright (c) 2020 Linaro Ltd + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <glib.h> +#include <inttypes.h> +#include <unistd.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <stdio.h> +#include <errno.h> + +#include <qemu-plugin.h> + +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; + +/* saved so we can uninstall later */ +static qemu_plugin_id_t our_id; + +static unsigned long bb_count; +static unsigned long insn_count; + +/* Information about a translated block */ +typedef struct { + uint64_t pc; + uint64_t insns; +} BlockInfo; + +/* Information about an execution state in the log */ +typedef struct { + BlockInfo *block; + unsigned long insn_count; + unsigned long block_count; +} ExecInfo; + +/* The execution state we compare */ +typedef struct { + uint64_t pc; + unsigned long insn_count; +} ExecState; + +typedef struct { + GSList *log_pos; + int distance; +} DivergeState; + +/* list of translated block info */ +static GSList *blocks; + +/* execution log and points of divergence */ +static GSList *log, *divergence_log; + +static int socket_fd; +static char *path_to_unlink; + +static bool verbose; + +static void plugin_cleanup(qemu_plugin_id_t id) +{ + /* Free our block data */ + g_slist_free_full(blocks, &g_free); + g_slist_free_full(log, &g_free); + g_slist_free(divergence_log); + + close(socket_fd); + if (path_to_unlink) { + unlink(path_to_unlink); + } +} + +static void plugin_exit(qemu_plugin_id_t id, void *p) +{ + g_autoptr(GString) out = g_string_new("No divergence :-)\n"); + g_string_append_printf(out, "Executed %ld/%d blocks\n", + bb_count, g_slist_length(log)); + g_string_append_printf(out, "Executed ~%ld instructions\n", insn_count); + qemu_plugin_outs(out->str); + + plugin_cleanup(id); +} + +static void report_divergance(ExecState *us, ExecState *them) +{ + DivergeState divrec = { log, 0 }; + g_autoptr(GString) out = g_string_new(""); + bool diverged = false; + + /* + * If we have diverged before did we get back on track or are we + * totally losing it? + */ + if (divergence_log) { + DivergeState *last = (DivergeState *) divergence_log->data; + GSList *entry; + + for (entry = log; g_slist_next(entry); entry = g_slist_next(entry)) { + if (entry == last->log_pos) { + break; + } + divrec.distance++; + } + + /* + * If the last two records are so close it is likely we will + * not recover synchronisation with the other end. + */ + if (divrec.distance == 1 && last->distance == 1) { + diverged = true; + } + } + divergence_log = g_slist_prepend(divergence_log, + g_memdup2(&divrec, sizeof(divrec))); + + /* Output short log entry of going out of sync... */ + if (verbose || divrec.distance == 1 || diverged) { + g_string_printf(out, + "@ 0x%016" PRIx64 " vs 0x%016" PRIx64 + " (%d/%d since last)\n", + us->pc, them->pc, g_slist_length(divergence_log), + divrec.distance); + qemu_plugin_outs(out->str); + } + + if (diverged) { + int i; + GSList *entry; + + g_string_printf(out, + "Δ insn_count @ 0x%016" PRIx64 + " (%ld) vs 0x%016" PRIx64 " (%ld)\n", + us->pc, us->insn_count, them->pc, them->insn_count); + + for (entry = log, i = 0; + g_slist_next(entry) && i < 5; + entry = g_slist_next(entry), i++) { + ExecInfo *prev = (ExecInfo *) entry->data; + g_string_append_printf(out, + " previously @ 0x%016" PRIx64 "/%" PRId64 + " (%ld insns)\n", + prev->block->pc, prev->block->insns, + prev->insn_count); + } + qemu_plugin_outs(out->str); + qemu_plugin_outs("too much divergence... giving up."); + qemu_plugin_uninstall(our_id, plugin_cleanup); + } +} + +static void vcpu_tb_exec(unsigned int cpu_index, void *udata) +{ + BlockInfo *bi = (BlockInfo *) udata; + ExecState us, them; + ssize_t bytes; + ExecInfo *exec; + + us.pc = bi->pc; + us.insn_count = insn_count; + + /* + * Write our current position to the other end. If we fail the + * other end has probably died and we should shut down gracefully. + */ + bytes = write(socket_fd, &us, sizeof(ExecState)); + if (bytes < sizeof(ExecState)) { + qemu_plugin_outs(bytes < 0 ? + "problem writing to socket" : + "wrote less than expected to socket"); + qemu_plugin_uninstall(our_id, plugin_cleanup); + return; + } + + /* + * Now read where our peer has reached. Again a failure probably + * indicates the other end died and we should close down cleanly. + */ + bytes = read(socket_fd, &them, sizeof(ExecState)); + if (bytes < sizeof(ExecState)) { + qemu_plugin_outs(bytes < 0 ? + "problem reading from socket" : + "read less than expected"); + qemu_plugin_uninstall(our_id, plugin_cleanup); + return; + } + + /* + * Compare and report if we have diverged. + */ + if (us.pc != them.pc) { + report_divergance(&us, &them); + } + + /* + * Assume this block will execute fully and record it + * in the execution log. + */ + insn_count += bi->insns; + bb_count++; + exec = g_new0(ExecInfo, 1); + exec->block = bi; + exec->insn_count = insn_count; + exec->block_count = bb_count; + log = g_slist_prepend(log, exec); +} + +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) +{ + BlockInfo *bi = g_new0(BlockInfo, 1); + bi->pc = qemu_plugin_tb_vaddr(tb); + bi->insns = qemu_plugin_tb_n_insns(tb); + + /* save a reference so we can free later */ + blocks = g_slist_prepend(blocks, bi); + qemu_plugin_register_vcpu_tb_exec_cb(tb, vcpu_tb_exec, + QEMU_PLUGIN_CB_NO_REGS, (void *)bi); +} + + +/* + * Instead of encoding master/slave status into what is essentially + * two peers we shall just take the simple approach of checking for + * the existence of the pipe and assuming if it's not there we are the + * first process. + */ +static bool setup_socket(const char *path) +{ + struct sockaddr_un sockaddr; + const gsize pathlen = sizeof(sockaddr.sun_path) - 1; + int fd; + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) { + perror("create socket"); + return false; + } + + sockaddr.sun_family = AF_UNIX; + if (g_strlcpy(sockaddr.sun_path, path, pathlen) >= pathlen) { + perror("bad path"); + close(fd); + return false; + } + + if (bind(fd, (struct sockaddr *)&sockaddr, sizeof(sockaddr)) < 0) { + perror("bind socket"); + close(fd); + return false; + } + + /* remember to clean-up */ + path_to_unlink = g_strdup(path); + + if (listen(fd, 1) < 0) { + perror("listen socket"); + close(fd); + return false; + } + + socket_fd = accept(fd, NULL, NULL); + if (socket_fd < 0 && errno != EINTR) { + perror("accept socket"); + close(fd); + return false; + } + + qemu_plugin_outs("setup_socket::ready\n"); + + close(fd); + return true; +} + +static bool connect_socket(const char *path) +{ + int fd; + struct sockaddr_un sockaddr; + const gsize pathlen = sizeof(sockaddr.sun_path) - 1; + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) { + perror("create socket"); + return false; + } + + sockaddr.sun_family = AF_UNIX; + if (g_strlcpy(sockaddr.sun_path, path, pathlen) >= pathlen) { + perror("bad path"); + close(fd); + return false; + } + + if (connect(fd, (struct sockaddr *)&sockaddr, sizeof(sockaddr)) < 0) { + perror("failed to connect"); + close(fd); + return false; + } + + qemu_plugin_outs("connect_socket::ready\n"); + + socket_fd = fd; + return true; +} + +static bool setup_unix_socket(const char *path) +{ + if (g_file_test(path, G_FILE_TEST_EXISTS)) { + return connect_socket(path); + } else { + return setup_socket(path); + } +} + + +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, + const qemu_info_t *info, + int argc, char **argv) +{ + int i; + g_autofree char *sock_path = NULL; + + for (i = 0; i < argc; i++) { + char *p = argv[i]; + g_auto(GStrv) tokens = g_strsplit(p, "=", 2); + + if (g_strcmp0(tokens[0], "verbose") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &verbose)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", p); + return -1; + } + } else if (g_strcmp0(tokens[0], "sockpath") == 0) { + sock_path = tokens[1]; + } else { + fprintf(stderr, "option parsing failed: %s\n", p); + return -1; + } + } + + if (sock_path == NULL) { + fprintf(stderr, "Need a socket path to talk to other instance.\n"); + return -1; + } + + if (!setup_unix_socket(sock_path)) { + fprintf(stderr, "Failed to setup socket for communications.\n"); + return -1; + } + + our_id = id; + + qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); + qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); + return 0; +} diff --git a/contrib/plugins/win32_linker.c b/contrib/plugins/win32_linker.c new file mode 100644 index 0000000000..7534b2b8bf --- /dev/null +++ b/contrib/plugins/win32_linker.c @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2023, Greg Manning <gmanning@rapitasystems.com> + * + * This hook, __pfnDliFailureHook2, is documented in the microsoft documentation here: + * https://learn.microsoft.com/en-us/cpp/build/reference/error-handling-and-notification + * It gets called when a delay-loaded DLL encounters various errors. + * We handle the specific case of a DLL looking for a "qemu.exe", + * and give it the running executable (regardless of what it is named). + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#include <windows.h> +#include <delayimp.h> + +FARPROC WINAPI dll_failure_hook(unsigned dliNotify, PDelayLoadInfo pdli); + + +PfnDliHook __pfnDliFailureHook2 = dll_failure_hook; + +FARPROC WINAPI dll_failure_hook(unsigned dliNotify, PDelayLoadInfo pdli) { + if (dliNotify == dliFailLoadLib) { + /* If the failing request was for qemu.exe, ... */ + if (strcmp(pdli->szDll, "qemu.exe") == 0) { + /* Then pass back a pointer to the top level module. */ + HMODULE top = GetModuleHandle(NULL); + return (FARPROC) top; + } + } + /* Otherwise we can't do anything special. */ + return 0; +} + diff --git a/contrib/rdmacm-mux/Makefile.objs b/contrib/rdmacm-mux/Makefile.objs deleted file mode 100644 index be3eacb6f7..0000000000 --- a/contrib/rdmacm-mux/Makefile.objs +++ /dev/null @@ -1,4 +0,0 @@ -ifdef CONFIG_PVRDMA -CFLAGS += -libumad -Wno-format-truncation -rdmacm-mux-obj-y = main.o -endif diff --git a/contrib/rdmacm-mux/main.c b/contrib/rdmacm-mux/main.c deleted file mode 100644 index 835a7f9214..0000000000 --- a/contrib/rdmacm-mux/main.c +++ /dev/null @@ -1,798 +0,0 @@ -/* - * QEMU paravirtual RDMA - rdmacm-mux implementation - * - * Copyright (C) 2018 Oracle - * Copyright (C) 2018 Red Hat Inc - * - * Authors: - * Yuval Shaia <yuval.shaia@oracle.com> - * Marcel Apfelbaum <marcel@redhat.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include "sys/poll.h" -#include "sys/ioctl.h" -#include "pthread.h" -#include "syslog.h" - -#include "infiniband/verbs.h" -#include "infiniband/umad.h" -#include "infiniband/umad_types.h" -#include "infiniband/umad_sa.h" -#include "infiniband/umad_cm.h" - -#include "rdmacm-mux.h" - -#define SCALE_US 1000 -#define COMMID_TTL 2 /* How many SCALE_US a context of MAD session is saved */ -#define SLEEP_SECS 5 /* This is used both in poll() and thread */ -#define SERVER_LISTEN_BACKLOG 10 -#define MAX_CLIENTS 4096 -#define MAD_RMPP_VERSION 0 -#define MAD_METHOD_MASK0 0x8 - -#define IB_USER_MAD_LONGS_PER_METHOD_MASK (128 / (8 * sizeof(long))) - -#define CM_REQ_DGID_POS 80 -#define CM_SIDR_REQ_DGID_POS 44 - -/* The below can be override by command line parameter */ -#define UNIX_SOCKET_PATH "/var/run/rdmacm-mux" -#define RDMA_PORT_NUM 1 - -typedef struct RdmaCmServerArgs { - char unix_socket_path[PATH_MAX]; - char rdma_dev_name[NAME_MAX]; - int rdma_port_num; -} RdmaCMServerArgs; - -typedef struct CommId2FdEntry { - int fd; - int ttl; /* Initialized to 2, decrement each timeout, entry delete when 0 */ - __be64 gid_ifid; -} CommId2FdEntry; - -typedef struct RdmaCmUMadAgent { - int port_id; - int agent_id; - GHashTable *gid2fd; /* Used to find fd of a given gid */ - GHashTable *commid2fd; /* Used to find fd on of a given comm_id */ -} RdmaCmUMadAgent; - -typedef struct RdmaCmServer { - bool run; - RdmaCMServerArgs args; - struct pollfd fds[MAX_CLIENTS]; - int nfds; - RdmaCmUMadAgent umad_agent; - pthread_t umad_recv_thread; - pthread_rwlock_t lock; -} RdmaCMServer; - -static RdmaCMServer server = {0}; - -static void usage(const char *progname) -{ - printf("Usage: %s [OPTION]...\n" - "Start a RDMA-CM multiplexer\n" - "\n" - "\t-h Show this help\n" - "\t-d rdma-device-name Name of RDMA device to register with\n" - "\t-s unix-socket-path Path to unix socket to listen on (default %s)\n" - "\t-p rdma-device-port Port number of RDMA device to register with (default %d)\n", - progname, UNIX_SOCKET_PATH, RDMA_PORT_NUM); -} - -static void help(const char *progname) -{ - fprintf(stderr, "Try '%s -h' for more information.\n", progname); -} - -static void parse_args(int argc, char *argv[]) -{ - int c; - char unix_socket_path[PATH_MAX]; - - strcpy(server.args.rdma_dev_name, ""); - strcpy(unix_socket_path, UNIX_SOCKET_PATH); - server.args.rdma_port_num = RDMA_PORT_NUM; - - while ((c = getopt(argc, argv, "hs:d:p:")) != -1) { - switch (c) { - case 'h': - usage(argv[0]); - exit(0); - - case 'd': - strncpy(server.args.rdma_dev_name, optarg, NAME_MAX - 1); - break; - - case 's': - /* This is temporary, final name will build below */ - strncpy(unix_socket_path, optarg, PATH_MAX); - break; - - case 'p': - server.args.rdma_port_num = atoi(optarg); - break; - - default: - help(argv[0]); - exit(1); - } - } - - if (!strcmp(server.args.rdma_dev_name, "")) { - fprintf(stderr, "Missing RDMA device name\n"); - help(argv[0]); - exit(1); - } - - /* Build unique unix-socket file name */ - snprintf(server.args.unix_socket_path, PATH_MAX, "%s-%s-%d", - unix_socket_path, server.args.rdma_dev_name, - server.args.rdma_port_num); - - syslog(LOG_INFO, "unix_socket_path=%s", server.args.unix_socket_path); - syslog(LOG_INFO, "rdma-device-name=%s", server.args.rdma_dev_name); - syslog(LOG_INFO, "rdma-device-port=%d", server.args.rdma_port_num); -} - -static void hash_tbl_alloc(void) -{ - - server.umad_agent.gid2fd = g_hash_table_new_full(g_int64_hash, - g_int64_equal, - g_free, g_free); - server.umad_agent.commid2fd = g_hash_table_new_full(g_int_hash, - g_int_equal, - g_free, g_free); -} - -static void hash_tbl_free(void) -{ - if (server.umad_agent.commid2fd) { - g_hash_table_destroy(server.umad_agent.commid2fd); - } - if (server.umad_agent.gid2fd) { - g_hash_table_destroy(server.umad_agent.gid2fd); - } -} - - -static int _hash_tbl_search_fd_by_ifid(__be64 *gid_ifid) -{ - int *fd; - - fd = g_hash_table_lookup(server.umad_agent.gid2fd, gid_ifid); - if (!fd) { - /* Let's try IPv4 */ - *gid_ifid |= 0x00000000ffff0000; - fd = g_hash_table_lookup(server.umad_agent.gid2fd, gid_ifid); - } - - return fd ? *fd : 0; -} - -static int hash_tbl_search_fd_by_ifid(int *fd, __be64 *gid_ifid) -{ - pthread_rwlock_rdlock(&server.lock); - *fd = _hash_tbl_search_fd_by_ifid(gid_ifid); - pthread_rwlock_unlock(&server.lock); - - if (!fd) { - syslog(LOG_WARNING, "Can't find matching for ifid 0x%llx\n", *gid_ifid); - return -ENOENT; - } - - return 0; -} - -static int hash_tbl_search_fd_by_comm_id(uint32_t comm_id, int *fd, - __be64 *gid_idid) -{ - CommId2FdEntry *fde; - - pthread_rwlock_rdlock(&server.lock); - fde = g_hash_table_lookup(server.umad_agent.commid2fd, &comm_id); - pthread_rwlock_unlock(&server.lock); - - if (!fde) { - syslog(LOG_WARNING, "Can't find matching for comm_id 0x%x\n", comm_id); - return -ENOENT; - } - - *fd = fde->fd; - *gid_idid = fde->gid_ifid; - - return 0; -} - -static RdmaCmMuxErrCode add_fd_ifid_pair(int fd, __be64 gid_ifid) -{ - int fd1; - - pthread_rwlock_wrlock(&server.lock); - - fd1 = _hash_tbl_search_fd_by_ifid(&gid_ifid); - if (fd1) { /* record already exist - an error */ - pthread_rwlock_unlock(&server.lock); - return fd == fd1 ? RDMACM_MUX_ERR_CODE_EEXIST : - RDMACM_MUX_ERR_CODE_EACCES; - } - - g_hash_table_insert(server.umad_agent.gid2fd, g_memdup(&gid_ifid, - sizeof(gid_ifid)), g_memdup(&fd, sizeof(fd))); - - pthread_rwlock_unlock(&server.lock); - - syslog(LOG_INFO, "0x%lx registered on socket %d", - be64toh((uint64_t)gid_ifid), fd); - - return RDMACM_MUX_ERR_CODE_OK; -} - -static RdmaCmMuxErrCode delete_fd_ifid_pair(int fd, __be64 gid_ifid) -{ - int fd1; - - pthread_rwlock_wrlock(&server.lock); - - fd1 = _hash_tbl_search_fd_by_ifid(&gid_ifid); - if (!fd1) { /* record not exist - an error */ - pthread_rwlock_unlock(&server.lock); - return RDMACM_MUX_ERR_CODE_ENOTFOUND; - } - - g_hash_table_remove(server.umad_agent.gid2fd, g_memdup(&gid_ifid, - sizeof(gid_ifid))); - pthread_rwlock_unlock(&server.lock); - - syslog(LOG_INFO, "0x%lx unregistered on socket %d", - be64toh((uint64_t)gid_ifid), fd); - - return RDMACM_MUX_ERR_CODE_OK; -} - -static void hash_tbl_save_fd_comm_id_pair(int fd, uint32_t comm_id, - uint64_t gid_ifid) -{ - CommId2FdEntry fde = {fd, COMMID_TTL, gid_ifid}; - - pthread_rwlock_wrlock(&server.lock); - g_hash_table_insert(server.umad_agent.commid2fd, - g_memdup(&comm_id, sizeof(comm_id)), - g_memdup(&fde, sizeof(fde))); - pthread_rwlock_unlock(&server.lock); -} - -static gboolean remove_old_comm_ids(gpointer key, gpointer value, - gpointer user_data) -{ - CommId2FdEntry *fde = (CommId2FdEntry *)value; - - return !fde->ttl--; -} - -static gboolean remove_entry_from_gid2fd(gpointer key, gpointer value, - gpointer user_data) -{ - if (*(int *)value == *(int *)user_data) { - syslog(LOG_INFO, "0x%lx unregistered on socket %d", - be64toh(*(uint64_t *)key), *(int *)value); - return true; - } - - return false; -} - -static void hash_tbl_remove_fd_ifid_pair(int fd) -{ - pthread_rwlock_wrlock(&server.lock); - g_hash_table_foreach_remove(server.umad_agent.gid2fd, - remove_entry_from_gid2fd, (gpointer)&fd); - pthread_rwlock_unlock(&server.lock); -} - -static int get_fd(const char *mad, int *fd, __be64 *gid_ifid) -{ - struct umad_hdr *hdr = (struct umad_hdr *)mad; - char *data = (char *)hdr + sizeof(*hdr); - int32_t comm_id = 0; - uint16_t attr_id = be16toh(hdr->attr_id); - int rc = 0; - - switch (attr_id) { - case UMAD_CM_ATTR_REQ: - memcpy(gid_ifid, data + CM_REQ_DGID_POS, sizeof(*gid_ifid)); - rc = hash_tbl_search_fd_by_ifid(fd, gid_ifid); - break; - - case UMAD_CM_ATTR_SIDR_REQ: - memcpy(gid_ifid, data + CM_SIDR_REQ_DGID_POS, sizeof(*gid_ifid)); - rc = hash_tbl_search_fd_by_ifid(fd, gid_ifid); - break; - - case UMAD_CM_ATTR_REP: - /* Fall through */ - case UMAD_CM_ATTR_REJ: - /* Fall through */ - case UMAD_CM_ATTR_DREQ: - /* Fall through */ - case UMAD_CM_ATTR_DREP: - /* Fall through */ - case UMAD_CM_ATTR_RTU: - data += sizeof(comm_id); - /* Fall through */ - case UMAD_CM_ATTR_SIDR_REP: - memcpy(&comm_id, data, sizeof(comm_id)); - if (comm_id) { - rc = hash_tbl_search_fd_by_comm_id(comm_id, fd, gid_ifid); - } - break; - - default: - rc = -EINVAL; - syslog(LOG_WARNING, "Unsupported attr_id 0x%x\n", attr_id); - } - - syslog(LOG_DEBUG, "mad_to_vm: %d 0x%x 0x%x\n", *fd, attr_id, comm_id); - - return rc; -} - -static void *umad_recv_thread_func(void *args) -{ - int rc; - RdmaCmMuxMsg msg = {0}; - int fd = -2; - - msg.hdr.msg_type = RDMACM_MUX_MSG_TYPE_REQ; - msg.hdr.op_code = RDMACM_MUX_OP_CODE_MAD; - - while (server.run) { - do { - msg.umad_len = sizeof(msg.umad.mad); - rc = umad_recv(server.umad_agent.port_id, &msg.umad, &msg.umad_len, - SLEEP_SECS * SCALE_US); - if ((rc == -EIO) || (rc == -EINVAL)) { - syslog(LOG_CRIT, "Fatal error while trying to read MAD"); - } - - if (rc == -ETIMEDOUT) { - g_hash_table_foreach_remove(server.umad_agent.commid2fd, - remove_old_comm_ids, NULL); - } - } while (rc && server.run); - - if (server.run) { - rc = get_fd(msg.umad.mad, &fd, &msg.hdr.sgid.global.interface_id); - if (rc) { - continue; - } - - send(fd, &msg, sizeof(msg), 0); - } - } - - return NULL; -} - -static int read_and_process(int fd) -{ - int rc; - RdmaCmMuxMsg msg = {0}; - struct umad_hdr *hdr; - uint32_t *comm_id = 0; - uint16_t attr_id; - - rc = recv(fd, &msg, sizeof(msg), 0); - syslog(LOG_DEBUG, "Socket %d, recv %d\n", fd, rc); - - if (rc < 0 && errno != EWOULDBLOCK) { - syslog(LOG_ERR, "Fail to read from socket %d\n", fd); - return -EIO; - } - - if (!rc) { - syslog(LOG_ERR, "Fail to read from socket %d\n", fd); - return -EPIPE; - } - - if (msg.hdr.msg_type != RDMACM_MUX_MSG_TYPE_REQ) { - syslog(LOG_WARNING, "Got non-request message (%d) from socket %d\n", - msg.hdr.msg_type, fd); - return -EPERM; - } - - switch (msg.hdr.op_code) { - case RDMACM_MUX_OP_CODE_REG: - rc = add_fd_ifid_pair(fd, msg.hdr.sgid.global.interface_id); - break; - - case RDMACM_MUX_OP_CODE_UNREG: - rc = delete_fd_ifid_pair(fd, msg.hdr.sgid.global.interface_id); - break; - - case RDMACM_MUX_OP_CODE_MAD: - /* If this is REQ or REP then store the pair comm_id,fd to be later - * used for other messages where gid is unknown */ - hdr = (struct umad_hdr *)msg.umad.mad; - attr_id = be16toh(hdr->attr_id); - if ((attr_id == UMAD_CM_ATTR_REQ) || (attr_id == UMAD_CM_ATTR_DREQ) || - (attr_id == UMAD_CM_ATTR_SIDR_REQ) || - (attr_id == UMAD_CM_ATTR_REP) || (attr_id == UMAD_CM_ATTR_DREP)) { - comm_id = (uint32_t *)(msg.umad.mad + sizeof(*hdr)); - hash_tbl_save_fd_comm_id_pair(fd, *comm_id, - msg.hdr.sgid.global.interface_id); - } - - syslog(LOG_DEBUG, "vm_to_mad: %d 0x%x 0x%x\n", fd, attr_id, - comm_id ? *comm_id : 0); - rc = umad_send(server.umad_agent.port_id, server.umad_agent.agent_id, - &msg.umad, msg.umad_len, 1, 0); - if (rc) { - syslog(LOG_ERR, - "Fail to send MAD message (0x%x) from socket %d, err=%d", - attr_id, fd, rc); - } - break; - - default: - syslog(LOG_ERR, "Got invalid op_code (%d) from socket %d", - msg.hdr.msg_type, fd); - rc = RDMACM_MUX_ERR_CODE_EINVAL; - } - - msg.hdr.msg_type = RDMACM_MUX_MSG_TYPE_RESP; - msg.hdr.err_code = rc; - rc = send(fd, &msg, sizeof(msg), 0); - - return rc == sizeof(msg) ? 0 : -EPIPE; -} - -static int accept_all(void) -{ - int fd, rc = 0;; - - pthread_rwlock_wrlock(&server.lock); - - do { - if ((server.nfds + 1) > MAX_CLIENTS) { - syslog(LOG_WARNING, "Too many clients (%d)", server.nfds); - rc = -EIO; - goto out; - } - - fd = accept(server.fds[0].fd, NULL, NULL); - if (fd < 0) { - if (errno != EWOULDBLOCK) { - syslog(LOG_WARNING, "accept() failed"); - rc = -EIO; - goto out; - } - break; - } - - syslog(LOG_INFO, "Client connected on socket %d\n", fd); - server.fds[server.nfds].fd = fd; - server.fds[server.nfds].events = POLLIN; - server.nfds++; - } while (fd != -1); - -out: - pthread_rwlock_unlock(&server.lock); - return rc; -} - -static void compress_fds(void) -{ - int i, j; - int closed = 0; - - pthread_rwlock_wrlock(&server.lock); - - for (i = 1; i < server.nfds; i++) { - if (!server.fds[i].fd) { - closed++; - for (j = i; j < server.nfds - 1; j++) { - server.fds[j] = server.fds[j + 1]; - } - } - } - - server.nfds -= closed; - - pthread_rwlock_unlock(&server.lock); -} - -static void close_fd(int idx) -{ - close(server.fds[idx].fd); - syslog(LOG_INFO, "Socket %d closed\n", server.fds[idx].fd); - hash_tbl_remove_fd_ifid_pair(server.fds[idx].fd); - server.fds[idx].fd = 0; -} - -static void run(void) -{ - int rc, nfds, i; - bool compress = false; - - syslog(LOG_INFO, "Service started"); - - while (server.run) { - rc = poll(server.fds, server.nfds, SLEEP_SECS * SCALE_US); - if (rc < 0) { - if (errno != EINTR) { - syslog(LOG_WARNING, "poll() failed"); - } - continue; - } - - if (rc == 0) { - continue; - } - - nfds = server.nfds; - for (i = 0; i < nfds; i++) { - syslog(LOG_DEBUG, "pollfd[%d]: revents 0x%x, events 0x%x\n", i, - server.fds[i].revents, server.fds[i].events); - if (server.fds[i].revents == 0) { - continue; - } - - if (server.fds[i].revents != POLLIN) { - if (i == 0) { - syslog(LOG_NOTICE, "Unexpected poll() event (0x%x)\n", - server.fds[i].revents); - } else { - close_fd(i); - compress = true; - } - continue; - } - - if (i == 0) { - rc = accept_all(); - if (rc) { - continue; - } - } else { - rc = read_and_process(server.fds[i].fd); - if (rc) { - close_fd(i); - compress = true; - } - } - } - - if (compress) { - compress = false; - compress_fds(); - } - } -} - -static void fini_listener(void) -{ - int i; - - if (server.fds[0].fd <= 0) { - return; - } - - for (i = server.nfds - 1; i >= 0; i--) { - if (server.fds[i].fd) { - close(server.fds[i].fd); - } - } - - unlink(server.args.unix_socket_path); -} - -static void fini_umad(void) -{ - if (server.umad_agent.agent_id) { - umad_unregister(server.umad_agent.port_id, server.umad_agent.agent_id); - } - - if (server.umad_agent.port_id) { - umad_close_port(server.umad_agent.port_id); - } - - hash_tbl_free(); -} - -static void fini(void) -{ - if (server.umad_recv_thread) { - pthread_join(server.umad_recv_thread, NULL); - server.umad_recv_thread = 0; - } - fini_umad(); - fini_listener(); - pthread_rwlock_destroy(&server.lock); - - syslog(LOG_INFO, "Service going down"); -} - -static int init_listener(void) -{ - struct sockaddr_un sun; - int rc, on = 1; - - server.fds[0].fd = socket(AF_UNIX, SOCK_STREAM, 0); - if (server.fds[0].fd < 0) { - syslog(LOG_ALERT, "socket() failed"); - return -EIO; - } - - rc = setsockopt(server.fds[0].fd, SOL_SOCKET, SO_REUSEADDR, (char *)&on, - sizeof(on)); - if (rc < 0) { - syslog(LOG_ALERT, "setsockopt() failed"); - rc = -EIO; - goto err; - } - - rc = ioctl(server.fds[0].fd, FIONBIO, (char *)&on); - if (rc < 0) { - syslog(LOG_ALERT, "ioctl() failed"); - rc = -EIO; - goto err; - } - - if (strlen(server.args.unix_socket_path) >= sizeof(sun.sun_path)) { - syslog(LOG_ALERT, - "Invalid unix_socket_path, size must be less than %ld\n", - sizeof(sun.sun_path)); - rc = -EINVAL; - goto err; - } - - sun.sun_family = AF_UNIX; - rc = snprintf(sun.sun_path, sizeof(sun.sun_path), "%s", - server.args.unix_socket_path); - if (rc < 0 || rc >= sizeof(sun.sun_path)) { - syslog(LOG_ALERT, "Could not copy unix socket path\n"); - rc = -EINVAL; - goto err; - } - - rc = bind(server.fds[0].fd, (struct sockaddr *)&sun, sizeof(sun)); - if (rc < 0) { - syslog(LOG_ALERT, "bind() failed"); - rc = -EIO; - goto err; - } - - rc = listen(server.fds[0].fd, SERVER_LISTEN_BACKLOG); - if (rc < 0) { - syslog(LOG_ALERT, "listen() failed"); - rc = -EIO; - goto err; - } - - server.fds[0].events = POLLIN; - server.nfds = 1; - server.run = true; - - return 0; - -err: - close(server.fds[0].fd); - return rc; -} - -static int init_umad(void) -{ - long method_mask[IB_USER_MAD_LONGS_PER_METHOD_MASK]; - - server.umad_agent.port_id = umad_open_port(server.args.rdma_dev_name, - server.args.rdma_port_num); - - if (server.umad_agent.port_id < 0) { - syslog(LOG_WARNING, "umad_open_port() failed"); - return -EIO; - } - - memset(&method_mask, 0, sizeof(method_mask)); - method_mask[0] = MAD_METHOD_MASK0; - server.umad_agent.agent_id = umad_register(server.umad_agent.port_id, - UMAD_CLASS_CM, - UMAD_SA_CLASS_VERSION, - MAD_RMPP_VERSION, method_mask); - if (server.umad_agent.agent_id < 0) { - syslog(LOG_WARNING, "umad_register() failed"); - return -EIO; - } - - hash_tbl_alloc(); - - return 0; -} - -static void signal_handler(int sig, siginfo_t *siginfo, void *context) -{ - static bool warned; - - /* Prevent stop if clients are connected */ - if (server.nfds != 1) { - if (!warned) { - syslog(LOG_WARNING, - "Can't stop while active client exist, resend SIGINT to overid"); - warned = true; - return; - } - } - - if (sig == SIGINT) { - server.run = false; - fini(); - } - - exit(0); -} - -static int init(void) -{ - int rc; - struct sigaction sig = {0}; - - rc = init_listener(); - if (rc) { - return rc; - } - - rc = init_umad(); - if (rc) { - return rc; - } - - pthread_rwlock_init(&server.lock, 0); - - rc = pthread_create(&server.umad_recv_thread, NULL, umad_recv_thread_func, - NULL); - if (rc) { - syslog(LOG_ERR, "Fail to create UMAD receiver thread (%d)\n", rc); - return rc; - } - - sig.sa_sigaction = &signal_handler; - sig.sa_flags = SA_SIGINFO; - rc = sigaction(SIGINT, &sig, NULL); - if (rc < 0) { - syslog(LOG_ERR, "Fail to install SIGINT handler (%d)\n", errno); - return rc; - } - - return 0; -} - -int main(int argc, char *argv[]) -{ - int rc; - - memset(&server, 0, sizeof(server)); - - parse_args(argc, argv); - - rc = init(); - if (rc) { - syslog(LOG_ERR, "Fail to initialize server (%d)\n", rc); - rc = -EAGAIN; - goto out; - } - - run(); - -out: - fini(); - - return rc; -} diff --git a/contrib/rdmacm-mux/rdmacm-mux.h b/contrib/rdmacm-mux/rdmacm-mux.h deleted file mode 100644 index 942a802c47..0000000000 --- a/contrib/rdmacm-mux/rdmacm-mux.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * QEMU paravirtual RDMA - rdmacm-mux declarations - * - * Copyright (C) 2018 Oracle - * Copyright (C) 2018 Red Hat Inc - * - * Authors: - * Yuval Shaia <yuval.shaia@oracle.com> - * Marcel Apfelbaum <marcel@redhat.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#ifndef RDMACM_MUX_H -#define RDMACM_MUX_H - -#include "linux/if.h" -#include "infiniband/verbs.h" -#include "infiniband/umad.h" -#include "rdma/rdma_user_cm.h" - -typedef enum RdmaCmMuxMsgType { - RDMACM_MUX_MSG_TYPE_REQ = 0, - RDMACM_MUX_MSG_TYPE_RESP = 1, -} RdmaCmMuxMsgType; - -typedef enum RdmaCmMuxOpCode { - RDMACM_MUX_OP_CODE_REG = 0, - RDMACM_MUX_OP_CODE_UNREG = 1, - RDMACM_MUX_OP_CODE_MAD = 2, -} RdmaCmMuxOpCode; - -typedef enum RdmaCmMuxErrCode { - RDMACM_MUX_ERR_CODE_OK = 0, - RDMACM_MUX_ERR_CODE_EINVAL = 1, - RDMACM_MUX_ERR_CODE_EEXIST = 2, - RDMACM_MUX_ERR_CODE_EACCES = 3, - RDMACM_MUX_ERR_CODE_ENOTFOUND = 4, -} RdmaCmMuxErrCode; - -typedef struct RdmaCmMuxHdr { - RdmaCmMuxMsgType msg_type; - RdmaCmMuxOpCode op_code; - union ibv_gid sgid; - RdmaCmMuxErrCode err_code; -} RdmaCmUHdr; - -typedef struct RdmaCmUMad { - struct ib_user_mad hdr; - char mad[RDMA_MAX_PRIVATE_DATA]; -} RdmaCmUMad; - -typedef struct RdmaCmMuxMsg { - RdmaCmUHdr hdr; - int umad_len; - RdmaCmUMad umad; -} RdmaCmMuxMsg; - -#endif diff --git a/contrib/vhost-user-blk/Makefile.objs b/contrib/vhost-user-blk/Makefile.objs deleted file mode 100644 index 72e2cdc3ad..0000000000 --- a/contrib/vhost-user-blk/Makefile.objs +++ /dev/null @@ -1 +0,0 @@ -vhost-user-blk-obj-y = vhost-user-blk.o diff --git a/contrib/vhost-user-blk/meson.build b/contrib/vhost-user-blk/meson.build new file mode 100644 index 0000000000..ac1eece37a --- /dev/null +++ b/contrib/vhost-user-blk/meson.build @@ -0,0 +1,4 @@ +executable('vhost-user-blk', files('vhost-user-blk.c'), + dependencies: [qemuutil, vhost_user], + build_by_default: host_os == 'linux', + install: false) diff --git a/contrib/vhost-user-blk/vhost-user-blk.c b/contrib/vhost-user-blk/vhost-user-blk.c index 858221ad95..89e5f11a64 100644 --- a/contrib/vhost-user-blk/vhost-user-blk.c +++ b/contrib/vhost-user-blk/vhost-user-blk.c @@ -17,9 +17,16 @@ #include "qemu/osdep.h" #include "standard-headers/linux/virtio_blk.h" -#include "contrib/libvhost-user/libvhost-user-glib.h" -#include "contrib/libvhost-user/libvhost-user.h" +#include "libvhost-user-glib.h" +#if defined(__linux__) +#include <linux/fs.h> +#include <sys/ioctl.h> +#endif + +enum { + VHOST_USER_BLK_MAX_QUEUES = 8, +}; struct virtio_blk_inhdr { unsigned char status; @@ -59,6 +66,20 @@ static size_t vub_iov_size(const struct iovec *iov, return len; } +static size_t vub_iov_to_buf(const struct iovec *iov, + const unsigned int iov_cnt, void *buf) +{ + size_t len; + unsigned int i; + + len = 0; + for (i = 0; i < iov_cnt; i++) { + memcpy(buf + len, iov[i].iov_base, iov[i].iov_len); + len += iov[i].iov_len; + } + return len; +} + static void vub_panic_cb(VuDev *vu_dev, const char *buf) { VugDev *gdev; @@ -85,10 +106,7 @@ static void vub_req_complete(VubReq *req) req->size + 1); vu_queue_notify(vu_dev, req->vq); - if (req->elem) { - free(req->elem); - } - + g_free(req->elem); g_free(req); } @@ -125,7 +143,7 @@ vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt) req->size = vub_iov_size(iov, iovcnt); rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512); if (rc < 0) { - fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n", + fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n", vdev_blk->blk_name, req->sector_num, req->size, strerror(errno)); return -1; @@ -148,7 +166,7 @@ vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt) req->size = vub_iov_size(iov, iovcnt); rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512); if (rc < 0) { - fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n", + fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n", vdev_blk->blk_name, req->sector_num, req->size, strerror(errno)); return -1; @@ -157,6 +175,44 @@ vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt) return rc; } +static int +vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt, + uint32_t type) +{ + struct virtio_blk_discard_write_zeroes *desc; + ssize_t size; + void *buf; + + size = vub_iov_size(iov, iovcnt); + if (size != sizeof(*desc)) { + fprintf(stderr, "Invalid size %zd, expect %zd\n", size, sizeof(*desc)); + return -1; + } + buf = g_new0(char, size); + vub_iov_to_buf(iov, iovcnt, buf); + + #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT) + VubDev *vdev_blk = req->vdev_blk; + desc = buf; + uint64_t range[2] = { le64toh(desc->sector) << 9, + le32toh(desc->num_sectors) << 9 }; + if (type == VIRTIO_BLK_T_DISCARD) { + if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) { + g_free(buf); + return 0; + } + } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) { + if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) { + g_free(buf); + return 0; + } + } + #endif + + g_free(buf); + return -1; +} + static void vub_flush(VubReq *req) { @@ -184,7 +240,7 @@ static int vub_virtio_process_req(VubDev *vdev_blk, /* refer to hw/block/virtio_blk.c */ if (elem->out_num < 1 || elem->in_num < 1) { fprintf(stderr, "virtio-blk request missing headers\n"); - free(elem); + g_free(elem); return -1; } @@ -212,50 +268,61 @@ static int vub_virtio_process_req(VubDev *vdev_blk, in_num--; type = le32toh(req->out->type); - switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) { - case VIRTIO_BLK_T_IN: { - ssize_t ret = 0; - bool is_write = type & VIRTIO_BLK_T_OUT; - req->sector_num = le64toh(req->out->sector); - if (is_write) { - ret = vub_writev(req, &elem->out_sg[1], out_num); - } else { - ret = vub_readv(req, &elem->in_sg[0], in_num); - } - if (ret >= 0) { - req->in->status = VIRTIO_BLK_S_OK; - } else { - req->in->status = VIRTIO_BLK_S_IOERR; - } - vub_req_complete(req); - break; + switch (type & ~VIRTIO_BLK_T_BARRIER) { + case VIRTIO_BLK_T_IN: + case VIRTIO_BLK_T_OUT: { + ssize_t ret = 0; + bool is_write = type & VIRTIO_BLK_T_OUT; + req->sector_num = le64toh(req->out->sector); + if (is_write) { + ret = vub_writev(req, &elem->out_sg[1], out_num); + } else { + ret = vub_readv(req, &elem->in_sg[0], in_num); } - case VIRTIO_BLK_T_FLUSH: { - vub_flush(req); + if (ret >= 0) { req->in->status = VIRTIO_BLK_S_OK; - vub_req_complete(req); - break; + } else { + req->in->status = VIRTIO_BLK_S_IOERR; } - case VIRTIO_BLK_T_GET_ID: { - size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num), - VIRTIO_BLK_ID_BYTES); - snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk"); + vub_req_complete(req); + break; + } + case VIRTIO_BLK_T_FLUSH: + vub_flush(req); + req->in->status = VIRTIO_BLK_S_OK; + vub_req_complete(req); + break; + case VIRTIO_BLK_T_GET_ID: { + size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num), + VIRTIO_BLK_ID_BYTES); + snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk"); + req->in->status = VIRTIO_BLK_S_OK; + req->size = elem->in_sg[0].iov_len; + vub_req_complete(req); + break; + } + case VIRTIO_BLK_T_DISCARD: + case VIRTIO_BLK_T_WRITE_ZEROES: { + int rc; + rc = vub_discard_write_zeroes(req, &elem->out_sg[1], out_num, type); + if (rc == 0) { req->in->status = VIRTIO_BLK_S_OK; - req->size = elem->in_sg[0].iov_len; - vub_req_complete(req); - break; - } - default: { - req->in->status = VIRTIO_BLK_S_UNSUPP; - vub_req_complete(req); - break; + } else { + req->in->status = VIRTIO_BLK_S_IOERR; } + vub_req_complete(req); + break; + } + default: + req->in->status = VIRTIO_BLK_S_UNSUPP; + vub_req_complete(req); + break; } return 0; err: - free(elem); + g_free(elem); g_free(req); return -1; } @@ -267,12 +334,6 @@ static void vub_process_vq(VuDev *vu_dev, int idx) VuVirtq *vq; int ret; - if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) { - fprintf(stderr, "VQ Index out of range: %d\n", idx); - vub_panic_cb(vu_dev, NULL); - return; - } - gdev = container_of(vu_dev, VugDev, parent); vdev_blk = container_of(gdev, VubDev, parent); assert(vdev_blk); @@ -313,9 +374,11 @@ vub_get_features(VuDev *dev) 1ull << VIRTIO_BLK_F_TOPOLOGY | 1ull << VIRTIO_BLK_F_BLK_SIZE | 1ull << VIRTIO_BLK_F_FLUSH | - 1ull << VIRTIO_BLK_F_CONFIG_WCE | - 1ull << VIRTIO_F_VERSION_1 | - 1ull << VHOST_USER_F_PROTOCOL_FEATURES; + #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT) + 1ull << VIRTIO_BLK_F_DISCARD | + 1ull << VIRTIO_BLK_F_WRITE_ZEROES | + #endif + 1ull << VIRTIO_BLK_F_CONFIG_WCE; if (vdev_blk->enable_ro) { features |= 1ull << VIRTIO_BLK_F_RO; @@ -327,7 +390,8 @@ vub_get_features(VuDev *dev) static uint64_t vub_get_protocol_features(VuDev *dev) { - return 1ull << VHOST_USER_PROTOCOL_F_CONFIG; + return 1ull << VHOST_USER_PROTOCOL_F_CONFIG | + 1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD; } static int @@ -336,6 +400,10 @@ vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len) VugDev *gdev; VubDev *vdev_blk; + if (len > sizeof(struct virtio_blk_config)) { + return -1; + } + gdev = container_of(vu_dev, VugDev, parent); vdev_blk = container_of(gdev, VubDev, parent); memcpy(config, &vdev_blk->blkcfg, len); @@ -353,7 +421,7 @@ vub_set_config(VuDev *vu_dev, const uint8_t *data, int fd; /* don't support live migration */ - if (flags != VHOST_SET_CONFIG_TYPE_MASTER) { + if (flags != VHOST_SET_CONFIG_TYPE_FRONTEND) { return -1; } @@ -406,7 +474,7 @@ static int unix_sock_new(char *unix_fn) assert(unix_fn); sock = socket(AF_UNIX, SOCK_STREAM, 0); - if (sock <= 0) { + if (sock < 0) { perror("socket"); return -1; } @@ -454,7 +522,7 @@ vub_get_blocksize(int fd) #if defined(__linux__) && defined(BLKSSZGET) if (ioctl(fd, BLKSSZGET, &blocksize) == 0) { - return blocklen; + return blocksize; } #endif @@ -464,9 +532,9 @@ vub_get_blocksize(int fd) static void vub_initialize_config(int fd, struct virtio_blk_config *config) { - off64_t capacity; + off_t capacity; - capacity = lseek64(fd, 0, SEEK_END); + capacity = lseek(fd, 0, SEEK_END); config->capacity = capacity >> 9; config->blk_size = vub_get_blocksize(fd); config->size_max = 65536; @@ -474,6 +542,13 @@ vub_initialize_config(int fd, struct virtio_blk_config *config) config->min_io_size = 1; config->opt_io_size = 1; config->num_queues = 1; + #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT) + config->max_discard_sectors = 32768; + config->max_discard_seg = 1; + config->discard_sector_alignment = config->blk_size >> 9; + config->max_write_zeroes_sectors = 32768; + config->max_write_zeroes_seg = 1; + #endif } static VubDev * @@ -499,66 +574,91 @@ vub_new(char *blk_file) return vdev_blk; } +static int opt_fdnum = -1; +static char *opt_socket_path; +static char *opt_blk_file; +static gboolean opt_print_caps; +static gboolean opt_read_only; + +static GOptionEntry entries[] = { + { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps, + "Print capabilities", NULL }, + { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum, + "Use inherited fd socket", "FDNUM" }, + { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path, + "Use UNIX socket path", "PATH" }, + {"blk-file", 'b', 0, G_OPTION_ARG_FILENAME, &opt_blk_file, + "block device or file path", "PATH"}, + { "read-only", 'r', 0, G_OPTION_ARG_NONE, &opt_read_only, + "Enable read-only", NULL }, + { NULL, }, +}; + int main(int argc, char **argv) { - int opt; - char *unix_socket = NULL; - char *blk_file = NULL; - bool enable_ro = false; int lsock = -1, csock = -1; VubDev *vdev_blk = NULL; - - while ((opt = getopt(argc, argv, "b:rs:h")) != -1) { - switch (opt) { - case 'b': - blk_file = g_strdup(optarg); - break; - case 's': - unix_socket = g_strdup(optarg); - break; - case 'r': - enable_ro = true; - break; - case 'h': - default: - printf("Usage: %s [ -b block device or file, -s UNIX domain socket" - " | -r Enable read-only ] | [ -h ]\n", argv[0]); - return 0; + GError *error = NULL; + GOptionContext *context; + + context = g_option_context_new(NULL); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_printerr("Option parsing failed: %s\n", error->message); + exit(EXIT_FAILURE); + } + if (opt_print_caps) { + g_print("{\n"); + g_print(" \"type\": \"block\",\n"); + g_print(" \"features\": [\n"); + g_print(" \"read-only\",\n"); + g_print(" \"blk-file\"\n"); + g_print(" ]\n"); + g_print("}\n"); + exit(EXIT_SUCCESS); + } + + if (!opt_blk_file) { + g_print("%s\n", g_option_context_get_help(context, true, NULL)); + exit(EXIT_FAILURE); + } + + if (opt_socket_path) { + lsock = unix_sock_new(opt_socket_path); + if (lsock < 0) { + exit(EXIT_FAILURE); } + } else if (opt_fdnum < 0) { + g_print("%s\n", g_option_context_get_help(context, true, NULL)); + exit(EXIT_FAILURE); + } else { + lsock = opt_fdnum; } - if (!unix_socket || !blk_file) { - printf("Usage: %s [ -b block device or file, -s UNIX domain socket" - " | -r Enable read-only ] | [ -h ]\n", argv[0]); - return -1; - } - - lsock = unix_sock_new(unix_socket); - if (lsock < 0) { - goto err; - } - - csock = accept(lsock, (void *)0, (void *)0); + csock = accept(lsock, NULL, NULL); if (csock < 0) { - fprintf(stderr, "Accept error %s\n", strerror(errno)); - goto err; + g_printerr("Accept error %s\n", strerror(errno)); + exit(EXIT_FAILURE); } - vdev_blk = vub_new(blk_file); + vdev_blk = vub_new(opt_blk_file); if (!vdev_blk) { - goto err; + exit(EXIT_FAILURE); } - if (enable_ro) { + if (opt_read_only) { vdev_blk->enable_ro = true; } - vug_init(&vdev_blk->parent, csock, vub_panic_cb, &vub_iface); + if (!vug_init(&vdev_blk->parent, VHOST_USER_BLK_MAX_QUEUES, csock, + vub_panic_cb, &vub_iface)) { + g_printerr("Failed to initialize libvhost-user-glib\n"); + exit(EXIT_FAILURE); + } g_main_loop_run(vdev_blk->loop); - + g_main_loop_unref(vdev_blk->loop); + g_option_context_free(context); vug_deinit(&vdev_blk->parent); - -err: vub_free(vdev_blk); if (csock >= 0) { close(csock); @@ -566,8 +666,8 @@ err: if (lsock >= 0) { close(lsock); } - g_free(unix_socket); - g_free(blk_file); + g_free(opt_socket_path); + g_free(opt_blk_file); return 0; } diff --git a/contrib/vhost-user-gpu/50-qemu-gpu.json.in b/contrib/vhost-user-gpu/50-qemu-gpu.json.in new file mode 100644 index 0000000000..f5edd097f8 --- /dev/null +++ b/contrib/vhost-user-gpu/50-qemu-gpu.json.in @@ -0,0 +1,5 @@ +{ + "description": "QEMU vhost-user-gpu", + "type": "gpu", + "binary": "@libexecdir@/vhost-user-gpu" +} diff --git a/contrib/vhost-user-gpu/meson.build b/contrib/vhost-user-gpu/meson.build new file mode 100644 index 0000000000..c8883c2d8e --- /dev/null +++ b/contrib/vhost-user-gpu/meson.build @@ -0,0 +1,11 @@ +if have_vhost_user_gpu + executable('vhost-user-gpu', files('vhost-user-gpu.c', 'virgl.c', 'vugbm.c'), + dependencies: [qemuutil, pixman, gbm, virgl, vhost_user, opengl], + install: true, + install_dir: get_option('libexecdir')) + + configure_file(input: '50-qemu-gpu.json.in', + output: '50-qemu-gpu.json', + configuration: { 'libexecdir' : get_option('prefix') / get_option('libexecdir') }, + install_dir: qemu_datadir / 'vhost-user') +endif diff --git a/contrib/vhost-user-gpu/vhost-user-gpu.c b/contrib/vhost-user-gpu/vhost-user-gpu.c new file mode 100644 index 0000000000..bb41758e34 --- /dev/null +++ b/contrib/vhost-user-gpu/vhost-user-gpu.c @@ -0,0 +1,1320 @@ +/* + * Virtio vhost-user GPU Device + * + * Copyright Red Hat, Inc. 2013-2018 + * + * Authors: + * Dave Airlie <airlied@redhat.com> + * Gerd Hoffmann <kraxel@redhat.com> + * Marc-AndrĂ© Lureau <marcandre.lureau@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu/drm.h" +#include "qapi/error.h" +#include "qemu/sockets.h" + +#include <pixman.h> +#include <glib-unix.h> + +#include "vugpu.h" +#include "hw/virtio/virtio-gpu-bswap.h" +#include "hw/virtio/virtio-gpu-pixman.h" +#include "virgl.h" +#include "vugbm.h" + +enum { + VHOST_USER_GPU_MAX_QUEUES = 2, +}; + +struct virtio_gpu_simple_resource { + uint32_t resource_id; + uint32_t width; + uint32_t height; + uint32_t format; + struct iovec *iov; + unsigned int iov_cnt; + uint32_t scanout_bitmask; + pixman_image_t *image; + struct vugbm_buffer buffer; + QTAILQ_ENTRY(virtio_gpu_simple_resource) next; +}; + +static gboolean opt_print_caps; +static int opt_fdnum = -1; +static char *opt_socket_path; +static char *opt_render_node; +static gboolean opt_virgl; + +static void vg_handle_ctrl(VuDev *dev, int qidx); +static void vg_cleanup_mapping(VuGpu *g, + struct virtio_gpu_simple_resource *res); + +static const char * +vg_cmd_to_string(int cmd) +{ +#define CMD(cmd) [cmd] = #cmd + static const char *vg_cmd_str[] = { + CMD(VIRTIO_GPU_UNDEFINED), + + /* 2d commands */ + CMD(VIRTIO_GPU_CMD_GET_DISPLAY_INFO), + CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_2D), + CMD(VIRTIO_GPU_CMD_RESOURCE_UNREF), + CMD(VIRTIO_GPU_CMD_SET_SCANOUT), + CMD(VIRTIO_GPU_CMD_RESOURCE_FLUSH), + CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D), + CMD(VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING), + CMD(VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING), + CMD(VIRTIO_GPU_CMD_GET_CAPSET_INFO), + CMD(VIRTIO_GPU_CMD_GET_CAPSET), + + /* 3d commands */ + CMD(VIRTIO_GPU_CMD_CTX_CREATE), + CMD(VIRTIO_GPU_CMD_CTX_DESTROY), + CMD(VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE), + CMD(VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE), + CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_3D), + CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D), + CMD(VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D), + CMD(VIRTIO_GPU_CMD_SUBMIT_3D), + + /* cursor commands */ + CMD(VIRTIO_GPU_CMD_UPDATE_CURSOR), + CMD(VIRTIO_GPU_CMD_MOVE_CURSOR), + }; +#undef REQ + + if (cmd >= 0 && cmd < G_N_ELEMENTS(vg_cmd_str)) { + return vg_cmd_str[cmd]; + } else { + return "unknown"; + } +} + +static int +vg_sock_fd_read(int sock, void *buf, ssize_t buflen) +{ + int ret; + + do { + ret = read(sock, buf, buflen); + } while (ret < 0 && (errno == EINTR || errno == EAGAIN)); + + g_warn_if_fail(ret == buflen); + return ret; +} + +static void +vg_sock_fd_close(VuGpu *g) +{ + if (g->sock_fd >= 0) { + close(g->sock_fd); + g->sock_fd = -1; + } +} + +static gboolean +source_wait_cb(gint fd, GIOCondition condition, gpointer user_data) +{ + VuGpu *g = user_data; + + if (!vg_recv_msg(g, VHOST_USER_GPU_DMABUF_UPDATE, 0, NULL)) { + return G_SOURCE_CONTINUE; + } + + /* resume */ + g->wait_in = 0; + vg_handle_ctrl(&g->dev.parent, 0); + + return G_SOURCE_REMOVE; +} + +void +vg_wait_ok(VuGpu *g) +{ + assert(g->wait_in == 0); + g->wait_in = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP, + source_wait_cb, g); +} + +static int +vg_sock_fd_write(int sock, const void *buf, ssize_t buflen, int fd) +{ + ssize_t ret; + struct iovec iov = { + .iov_base = (void *)buf, + .iov_len = buflen, + }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + }; + union { + struct cmsghdr cmsghdr; + char control[CMSG_SPACE(sizeof(int))]; + } cmsgu; + struct cmsghdr *cmsg; + + if (fd != -1) { + msg.msg_control = cmsgu.control; + msg.msg_controllen = sizeof(cmsgu.control); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + + *((int *)CMSG_DATA(cmsg)) = fd; + } + + do { + ret = sendmsg(sock, &msg, 0); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + g_warn_if_fail(ret == buflen); + return ret; +} + +void +vg_send_msg(VuGpu *vg, const VhostUserGpuMsg *msg, int fd) +{ + if (vg_sock_fd_write(vg->sock_fd, msg, + VHOST_USER_GPU_HDR_SIZE + msg->size, fd) < 0) { + vg_sock_fd_close(vg); + } +} + +bool +vg_recv_msg(VuGpu *g, uint32_t expect_req, uint32_t expect_size, + gpointer payload) +{ + uint32_t req, flags, size; + + if (vg_sock_fd_read(g->sock_fd, &req, sizeof(req)) < 0 || + vg_sock_fd_read(g->sock_fd, &flags, sizeof(flags)) < 0 || + vg_sock_fd_read(g->sock_fd, &size, sizeof(size)) < 0) { + goto err; + } + + g_return_val_if_fail(req == expect_req, false); + g_return_val_if_fail(flags & VHOST_USER_GPU_MSG_FLAG_REPLY, false); + g_return_val_if_fail(size == expect_size, false); + + if (size && vg_sock_fd_read(g->sock_fd, payload, size) != size) { + goto err; + } + + return true; + +err: + vg_sock_fd_close(g); + return false; +} + +static struct virtio_gpu_simple_resource * +virtio_gpu_find_resource(VuGpu *g, uint32_t resource_id) +{ + struct virtio_gpu_simple_resource *res; + + QTAILQ_FOREACH(res, &g->reslist, next) { + if (res->resource_id == resource_id) { + return res; + } + } + return NULL; +} + +void +vg_ctrl_response(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd, + struct virtio_gpu_ctrl_hdr *resp, + size_t resp_len) +{ + size_t s; + + if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE) { + resp->flags |= VIRTIO_GPU_FLAG_FENCE; + resp->fence_id = cmd->cmd_hdr.fence_id; + resp->ctx_id = cmd->cmd_hdr.ctx_id; + } + virtio_gpu_ctrl_hdr_bswap(resp); + s = iov_from_buf(cmd->elem.in_sg, cmd->elem.in_num, 0, resp, resp_len); + if (s != resp_len) { + g_critical("%s: response size incorrect %zu vs %zu", + __func__, s, resp_len); + } + vu_queue_push(&g->dev.parent, cmd->vq, &cmd->elem, s); + vu_queue_notify(&g->dev.parent, cmd->vq); + cmd->state = VG_CMD_STATE_FINISHED; +} + +void +vg_ctrl_response_nodata(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd, + enum virtio_gpu_ctrl_type type) +{ + struct virtio_gpu_ctrl_hdr resp = { + .type = type, + }; + + vg_ctrl_response(g, cmd, &resp, sizeof(resp)); +} + + +static gboolean +get_display_info_cb(gint fd, GIOCondition condition, gpointer user_data) +{ + struct virtio_gpu_resp_display_info dpy_info = { {} }; + VuGpu *vg = user_data; + struct virtio_gpu_ctrl_command *cmd = QTAILQ_LAST(&vg->fenceq); + + g_debug("disp info cb"); + assert(cmd->cmd_hdr.type == VIRTIO_GPU_CMD_GET_DISPLAY_INFO); + if (!vg_recv_msg(vg, VHOST_USER_GPU_GET_DISPLAY_INFO, + sizeof(dpy_info), &dpy_info)) { + return G_SOURCE_CONTINUE; + } + + QTAILQ_REMOVE(&vg->fenceq, cmd, next); + vg_ctrl_response(vg, cmd, &dpy_info.hdr, sizeof(dpy_info)); + + vg->wait_in = 0; + vg_handle_ctrl(&vg->dev.parent, 0); + + return G_SOURCE_REMOVE; +} + +void +vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) +{ + VhostUserGpuMsg msg = { + .request = VHOST_USER_GPU_GET_DISPLAY_INFO, + .size = 0, + }; + + assert(vg->wait_in == 0); + + vg_send_msg(vg, &msg, -1); + vg->wait_in = g_unix_fd_add(vg->sock_fd, G_IO_IN | G_IO_HUP, + get_display_info_cb, vg); + cmd->state = VG_CMD_STATE_PENDING; +} + +static gboolean +get_edid_cb(gint fd, GIOCondition condition, gpointer user_data) +{ + struct virtio_gpu_resp_edid resp_edid; + VuGpu *vg = user_data; + struct virtio_gpu_ctrl_command *cmd = QTAILQ_LAST(&vg->fenceq); + + g_debug("get edid cb"); + assert(cmd->cmd_hdr.type == VIRTIO_GPU_CMD_GET_EDID); + if (!vg_recv_msg(vg, VHOST_USER_GPU_GET_EDID, + sizeof(resp_edid), &resp_edid)) { + return G_SOURCE_CONTINUE; + } + + QTAILQ_REMOVE(&vg->fenceq, cmd, next); + vg_ctrl_response(vg, cmd, &resp_edid.hdr, sizeof(resp_edid)); + + vg->wait_in = 0; + vg_handle_ctrl(&vg->dev.parent, 0); + + return G_SOURCE_REMOVE; +} + +void +vg_get_edid(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_cmd_get_edid get_edid; + + VUGPU_FILL_CMD(get_edid); + virtio_gpu_bswap_32(&get_edid, sizeof(get_edid)); + + VhostUserGpuMsg msg = { + .request = VHOST_USER_GPU_GET_EDID, + .size = sizeof(VhostUserGpuEdidRequest), + .payload.edid_req = { + .scanout_id = get_edid.scanout, + }, + }; + + assert(vg->wait_in == 0); + + vg_send_msg(vg, &msg, -1); + vg->wait_in = g_unix_fd_add(vg->sock_fd, G_IO_IN | G_IO_HUP, + get_edid_cb, vg); + cmd->state = VG_CMD_STATE_PENDING; +} + +static void +vg_resource_create_2d(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + pixman_format_code_t pformat; + struct virtio_gpu_simple_resource *res; + struct virtio_gpu_resource_create_2d c2d; + + VUGPU_FILL_CMD(c2d); + virtio_gpu_bswap_32(&c2d, sizeof(c2d)); + + if (c2d.resource_id == 0) { + g_critical("%s: resource id 0 is not allowed", __func__); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + return; + } + + res = virtio_gpu_find_resource(g, c2d.resource_id); + if (res) { + g_critical("%s: resource already exists %d", __func__, c2d.resource_id); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + return; + } + + res = g_new0(struct virtio_gpu_simple_resource, 1); + res->width = c2d.width; + res->height = c2d.height; + res->format = c2d.format; + res->resource_id = c2d.resource_id; + + pformat = virtio_gpu_get_pixman_format(c2d.format); + if (!pformat) { + g_critical("%s: host couldn't handle guest format %d", + __func__, c2d.format); + g_free(res); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + return; + } + vugbm_buffer_create(&res->buffer, &g->gdev, c2d.width, c2d.height); + res->image = pixman_image_create_bits(pformat, + c2d.width, + c2d.height, + (uint32_t *)res->buffer.mmap, + res->buffer.stride); + if (!res->image) { + g_critical("%s: resource creation failed %d %d %d", + __func__, c2d.resource_id, c2d.width, c2d.height); + vugbm_buffer_destroy(&res->buffer); + g_free(res); + cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; + return; + } + + QTAILQ_INSERT_HEAD(&g->reslist, res, next); +} + +static void +vg_disable_scanout(VuGpu *g, int scanout_id) +{ + struct virtio_gpu_scanout *scanout = &g->scanout[scanout_id]; + struct virtio_gpu_simple_resource *res; + + if (scanout->resource_id == 0) { + return; + } + + res = virtio_gpu_find_resource(g, scanout->resource_id); + if (res) { + res->scanout_bitmask &= ~(1 << scanout_id); + } + + scanout->width = 0; + scanout->height = 0; + + if (g->sock_fd >= 0) { + VhostUserGpuMsg msg = { + .request = VHOST_USER_GPU_SCANOUT, + .size = sizeof(VhostUserGpuScanout), + .payload.scanout.scanout_id = scanout_id, + }; + vg_send_msg(g, &msg, -1); + } +} + +static void +vg_resource_destroy(VuGpu *g, + struct virtio_gpu_simple_resource *res) +{ + int i; + + if (res->scanout_bitmask) { + for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { + if (res->scanout_bitmask & (1 << i)) { + vg_disable_scanout(g, i); + } + } + } + + vugbm_buffer_destroy(&res->buffer); + vg_cleanup_mapping(g, res); + pixman_image_unref(res->image); + QTAILQ_REMOVE(&g->reslist, res, next); + g_free(res); +} + +static void +vg_resource_unref(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_simple_resource *res; + struct virtio_gpu_resource_unref unref; + + VUGPU_FILL_CMD(unref); + virtio_gpu_bswap_32(&unref, sizeof(unref)); + + res = virtio_gpu_find_resource(g, unref.resource_id); + if (!res) { + g_critical("%s: illegal resource specified %d", + __func__, unref.resource_id); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + return; + } + vg_resource_destroy(g, res); +} + +int +vg_create_mapping_iov(VuGpu *g, + struct virtio_gpu_resource_attach_backing *ab, + struct virtio_gpu_ctrl_command *cmd, + struct iovec **iov) +{ + struct virtio_gpu_mem_entry *ents; + size_t esize, s; + int i; + + if (ab->nr_entries > 16384) { + g_critical("%s: nr_entries is too big (%d > 16384)", + __func__, ab->nr_entries); + return -1; + } + + esize = sizeof(*ents) * ab->nr_entries; + ents = g_malloc(esize); + s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, + sizeof(*ab), ents, esize); + if (s != esize) { + g_critical("%s: command data size incorrect %zu vs %zu", + __func__, s, esize); + g_free(ents); + return -1; + } + + *iov = g_new0(struct iovec, ab->nr_entries); + for (i = 0; i < ab->nr_entries; i++) { + uint64_t len = ents[i].length; + (*iov)[i].iov_len = ents[i].length; + (*iov)[i].iov_base = vu_gpa_to_va(&g->dev.parent, &len, ents[i].addr); + if (!(*iov)[i].iov_base || len != ents[i].length) { + g_critical("%s: resource %d element %d", + __func__, ab->resource_id, i); + g_free(*iov); + g_free(ents); + *iov = NULL; + return -1; + } + } + g_free(ents); + return 0; +} + +static void +vg_resource_attach_backing(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_simple_resource *res; + struct virtio_gpu_resource_attach_backing ab; + int ret; + + VUGPU_FILL_CMD(ab); + virtio_gpu_bswap_32(&ab, sizeof(ab)); + + res = virtio_gpu_find_resource(g, ab.resource_id); + if (!res) { + g_critical("%s: illegal resource specified %d", + __func__, ab.resource_id); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + return; + } + + if (res->iov) { + cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; + return; + } + + ret = vg_create_mapping_iov(g, &ab, cmd, &res->iov); + if (ret != 0) { + cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; + return; + } + + res->iov_cnt = ab.nr_entries; +} + +/* Though currently only free iov, maybe later will do more work. */ +void vg_cleanup_mapping_iov(VuGpu *g, + struct iovec *iov, uint32_t count) +{ + g_free(iov); +} + +static void +vg_cleanup_mapping(VuGpu *g, + struct virtio_gpu_simple_resource *res) +{ + vg_cleanup_mapping_iov(g, res->iov, res->iov_cnt); + res->iov = NULL; + res->iov_cnt = 0; +} + +static void +vg_resource_detach_backing(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_simple_resource *res; + struct virtio_gpu_resource_detach_backing detach; + + VUGPU_FILL_CMD(detach); + virtio_gpu_bswap_32(&detach, sizeof(detach)); + + res = virtio_gpu_find_resource(g, detach.resource_id); + if (!res || !res->iov) { + g_critical("%s: illegal resource specified %d", + __func__, detach.resource_id); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + return; + } + + vg_cleanup_mapping(g, res); +} + +static void +vg_transfer_to_host_2d(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_simple_resource *res; + int h; + uint32_t src_offset, dst_offset, stride; + int bpp; + pixman_format_code_t format; + struct virtio_gpu_transfer_to_host_2d t2d; + + VUGPU_FILL_CMD(t2d); + virtio_gpu_t2d_bswap(&t2d); + + res = virtio_gpu_find_resource(g, t2d.resource_id); + if (!res || !res->iov) { + g_critical("%s: illegal resource specified %d", + __func__, t2d.resource_id); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + return; + } + + if (t2d.r.x > res->width || + t2d.r.y > res->height || + t2d.r.width > res->width || + t2d.r.height > res->height || + t2d.r.x + t2d.r.width > res->width || + t2d.r.y + t2d.r.height > res->height) { + g_critical("%s: transfer bounds outside resource" + " bounds for resource %d: %d %d %d %d vs %d %d", + __func__, t2d.resource_id, t2d.r.x, t2d.r.y, + t2d.r.width, t2d.r.height, res->width, res->height); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + return; + } + + format = pixman_image_get_format(res->image); + bpp = (PIXMAN_FORMAT_BPP(format) + 7) / 8; + stride = pixman_image_get_stride(res->image); + + if (t2d.offset || t2d.r.x || t2d.r.y || + t2d.r.width != pixman_image_get_width(res->image)) { + void *img_data = pixman_image_get_data(res->image); + for (h = 0; h < t2d.r.height; h++) { + src_offset = t2d.offset + stride * h; + dst_offset = (t2d.r.y + h) * stride + (t2d.r.x * bpp); + + iov_to_buf(res->iov, res->iov_cnt, src_offset, + img_data + + dst_offset, t2d.r.width * bpp); + } + } else { + iov_to_buf(res->iov, res->iov_cnt, 0, + pixman_image_get_data(res->image), + pixman_image_get_stride(res->image) + * pixman_image_get_height(res->image)); + } +} + +static void +vg_set_scanout(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_simple_resource *res, *ores; + struct virtio_gpu_scanout *scanout; + struct virtio_gpu_set_scanout ss; + int fd; + + VUGPU_FILL_CMD(ss); + virtio_gpu_bswap_32(&ss, sizeof(ss)); + + if (ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUTS) { + g_critical("%s: illegal scanout id specified %d", + __func__, ss.scanout_id); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID; + return; + } + + if (ss.resource_id == 0) { + vg_disable_scanout(g, ss.scanout_id); + return; + } + + /* create a surface for this scanout */ + res = virtio_gpu_find_resource(g, ss.resource_id); + if (!res) { + g_critical("%s: illegal resource specified %d", + __func__, ss.resource_id); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + return; + } + + if (ss.r.x > res->width || + ss.r.y > res->height || + ss.r.width > res->width || + ss.r.height > res->height || + ss.r.x + ss.r.width > res->width || + ss.r.y + ss.r.height > res->height) { + g_critical("%s: illegal scanout %d bounds for" + " resource %d, (%d,%d)+%d,%d vs %d %d", + __func__, ss.scanout_id, ss.resource_id, ss.r.x, ss.r.y, + ss.r.width, ss.r.height, res->width, res->height); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + return; + } + + scanout = &g->scanout[ss.scanout_id]; + + ores = virtio_gpu_find_resource(g, scanout->resource_id); + if (ores) { + ores->scanout_bitmask &= ~(1 << ss.scanout_id); + } + + res->scanout_bitmask |= (1 << ss.scanout_id); + scanout->resource_id = ss.resource_id; + scanout->x = ss.r.x; + scanout->y = ss.r.y; + scanout->width = ss.r.width; + scanout->height = ss.r.height; + + struct vugbm_buffer *buffer = &res->buffer; + + if (vugbm_buffer_can_get_dmabuf_fd(buffer)) { + VhostUserGpuMsg msg = { + .request = VHOST_USER_GPU_DMABUF_SCANOUT, + .size = sizeof(VhostUserGpuDMABUFScanout), + .payload.dmabuf_scanout = (VhostUserGpuDMABUFScanout) { + .scanout_id = ss.scanout_id, + .x = ss.r.x, + .y = ss.r.y, + .width = ss.r.width, + .height = ss.r.height, + .fd_width = buffer->width, + .fd_height = buffer->height, + .fd_stride = buffer->stride, + .fd_drm_fourcc = buffer->format + } + }; + + if (vugbm_buffer_get_dmabuf_fd(buffer, &fd)) { + vg_send_msg(g, &msg, fd); + close(fd); + } + } else { + VhostUserGpuMsg msg = { + .request = VHOST_USER_GPU_SCANOUT, + .size = sizeof(VhostUserGpuScanout), + .payload.scanout = (VhostUserGpuScanout) { + .scanout_id = ss.scanout_id, + .width = scanout->width, + .height = scanout->height + } + }; + vg_send_msg(g, &msg, -1); + } +} + +static void +vg_resource_flush(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_simple_resource *res; + struct virtio_gpu_resource_flush rf; + pixman_region16_t flush_region; + int i; + + VUGPU_FILL_CMD(rf); + virtio_gpu_bswap_32(&rf, sizeof(rf)); + + res = virtio_gpu_find_resource(g, rf.resource_id); + if (!res) { + g_critical("%s: illegal resource specified %d\n", + __func__, rf.resource_id); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + return; + } + + if (rf.r.x > res->width || + rf.r.y > res->height || + rf.r.width > res->width || + rf.r.height > res->height || + rf.r.x + rf.r.width > res->width || + rf.r.y + rf.r.height > res->height) { + g_critical("%s: flush bounds outside resource" + " bounds for resource %d: %d %d %d %d vs %d %d\n", + __func__, rf.resource_id, rf.r.x, rf.r.y, + rf.r.width, rf.r.height, res->width, res->height); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + return; + } + + pixman_region_init_rect(&flush_region, + rf.r.x, rf.r.y, rf.r.width, rf.r.height); + for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { + struct virtio_gpu_scanout *scanout; + pixman_region16_t region, finalregion; + pixman_box16_t *extents; + + if (!(res->scanout_bitmask & (1 << i))) { + continue; + } + scanout = &g->scanout[i]; + + pixman_region_init(&finalregion); + pixman_region_init_rect(®ion, scanout->x, scanout->y, + scanout->width, scanout->height); + + pixman_region_intersect(&finalregion, &flush_region, ®ion); + + extents = pixman_region_extents(&finalregion); + size_t width = extents->x2 - extents->x1; + size_t height = extents->y2 - extents->y1; + + if (vugbm_buffer_can_get_dmabuf_fd(&res->buffer)) { + VhostUserGpuMsg vmsg = { + .request = VHOST_USER_GPU_DMABUF_UPDATE, + .size = sizeof(VhostUserGpuUpdate), + .payload.update = (VhostUserGpuUpdate) { + .scanout_id = i, + .x = extents->x1, + .y = extents->y1, + .width = width, + .height = height, + } + }; + vg_send_msg(g, &vmsg, -1); + vg_wait_ok(g); + } else { + size_t bpp = + PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) / 8; + size_t size = width * height * bpp; + + void *p = g_malloc(VHOST_USER_GPU_HDR_SIZE + + sizeof(VhostUserGpuUpdate) + size); + VhostUserGpuMsg *msg = p; + msg->request = VHOST_USER_GPU_UPDATE; + msg->size = sizeof(VhostUserGpuUpdate) + size; + msg->payload.update = (VhostUserGpuUpdate) { + .scanout_id = i, + .x = extents->x1, + .y = extents->y1, + .width = width, + .height = height, + }; + pixman_image_t *img = + pixman_image_create_bits(pixman_image_get_format(res->image), + msg->payload.update.width, + msg->payload.update.height, + p + offsetof(VhostUserGpuMsg, + payload.update.data), + width * bpp); + pixman_image_composite(PIXMAN_OP_SRC, + res->image, NULL, img, + extents->x1, extents->y1, + 0, 0, 0, 0, + width, height); + pixman_image_unref(img); + vg_send_msg(g, msg, -1); + g_free(msg); + } + pixman_region_fini(®ion); + pixman_region_fini(&finalregion); + } + pixman_region_fini(&flush_region); +} + +static void +vg_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) +{ + switch (cmd->cmd_hdr.type) { + case VIRTIO_GPU_CMD_GET_DISPLAY_INFO: + vg_get_display_info(vg, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: + vg_resource_create_2d(vg, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_UNREF: + vg_resource_unref(vg, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_FLUSH: + vg_resource_flush(vg, cmd); + break; + case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: + vg_transfer_to_host_2d(vg, cmd); + break; + case VIRTIO_GPU_CMD_SET_SCANOUT: + vg_set_scanout(vg, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: + vg_resource_attach_backing(vg, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: + vg_resource_detach_backing(vg, cmd); + break; + case VIRTIO_GPU_CMD_GET_EDID: + vg_get_edid(vg, cmd); + break; + default: + g_warning("TODO handle ctrl %x\n", cmd->cmd_hdr.type); + cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; + break; + } + if (cmd->state == VG_CMD_STATE_NEW) { + vg_ctrl_response_nodata(vg, cmd, cmd->error ? cmd->error : + VIRTIO_GPU_RESP_OK_NODATA); + } +} + +static void +vg_handle_ctrl(VuDev *dev, int qidx) +{ + VuGpu *vg = container_of(dev, VuGpu, dev.parent); + VuVirtq *vq = vu_get_queue(dev, qidx); + struct virtio_gpu_ctrl_command *cmd = NULL; + size_t len; + + for (;;) { + if (vg->wait_in != 0) { + return; + } + + cmd = vu_queue_pop(dev, vq, sizeof(struct virtio_gpu_ctrl_command)); + if (!cmd) { + break; + } + cmd->vq = vq; + cmd->error = 0; + cmd->state = VG_CMD_STATE_NEW; + + len = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, + 0, &cmd->cmd_hdr, sizeof(cmd->cmd_hdr)); + if (len != sizeof(cmd->cmd_hdr)) { + g_warning("%s: command size incorrect %zu vs %zu\n", + __func__, len, sizeof(cmd->cmd_hdr)); + } + + virtio_gpu_ctrl_hdr_bswap(&cmd->cmd_hdr); + g_debug("%d %s\n", cmd->cmd_hdr.type, + vg_cmd_to_string(cmd->cmd_hdr.type)); + + if (vg->virgl) { + vg_virgl_process_cmd(vg, cmd); + } else { + vg_process_cmd(vg, cmd); + } + + if (cmd->state != VG_CMD_STATE_FINISHED) { + QTAILQ_INSERT_TAIL(&vg->fenceq, cmd, next); + vg->inflight++; + } else { + free(cmd); + } + } +} + +static void +update_cursor_data_simple(VuGpu *g, uint32_t resource_id, gpointer data) +{ + struct virtio_gpu_simple_resource *res; + + res = virtio_gpu_find_resource(g, resource_id); + g_return_if_fail(res != NULL); + g_return_if_fail(pixman_image_get_width(res->image) == 64); + g_return_if_fail(pixman_image_get_height(res->image) == 64); + g_return_if_fail( + PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) == 32); + + memcpy(data, pixman_image_get_data(res->image), 64 * 64 * sizeof(uint32_t)); +} + +static void +vg_process_cursor_cmd(VuGpu *g, struct virtio_gpu_update_cursor *cursor) +{ + switch (cursor->hdr.type) { + case VIRTIO_GPU_CMD_MOVE_CURSOR: { + VhostUserGpuMsg msg = { + .request = cursor->resource_id ? + VHOST_USER_GPU_CURSOR_POS : VHOST_USER_GPU_CURSOR_POS_HIDE, + .size = sizeof(VhostUserGpuCursorPos), + .payload.cursor_pos = { + .scanout_id = cursor->pos.scanout_id, + .x = cursor->pos.x, + .y = cursor->pos.y, + } + }; + g_debug("%s: move", G_STRFUNC); + vg_send_msg(g, &msg, -1); + break; + } + case VIRTIO_GPU_CMD_UPDATE_CURSOR: { + VhostUserGpuMsg msg = { + .request = VHOST_USER_GPU_CURSOR_UPDATE, + .size = sizeof(VhostUserGpuCursorUpdate), + .payload.cursor_update = { + .pos = { + .scanout_id = cursor->pos.scanout_id, + .x = cursor->pos.x, + .y = cursor->pos.y, + }, + .hot_x = cursor->hot_x, + .hot_y = cursor->hot_y, + } + }; + g_debug("%s: update", G_STRFUNC); + if (g->virgl) { + vg_virgl_update_cursor_data(g, cursor->resource_id, + msg.payload.cursor_update.data); + } else { + update_cursor_data_simple(g, cursor->resource_id, + msg.payload.cursor_update.data); + } + vg_send_msg(g, &msg, -1); + break; + } + default: + g_debug("%s: unknown cmd %d", G_STRFUNC, cursor->hdr.type); + break; + } +} + +static void +vg_handle_cursor(VuDev *dev, int qidx) +{ + VuGpu *g = container_of(dev, VuGpu, dev.parent); + VuVirtq *vq = vu_get_queue(dev, qidx); + VuVirtqElement *elem; + size_t len; + struct virtio_gpu_update_cursor cursor; + + for (;;) { + elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); + if (!elem) { + break; + } + g_debug("cursor out:%d in:%d\n", elem->out_num, elem->in_num); + + len = iov_to_buf(elem->out_sg, elem->out_num, + 0, &cursor, sizeof(cursor)); + if (len != sizeof(cursor)) { + g_warning("%s: cursor size incorrect %zu vs %zu\n", + __func__, len, sizeof(cursor)); + } else { + virtio_gpu_bswap_32(&cursor, sizeof(cursor)); + vg_process_cursor_cmd(g, &cursor); + } + vu_queue_push(dev, vq, elem, 0); + vu_queue_notify(dev, vq); + free(elem); + } +} + +static void +vg_panic(VuDev *dev, const char *msg) +{ + g_critical("%s\n", msg); + exit(1); +} + +static void +vg_queue_set_started(VuDev *dev, int qidx, bool started) +{ + VuVirtq *vq = vu_get_queue(dev, qidx); + + g_debug("queue started %d:%d\n", qidx, started); + + switch (qidx) { + case 0: + vu_set_queue_handler(dev, vq, started ? vg_handle_ctrl : NULL); + break; + case 1: + vu_set_queue_handler(dev, vq, started ? vg_handle_cursor : NULL); + break; + default: + break; + } +} + +static gboolean +protocol_features_cb(gint fd, GIOCondition condition, gpointer user_data) +{ + const uint64_t protocol_edid = (1 << VHOST_USER_GPU_PROTOCOL_F_EDID); + const uint64_t protocol_dmabuf2 = (1 << VHOST_USER_GPU_PROTOCOL_F_DMABUF2); + VuGpu *g = user_data; + uint64_t protocol_features; + VhostUserGpuMsg msg = { + .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES + }; + + if (!vg_recv_msg(g, msg.request, + sizeof(protocol_features), &protocol_features)) { + return G_SOURCE_CONTINUE; + } + + protocol_features &= (protocol_edid | protocol_dmabuf2); + + msg = (VhostUserGpuMsg) { + .request = VHOST_USER_GPU_SET_PROTOCOL_FEATURES, + .size = sizeof(uint64_t), + .payload.u64 = protocol_features, + }; + vg_send_msg(g, &msg, -1); + + g->wait_in = 0; + vg_handle_ctrl(&g->dev.parent, 0); + + if (g->edid_inited && !(protocol_features & protocol_edid)) { + g_printerr("EDID feature set by the frontend but it does not support " + "the EDID vhost-user-gpu protocol.\n"); + exit(EXIT_FAILURE); + } + + g->use_modifiers = !!(protocol_features & protocol_dmabuf2); + + return G_SOURCE_REMOVE; +} + +static void +set_gpu_protocol_features(VuGpu *g) +{ + VhostUserGpuMsg msg = { + .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES, + }; + + vg_send_msg(g, &msg, -1); + assert(g->wait_in == 0); + g->wait_in = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP, + protocol_features_cb, g); +} + +static int +vg_process_msg(VuDev *dev, VhostUserMsg *msg, int *do_reply) +{ + VuGpu *g = container_of(dev, VuGpu, dev.parent); + + switch (msg->request) { + case VHOST_USER_GPU_SET_SOCKET: { + g_return_val_if_fail(msg->fd_num == 1, 1); + g_return_val_if_fail(g->sock_fd == -1, 1); + g->sock_fd = msg->fds[0]; + set_gpu_protocol_features(g); + return 1; + } + default: + return 0; + } + + return 0; +} + +static uint64_t +vg_get_features(VuDev *dev) +{ + uint64_t features = 0; + + if (opt_virgl) { + features |= 1 << VIRTIO_GPU_F_VIRGL; + } + features |= 1 << VIRTIO_GPU_F_EDID; + + return features; +} + +static void +vg_set_features(VuDev *dev, uint64_t features) +{ + VuGpu *g = container_of(dev, VuGpu, dev.parent); + bool virgl = features & (1 << VIRTIO_GPU_F_VIRGL); + + if (virgl && !g->virgl_inited) { + if (!vg_virgl_init(g)) { + vg_panic(dev, "Failed to initialize virgl"); + } + g->virgl_inited = true; + } + + g->edid_inited = !!(features & (1 << VIRTIO_GPU_F_EDID)); + + g->virgl = virgl; +} + +static int +vg_get_config(VuDev *dev, uint8_t *config, uint32_t len) +{ + VuGpu *g = container_of(dev, VuGpu, dev.parent); + + if (len > sizeof(struct virtio_gpu_config)) { + return -1; + } + + if (opt_virgl) { + g->virtio_config.num_capsets = vg_virgl_get_num_capsets(); + } + + memcpy(config, &g->virtio_config, len); + + return 0; +} + +static int +vg_set_config(VuDev *dev, const uint8_t *data, + uint32_t offset, uint32_t size, + uint32_t flags) +{ + VuGpu *g = container_of(dev, VuGpu, dev.parent); + struct virtio_gpu_config *config = (struct virtio_gpu_config *)data; + + if (config->events_clear) { + g->virtio_config.events_read &= ~config->events_clear; + } + + return 0; +} + +static const VuDevIface vuiface = { + .set_features = vg_set_features, + .get_features = vg_get_features, + .queue_set_started = vg_queue_set_started, + .process_msg = vg_process_msg, + .get_config = vg_get_config, + .set_config = vg_set_config, +}; + +static void +vg_destroy(VuGpu *g) +{ + struct virtio_gpu_simple_resource *res, *tmp; + + vug_deinit(&g->dev); + + vg_sock_fd_close(g); + + QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) { + vg_resource_destroy(g, res); + } + + vugbm_device_destroy(&g->gdev); +} + +static GOptionEntry entries[] = { + { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps, + "Print capabilities", NULL }, + { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum, + "Use inherited fd socket", "FDNUM" }, + { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path, + "Use UNIX socket path", "PATH" }, + { "render-node", 'r', 0, G_OPTION_ARG_FILENAME, &opt_render_node, + "Specify DRM render node", "PATH" }, + { "virgl", 'v', 0, G_OPTION_ARG_NONE, &opt_virgl, + "Turn virgl rendering on", NULL }, + { NULL, } +}; + +int +main(int argc, char *argv[]) +{ + GOptionContext *context; + GError *error = NULL; + GMainLoop *loop = NULL; + int fd; + VuGpu g = { .sock_fd = -1, .drm_rnode_fd = -1 }; + + QTAILQ_INIT(&g.reslist); + QTAILQ_INIT(&g.fenceq); + + context = g_option_context_new("QEMU vhost-user-gpu"); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_printerr("Option parsing failed: %s\n", error->message); + exit(EXIT_FAILURE); + } + g_option_context_free(context); + + if (opt_print_caps) { + g_print("{\n"); + g_print(" \"type\": \"gpu\",\n"); + g_print(" \"features\": [\n"); + g_print(" \"render-node\",\n"); + g_print(" \"virgl\"\n"); + g_print(" ]\n"); + g_print("}\n"); + exit(EXIT_SUCCESS); + } + + g.drm_rnode_fd = qemu_drm_rendernode_open(opt_render_node); + if (opt_render_node && g.drm_rnode_fd == -1) { + g_printerr("Failed to open DRM rendernode.\n"); + exit(EXIT_FAILURE); + } + + vugbm_device_init(&g.gdev, g.drm_rnode_fd); + + if ((!!opt_socket_path + (opt_fdnum != -1)) != 1) { + g_printerr("Please specify either --fd or --socket-path\n"); + exit(EXIT_FAILURE); + } + + if (opt_socket_path) { + int lsock = unix_listen(opt_socket_path, &error_fatal); + if (lsock < 0) { + g_printerr("Failed to listen on %s.\n", opt_socket_path); + exit(EXIT_FAILURE); + } + fd = accept(lsock, NULL, NULL); + close(lsock); + } else { + fd = opt_fdnum; + } + if (fd == -1) { + g_printerr("Invalid vhost-user socket.\n"); + exit(EXIT_FAILURE); + } + + if (!vug_init(&g.dev, VHOST_USER_GPU_MAX_QUEUES, fd, vg_panic, &vuiface)) { + g_printerr("Failed to initialize libvhost-user-glib.\n"); + exit(EXIT_FAILURE); + } + + loop = g_main_loop_new(NULL, FALSE); + g_main_loop_run(loop); + g_main_loop_unref(loop); + + vg_destroy(&g); + if (g.drm_rnode_fd >= 0) { + close(g.drm_rnode_fd); + } + + return 0; +} diff --git a/contrib/vhost-user-gpu/virgl.c b/contrib/vhost-user-gpu/virgl.c new file mode 100644 index 0000000000..51da0e3667 --- /dev/null +++ b/contrib/vhost-user-gpu/virgl.c @@ -0,0 +1,647 @@ +/* + * Virtio vhost-user GPU Device + * + * Copyright Red Hat, Inc. 2013-2018 + * + * Authors: + * Dave Airlie <airlied@redhat.com> + * Gerd Hoffmann <kraxel@redhat.com> + * Marc-AndrĂ© Lureau <marcandre.lureau@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include <virglrenderer.h> +#include "virgl.h" + +#include <epoxy/gl.h> + +void +vg_virgl_update_cursor_data(VuGpu *g, uint32_t resource_id, + gpointer data) +{ + uint32_t width, height; + uint32_t *cursor; + + cursor = virgl_renderer_get_cursor_data(resource_id, &width, &height); + g_return_if_fail(cursor != NULL); + g_return_if_fail(width == 64); + g_return_if_fail(height == 64); + + memcpy(data, cursor, 64 * 64 * sizeof(uint32_t)); + free(cursor); +} + +static void +virgl_cmd_context_create(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_ctx_create cc; + + VUGPU_FILL_CMD(cc); + + virgl_renderer_context_create(cc.hdr.ctx_id, cc.nlen, + cc.debug_name); +} + +static void +virgl_cmd_context_destroy(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_ctx_destroy cd; + + VUGPU_FILL_CMD(cd); + + virgl_renderer_context_destroy(cd.hdr.ctx_id); +} + +static void +virgl_cmd_create_resource_2d(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_resource_create_2d c2d; + struct virgl_renderer_resource_create_args args; + + VUGPU_FILL_CMD(c2d); + + args.handle = c2d.resource_id; + args.target = 2; + args.format = c2d.format; + args.bind = (1 << 1); + args.width = c2d.width; + args.height = c2d.height; + args.depth = 1; + args.array_size = 1; + args.last_level = 0; + args.nr_samples = 0; + args.flags = VIRTIO_GPU_RESOURCE_FLAG_Y_0_TOP; + virgl_renderer_resource_create(&args, NULL, 0); +} + +static void +virgl_cmd_create_resource_3d(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_resource_create_3d c3d; + struct virgl_renderer_resource_create_args args; + + VUGPU_FILL_CMD(c3d); + + args.handle = c3d.resource_id; + args.target = c3d.target; + args.format = c3d.format; + args.bind = c3d.bind; + args.width = c3d.width; + args.height = c3d.height; + args.depth = c3d.depth; + args.array_size = c3d.array_size; + args.last_level = c3d.last_level; + args.nr_samples = c3d.nr_samples; + args.flags = c3d.flags; + virgl_renderer_resource_create(&args, NULL, 0); +} + +static void +virgl_cmd_resource_unref(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_resource_unref unref; + struct iovec *res_iovs = NULL; + int num_iovs = 0; + + VUGPU_FILL_CMD(unref); + + virgl_renderer_resource_detach_iov(unref.resource_id, + &res_iovs, + &num_iovs); + if (res_iovs != NULL && num_iovs != 0) { + vg_cleanup_mapping_iov(g, res_iovs, num_iovs); + } + virgl_renderer_resource_unref(unref.resource_id); +} + +/* Not yet(?) defined in standard-headers, remove when possible */ +#ifndef VIRTIO_GPU_CAPSET_VIRGL2 +#define VIRTIO_GPU_CAPSET_VIRGL2 2 +#endif + +static void +virgl_cmd_get_capset_info(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_get_capset_info info; + struct virtio_gpu_resp_capset_info resp; + + VUGPU_FILL_CMD(info); + + memset(&resp, 0, sizeof(resp)); + if (info.capset_index == 0) { + resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL; + virgl_renderer_get_cap_set(resp.capset_id, + &resp.capset_max_version, + &resp.capset_max_size); + } else if (info.capset_index == 1) { + resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL2; + virgl_renderer_get_cap_set(resp.capset_id, + &resp.capset_max_version, + &resp.capset_max_size); + } else { + resp.capset_max_version = 0; + resp.capset_max_size = 0; + } + resp.hdr.type = VIRTIO_GPU_RESP_OK_CAPSET_INFO; + vg_ctrl_response(g, cmd, &resp.hdr, sizeof(resp)); +} + +uint32_t +vg_virgl_get_num_capsets(void) +{ + uint32_t capset2_max_ver, capset2_max_size; + virgl_renderer_get_cap_set(VIRTIO_GPU_CAPSET_VIRGL2, + &capset2_max_ver, + &capset2_max_size); + + return capset2_max_ver ? 2 : 1; +} + +static void +virgl_cmd_get_capset(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_get_capset gc; + struct virtio_gpu_resp_capset *resp; + uint32_t max_ver, max_size; + + VUGPU_FILL_CMD(gc); + + virgl_renderer_get_cap_set(gc.capset_id, &max_ver, + &max_size); + if (!max_size) { + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + return; + } + resp = g_malloc0(sizeof(*resp) + max_size); + + resp->hdr.type = VIRTIO_GPU_RESP_OK_CAPSET; + virgl_renderer_fill_caps(gc.capset_id, + gc.capset_version, + (void *)resp->capset_data); + vg_ctrl_response(g, cmd, &resp->hdr, sizeof(*resp) + max_size); + g_free(resp); +} + +static void +virgl_cmd_submit_3d(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_cmd_submit cs; + void *buf; + size_t s; + + VUGPU_FILL_CMD(cs); + + buf = g_malloc(cs.size); + s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, + sizeof(cs), buf, cs.size); + if (s != cs.size) { + g_critical("%s: size mismatch (%zd/%d)", __func__, s, cs.size); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + goto out; + } + + virgl_renderer_submit_cmd(buf, cs.hdr.ctx_id, cs.size / 4); + +out: + g_free(buf); +} + +static void +virgl_cmd_transfer_to_host_2d(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_transfer_to_host_2d t2d; + struct virtio_gpu_box box; + + VUGPU_FILL_CMD(t2d); + + box.x = t2d.r.x; + box.y = t2d.r.y; + box.z = 0; + box.w = t2d.r.width; + box.h = t2d.r.height; + box.d = 1; + + virgl_renderer_transfer_write_iov(t2d.resource_id, + 0, + 0, + 0, + 0, + (struct virgl_box *)&box, + t2d.offset, NULL, 0); +} + +static void +virgl_cmd_transfer_to_host_3d(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_transfer_host_3d t3d; + + VUGPU_FILL_CMD(t3d); + + virgl_renderer_transfer_write_iov(t3d.resource_id, + t3d.hdr.ctx_id, + t3d.level, + t3d.stride, + t3d.layer_stride, + (struct virgl_box *)&t3d.box, + t3d.offset, NULL, 0); +} + +static void +virgl_cmd_transfer_from_host_3d(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_transfer_host_3d tf3d; + + VUGPU_FILL_CMD(tf3d); + + virgl_renderer_transfer_read_iov(tf3d.resource_id, + tf3d.hdr.ctx_id, + tf3d.level, + tf3d.stride, + tf3d.layer_stride, + (struct virgl_box *)&tf3d.box, + tf3d.offset, NULL, 0); +} + +static void +virgl_resource_attach_backing(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_resource_attach_backing att_rb; + struct iovec *res_iovs; + int ret; + + VUGPU_FILL_CMD(att_rb); + + ret = vg_create_mapping_iov(g, &att_rb, cmd, &res_iovs); + if (ret != 0) { + cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; + return; + } + + ret = virgl_renderer_resource_attach_iov(att_rb.resource_id, + res_iovs, att_rb.nr_entries); + if (ret != 0) { + vg_cleanup_mapping_iov(g, res_iovs, att_rb.nr_entries); + } +} + +static void +virgl_resource_detach_backing(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_resource_detach_backing detach_rb; + struct iovec *res_iovs = NULL; + int num_iovs = 0; + + VUGPU_FILL_CMD(detach_rb); + + virgl_renderer_resource_detach_iov(detach_rb.resource_id, + &res_iovs, + &num_iovs); + if (res_iovs == NULL || num_iovs == 0) { + return; + } + vg_cleanup_mapping_iov(g, res_iovs, num_iovs); +} + +static int +virgl_get_resource_info_modifiers(uint32_t resource_id, + struct virgl_renderer_resource_info *info, + uint64_t *modifiers) +{ + int ret; +#ifdef VIRGL_RENDERER_RESOURCE_INFO_EXT_VERSION + struct virgl_renderer_resource_info_ext info_ext; + ret = virgl_renderer_resource_get_info_ext(resource_id, &info_ext); + if (ret) { + return ret; + } + + *info = info_ext.base; + *modifiers = info_ext.modifiers; +#else + ret = virgl_renderer_resource_get_info(resource_id, info); + if (ret) { + return ret; + } + + /* + * Before virgl_renderer_resource_get_info_ext, + * getting the modifiers was not possible. + */ + *modifiers = 0; +#endif + + return 0; +} + +static void +virgl_cmd_set_scanout(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_set_scanout ss; + struct virgl_renderer_resource_info info; + int ret; + + VUGPU_FILL_CMD(ss); + + if (ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUTS) { + g_critical("%s: illegal scanout id specified %d", + __func__, ss.scanout_id); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID; + return; + } + + memset(&info, 0, sizeof(info)); + + if (ss.resource_id && ss.r.width && ss.r.height) { + uint64_t modifiers = 0; + ret = virgl_get_resource_info_modifiers(ss.resource_id, &info, + &modifiers); + if (ret) { + g_critical("%s: illegal resource specified %d\n", + __func__, ss.resource_id); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + return; + } + + int fd = -1; + if (virgl_renderer_get_fd_for_texture(info.tex_id, &fd) < 0) { + g_critical("%s: failed to get fd for texture\n", __func__); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + return; + } + assert(fd >= 0); + VhostUserGpuMsg msg = { + .payload.dmabuf_scanout.scanout_id = ss.scanout_id, + .payload.dmabuf_scanout.x = ss.r.x, + .payload.dmabuf_scanout.y = ss.r.y, + .payload.dmabuf_scanout.width = ss.r.width, + .payload.dmabuf_scanout.height = ss.r.height, + .payload.dmabuf_scanout.fd_width = info.width, + .payload.dmabuf_scanout.fd_height = info.height, + .payload.dmabuf_scanout.fd_stride = info.stride, + .payload.dmabuf_scanout.fd_flags = info.flags, + .payload.dmabuf_scanout.fd_drm_fourcc = info.drm_fourcc + }; + + if (g->use_modifiers) { + /* + * The message uses all the fields set in dmabuf_scanout plus + * modifiers which is appended after VhostUserGpuDMABUFScanout. + */ + msg.request = VHOST_USER_GPU_DMABUF_SCANOUT2; + msg.size = sizeof(VhostUserGpuDMABUFScanout2); + msg.payload.dmabuf_scanout2.modifier = modifiers; + } else { + msg.request = VHOST_USER_GPU_DMABUF_SCANOUT; + msg.size = sizeof(VhostUserGpuDMABUFScanout); + } + + vg_send_msg(g, &msg, fd); + close(fd); + } else { + VhostUserGpuMsg msg = { + .request = VHOST_USER_GPU_DMABUF_SCANOUT, + .size = sizeof(VhostUserGpuDMABUFScanout), + .payload.dmabuf_scanout.scanout_id = ss.scanout_id, + }; + g_debug("disable scanout"); + vg_send_msg(g, &msg, -1); + } + g->scanout[ss.scanout_id].resource_id = ss.resource_id; +} + +static void +virgl_cmd_resource_flush(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_resource_flush rf; + int i; + + VUGPU_FILL_CMD(rf); + + glFlush(); + if (!rf.resource_id) { + g_debug("bad resource id for flush..?"); + return; + } + for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { + if (g->scanout[i].resource_id != rf.resource_id) { + continue; + } + VhostUserGpuMsg msg = { + .request = VHOST_USER_GPU_DMABUF_UPDATE, + .size = sizeof(VhostUserGpuUpdate), + .payload.update.scanout_id = i, + .payload.update.x = rf.r.x, + .payload.update.y = rf.r.y, + .payload.update.width = rf.r.width, + .payload.update.height = rf.r.height + }; + vg_send_msg(g, &msg, -1); + vg_wait_ok(g); + } +} + +static void +virgl_cmd_ctx_attach_resource(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_ctx_resource att_res; + + VUGPU_FILL_CMD(att_res); + + virgl_renderer_ctx_attach_resource(att_res.hdr.ctx_id, att_res.resource_id); +} + +static void +virgl_cmd_ctx_detach_resource(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_ctx_resource det_res; + + VUGPU_FILL_CMD(det_res); + + virgl_renderer_ctx_detach_resource(det_res.hdr.ctx_id, det_res.resource_id); +} + +void vg_virgl_process_cmd(VuGpu *g, struct virtio_gpu_ctrl_command *cmd) +{ + virgl_renderer_force_ctx_0(); + switch (cmd->cmd_hdr.type) { + case VIRTIO_GPU_CMD_CTX_CREATE: + virgl_cmd_context_create(g, cmd); + break; + case VIRTIO_GPU_CMD_CTX_DESTROY: + virgl_cmd_context_destroy(g, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: + virgl_cmd_create_resource_2d(g, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_CREATE_3D: + virgl_cmd_create_resource_3d(g, cmd); + break; + case VIRTIO_GPU_CMD_SUBMIT_3D: + virgl_cmd_submit_3d(g, cmd); + break; + case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: + virgl_cmd_transfer_to_host_2d(g, cmd); + break; + case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D: + virgl_cmd_transfer_to_host_3d(g, cmd); + break; + case VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D: + virgl_cmd_transfer_from_host_3d(g, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: + virgl_resource_attach_backing(g, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: + virgl_resource_detach_backing(g, cmd); + break; + case VIRTIO_GPU_CMD_SET_SCANOUT: + virgl_cmd_set_scanout(g, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_FLUSH: + virgl_cmd_resource_flush(g, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_UNREF: + virgl_cmd_resource_unref(g, cmd); + break; + case VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE: + /* TODO add security */ + virgl_cmd_ctx_attach_resource(g, cmd); + break; + case VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE: + /* TODO add security */ + virgl_cmd_ctx_detach_resource(g, cmd); + break; + case VIRTIO_GPU_CMD_GET_CAPSET_INFO: + virgl_cmd_get_capset_info(g, cmd); + break; + case VIRTIO_GPU_CMD_GET_CAPSET: + virgl_cmd_get_capset(g, cmd); + break; + case VIRTIO_GPU_CMD_GET_DISPLAY_INFO: + vg_get_display_info(g, cmd); + break; + case VIRTIO_GPU_CMD_GET_EDID: + vg_get_edid(g, cmd); + break; + default: + g_debug("TODO handle ctrl %x\n", cmd->cmd_hdr.type); + cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; + break; + } + + if (cmd->state != VG_CMD_STATE_NEW) { + return; + } + + if (cmd->error) { + g_warning("%s: ctrl 0x%x, error 0x%x\n", __func__, + cmd->cmd_hdr.type, cmd->error); + vg_ctrl_response_nodata(g, cmd, cmd->error); + return; + } + + if (!(cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE)) { + vg_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); + return; + } + + g_debug("Creating fence id:%" PRId64 " type:%d", + cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type); + virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type); +} + +static void +virgl_write_fence(void *opaque, uint32_t fence) +{ + VuGpu *g = opaque; + struct virtio_gpu_ctrl_command *cmd, *tmp; + + QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) { + /* + * the guest can end up emitting fences out of order + * so we should check all fenced cmds not just the first one. + */ + if (cmd->cmd_hdr.fence_id > fence) { + continue; + } + g_debug("FENCE %" PRIu64, cmd->cmd_hdr.fence_id); + vg_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); + QTAILQ_REMOVE(&g->fenceq, cmd, next); + free(cmd); + g->inflight--; + } +} + +#if defined(VIRGL_RENDERER_CALLBACKS_VERSION) && \ + VIRGL_RENDERER_CALLBACKS_VERSION >= 2 +static int +virgl_get_drm_fd(void *opaque) +{ + VuGpu *g = opaque; + + return g->drm_rnode_fd; +} +#endif + +static struct virgl_renderer_callbacks virgl_cbs = { +#if defined(VIRGL_RENDERER_CALLBACKS_VERSION) && \ + VIRGL_RENDERER_CALLBACKS_VERSION >= 2 + .get_drm_fd = virgl_get_drm_fd, + .version = 2, +#else + .version = 1, +#endif + .write_fence = virgl_write_fence, +}; + +static void +vg_virgl_poll(VuDev *dev, int condition, void *data) +{ + virgl_renderer_poll(); +} + +bool +vg_virgl_init(VuGpu *g) +{ + int ret; + + if (g->drm_rnode_fd && virgl_cbs.version == 1) { + g_warning("virgl will use the default rendernode"); + } + + ret = virgl_renderer_init(g, + VIRGL_RENDERER_USE_EGL | + VIRGL_RENDERER_THREAD_SYNC, + &virgl_cbs); + if (ret != 0) { + return false; + } + + ret = virgl_renderer_get_poll_fd(); + if (ret != -1) { + g->renderer_source = + vug_source_new(&g->dev, ret, G_IO_IN, vg_virgl_poll, g); + } + + return true; +} diff --git a/contrib/vhost-user-gpu/virgl.h b/contrib/vhost-user-gpu/virgl.h new file mode 100644 index 0000000000..17078783a5 --- /dev/null +++ b/contrib/vhost-user-gpu/virgl.h @@ -0,0 +1,26 @@ +/* + * Virtio vhost-user GPU Device + * + * Copyright Red Hat, Inc. 2013-2018 + * + * Authors: + * Dave Airlie <airlied@redhat.com> + * Gerd Hoffmann <kraxel@redhat.com> + * Marc-AndrĂ© Lureau <marcandre.lureau@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef VUGPU_VIRGL_H +#define VUGPU_VIRGL_H + +#include "vugpu.h" + +bool vg_virgl_init(VuGpu *g); +uint32_t vg_virgl_get_num_capsets(void); +void vg_virgl_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd); +void vg_virgl_update_cursor_data(VuGpu *g, uint32_t resource_id, + gpointer data); + +#endif diff --git a/contrib/vhost-user-gpu/vugbm.c b/contrib/vhost-user-gpu/vugbm.c new file mode 100644 index 0000000000..503d0a4566 --- /dev/null +++ b/contrib/vhost-user-gpu/vugbm.c @@ -0,0 +1,325 @@ +/* + * Virtio vhost-user GPU Device + * + * DRM helpers + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "vugbm.h" + +static bool +mem_alloc_bo(struct vugbm_buffer *buf) +{ + buf->mmap = g_malloc(buf->width * buf->height * 4); + buf->stride = buf->width * 4; + return true; +} + +static void +mem_free_bo(struct vugbm_buffer *buf) +{ + g_free(buf->mmap); +} + +static bool +mem_map_bo(struct vugbm_buffer *buf) +{ + return buf->mmap != NULL; +} + +static void +mem_unmap_bo(struct vugbm_buffer *buf) +{ +} + +static void +mem_device_destroy(struct vugbm_device *dev) +{ +} + +#ifdef CONFIG_MEMFD +struct udmabuf_create { + uint32_t memfd; + uint32_t flags; + uint64_t offset; + uint64_t size; +}; + +#define UDMABUF_CREATE _IOW('u', 0x42, struct udmabuf_create) + +static size_t +udmabuf_get_size(struct vugbm_buffer *buf) +{ + return ROUND_UP(buf->width * buf->height * 4, qemu_real_host_page_size()); +} + +static bool +udmabuf_alloc_bo(struct vugbm_buffer *buf) +{ + int ret; + + buf->memfd = memfd_create("udmabuf-bo", MFD_ALLOW_SEALING); + if (buf->memfd < 0) { + return false; + } + + ret = ftruncate(buf->memfd, udmabuf_get_size(buf)); + if (ret < 0) { + close(buf->memfd); + return false; + } + + ret = fcntl(buf->memfd, F_ADD_SEALS, F_SEAL_SHRINK); + if (ret < 0) { + close(buf->memfd); + return false; + } + + buf->stride = buf->width * 4; + + return true; +} + +static void +udmabuf_free_bo(struct vugbm_buffer *buf) +{ + close(buf->memfd); +} + +static bool +udmabuf_map_bo(struct vugbm_buffer *buf) +{ + buf->mmap = mmap(NULL, udmabuf_get_size(buf), + PROT_READ | PROT_WRITE, MAP_SHARED, buf->memfd, 0); + if (buf->mmap == MAP_FAILED) { + return false; + } + + return true; +} + +static bool +udmabuf_get_fd(struct vugbm_buffer *buf, int *fd) +{ + struct udmabuf_create create = { + .memfd = buf->memfd, + .offset = 0, + .size = udmabuf_get_size(buf), + }; + + *fd = ioctl(buf->dev->fd, UDMABUF_CREATE, &create); + + return *fd >= 0; +} + +static void +udmabuf_unmap_bo(struct vugbm_buffer *buf) +{ + munmap(buf->mmap, udmabuf_get_size(buf)); +} + +static void +udmabuf_device_destroy(struct vugbm_device *dev) +{ + close(dev->fd); +} +#endif + +#ifdef CONFIG_GBM +static bool +alloc_bo(struct vugbm_buffer *buf) +{ + struct gbm_device *dev = buf->dev->dev; + + assert(!buf->bo); + + buf->bo = gbm_bo_create(dev, buf->width, buf->height, + buf->format, + GBM_BO_USE_RENDERING | GBM_BO_USE_LINEAR); + + if (buf->bo) { + buf->stride = gbm_bo_get_stride(buf->bo); + return true; + } + + return false; +} + +static void +free_bo(struct vugbm_buffer *buf) +{ + gbm_bo_destroy(buf->bo); +} + +static bool +map_bo(struct vugbm_buffer *buf) +{ + uint32_t stride; + + buf->mmap = gbm_bo_map(buf->bo, 0, 0, buf->width, buf->height, + GBM_BO_TRANSFER_READ_WRITE, &stride, + &buf->mmap_data); + + assert(stride == buf->stride); + + return buf->mmap != NULL; +} + +static void +unmap_bo(struct vugbm_buffer *buf) +{ + gbm_bo_unmap(buf->bo, buf->mmap_data); +} + +static bool +get_fd(struct vugbm_buffer *buf, int *fd) +{ + *fd = gbm_bo_get_fd(buf->bo); + + return *fd >= 0; +} + +static void +device_destroy(struct vugbm_device *dev) +{ + gbm_device_destroy(dev->dev); +} +#endif + +void +vugbm_device_destroy(struct vugbm_device *dev) +{ + if (!dev->inited) { + return; + } + + dev->device_destroy(dev); +} + +void +vugbm_device_init(struct vugbm_device *dev, int fd) +{ + assert(!dev->inited); + +#ifdef CONFIG_GBM + if (fd >= 0) { + dev->dev = gbm_create_device(fd); + } + if (dev->dev != NULL) { + dev->fd = fd; + dev->alloc_bo = alloc_bo; + dev->free_bo = free_bo; + dev->get_fd = get_fd; + dev->map_bo = map_bo; + dev->unmap_bo = unmap_bo; + dev->device_destroy = device_destroy; + dev->inited = true; + } +#endif +#ifdef CONFIG_MEMFD + if (!dev->inited && g_file_test("/dev/udmabuf", G_FILE_TEST_EXISTS)) { + dev->fd = open("/dev/udmabuf", O_RDWR); + if (dev->fd >= 0) { + g_debug("Using experimental udmabuf backend"); + dev->alloc_bo = udmabuf_alloc_bo; + dev->free_bo = udmabuf_free_bo; + dev->get_fd = udmabuf_get_fd; + dev->map_bo = udmabuf_map_bo; + dev->unmap_bo = udmabuf_unmap_bo; + dev->device_destroy = udmabuf_device_destroy; + dev->inited = true; + } + } +#endif + if (!dev->inited) { + g_debug("Using mem fallback"); + dev->alloc_bo = mem_alloc_bo; + dev->free_bo = mem_free_bo; + dev->map_bo = mem_map_bo; + dev->unmap_bo = mem_unmap_bo; + dev->device_destroy = mem_device_destroy; + dev->inited = true; + } + assert(dev->inited); +} + +static bool +vugbm_buffer_map(struct vugbm_buffer *buf) +{ + struct vugbm_device *dev = buf->dev; + + return dev->map_bo(buf); +} + +static void +vugbm_buffer_unmap(struct vugbm_buffer *buf) +{ + struct vugbm_device *dev = buf->dev; + + dev->unmap_bo(buf); +} + +bool +vugbm_buffer_can_get_dmabuf_fd(struct vugbm_buffer *buffer) +{ + if (!buffer->dev->get_fd) { + return false; + } + + return true; +} + +bool +vugbm_buffer_get_dmabuf_fd(struct vugbm_buffer *buffer, int *fd) +{ + if (!vugbm_buffer_can_get_dmabuf_fd(buffer) || + !buffer->dev->get_fd(buffer, fd)) { + g_warning("Failed to get dmabuf"); + return false; + } + + if (*fd < 0) { + g_warning("error: dmabuf_fd < 0"); + return false; + } + + return true; +} + +bool +vugbm_buffer_create(struct vugbm_buffer *buffer, struct vugbm_device *dev, + uint32_t width, uint32_t height) +{ + buffer->dev = dev; + buffer->width = width; + buffer->height = height; + buffer->format = GBM_FORMAT_XRGB8888; + buffer->stride = 0; /* modified during alloc */ + if (!dev->alloc_bo(buffer)) { + g_warning("alloc_bo failed"); + return false; + } + + if (!vugbm_buffer_map(buffer)) { + g_warning("map_bo failed"); + goto err; + } + + return true; + +err: + dev->free_bo(buffer); + return false; +} + +void +vugbm_buffer_destroy(struct vugbm_buffer *buffer) +{ + struct vugbm_device *dev = buffer->dev; + + vugbm_buffer_unmap(buffer); + dev->free_bo(buffer); +} diff --git a/contrib/vhost-user-gpu/vugbm.h b/contrib/vhost-user-gpu/vugbm.h new file mode 100644 index 0000000000..82bc4934e1 --- /dev/null +++ b/contrib/vhost-user-gpu/vugbm.h @@ -0,0 +1,66 @@ +/* + * Virtio vhost-user GPU Device + * + * GBM helpers + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef VHOST_USER_GPU_VUGBM_H +#define VHOST_USER_GPU_VUGBM_H + + +#ifdef CONFIG_MEMFD +#include <sys/ioctl.h> +#endif + +#ifdef CONFIG_GBM +#include <gbm.h> +#endif + +struct vugbm_buffer; + +struct vugbm_device { + bool inited; + int fd; +#ifdef CONFIG_GBM + struct gbm_device *dev; +#endif + + bool (*alloc_bo)(struct vugbm_buffer *buf); + void (*free_bo)(struct vugbm_buffer *buf); + bool (*get_fd)(struct vugbm_buffer *buf, int *fd); + bool (*map_bo)(struct vugbm_buffer *buf); + void (*unmap_bo)(struct vugbm_buffer *buf); + void (*device_destroy)(struct vugbm_device *dev); +}; + +struct vugbm_buffer { + struct vugbm_device *dev; + +#ifdef CONFIG_MEMFD + int memfd; +#endif +#ifdef CONFIG_GBM + struct gbm_bo *bo; + void *mmap_data; +#endif + + uint8_t *mmap; + uint32_t width; + uint32_t height; + uint32_t stride; + uint32_t format; +}; + +void vugbm_device_init(struct vugbm_device *dev, int fd); +void vugbm_device_destroy(struct vugbm_device *dev); + +bool vugbm_buffer_create(struct vugbm_buffer *buffer, struct vugbm_device *dev, + uint32_t width, uint32_t height); +bool vugbm_buffer_can_get_dmabuf_fd(struct vugbm_buffer *buffer); +bool vugbm_buffer_get_dmabuf_fd(struct vugbm_buffer *buffer, int *fd); +void vugbm_buffer_destroy(struct vugbm_buffer *buffer); + +#endif diff --git a/contrib/vhost-user-gpu/vugpu.h b/contrib/vhost-user-gpu/vugpu.h new file mode 100644 index 0000000000..654c392fbb --- /dev/null +++ b/contrib/vhost-user-gpu/vugpu.h @@ -0,0 +1,203 @@ +/* + * Virtio vhost-user GPU Device + * + * Copyright Red Hat, Inc. 2013-2018 + * + * Authors: + * Dave Airlie <airlied@redhat.com> + * Gerd Hoffmann <kraxel@redhat.com> + * Marc-AndrĂ© Lureau <marcandre.lureau@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef VUGPU_H +#define VUGPU_H + + +#include "libvhost-user-glib.h" +#include "standard-headers/linux/virtio_gpu.h" + +#include "qemu/queue.h" +#include "qemu/iov.h" +#include "qemu/bswap.h" +#include "vugbm.h" + +typedef enum VhostUserGpuRequest { + VHOST_USER_GPU_NONE = 0, + VHOST_USER_GPU_GET_PROTOCOL_FEATURES, + VHOST_USER_GPU_SET_PROTOCOL_FEATURES, + VHOST_USER_GPU_GET_DISPLAY_INFO, + VHOST_USER_GPU_CURSOR_POS, + VHOST_USER_GPU_CURSOR_POS_HIDE, + VHOST_USER_GPU_CURSOR_UPDATE, + VHOST_USER_GPU_SCANOUT, + VHOST_USER_GPU_UPDATE, + VHOST_USER_GPU_DMABUF_SCANOUT, + VHOST_USER_GPU_DMABUF_UPDATE, + VHOST_USER_GPU_GET_EDID, + VHOST_USER_GPU_DMABUF_SCANOUT2, +} VhostUserGpuRequest; + +typedef struct VhostUserGpuDisplayInfoReply { + struct virtio_gpu_resp_display_info info; +} VhostUserGpuDisplayInfoReply; + +typedef struct VhostUserGpuCursorPos { + uint32_t scanout_id; + uint32_t x; + uint32_t y; +} QEMU_PACKED VhostUserGpuCursorPos; + +typedef struct VhostUserGpuCursorUpdate { + VhostUserGpuCursorPos pos; + uint32_t hot_x; + uint32_t hot_y; + uint32_t data[64 * 64]; +} QEMU_PACKED VhostUserGpuCursorUpdate; + +typedef struct VhostUserGpuScanout { + uint32_t scanout_id; + uint32_t width; + uint32_t height; +} QEMU_PACKED VhostUserGpuScanout; + +typedef struct VhostUserGpuUpdate { + uint32_t scanout_id; + uint32_t x; + uint32_t y; + uint32_t width; + uint32_t height; + uint8_t data[]; +} QEMU_PACKED VhostUserGpuUpdate; + +typedef struct VhostUserGpuDMABUFScanout { + uint32_t scanout_id; + uint32_t x; + uint32_t y; + uint32_t width; + uint32_t height; + uint32_t fd_width; + uint32_t fd_height; + uint32_t fd_stride; + uint32_t fd_flags; + int fd_drm_fourcc; +} QEMU_PACKED VhostUserGpuDMABUFScanout; + +typedef struct VhostUserGpuDMABUFScanout2 { + struct VhostUserGpuDMABUFScanout dmabuf_scanout; + uint64_t modifier; +} QEMU_PACKED VhostUserGpuDMABUFScanout2; + +typedef struct VhostUserGpuEdidRequest { + uint32_t scanout_id; +} QEMU_PACKED VhostUserGpuEdidRequest; + +typedef struct VhostUserGpuMsg { + uint32_t request; /* VhostUserGpuRequest */ + uint32_t flags; + uint32_t size; /* the following payload size */ + union { + VhostUserGpuCursorPos cursor_pos; + VhostUserGpuCursorUpdate cursor_update; + VhostUserGpuScanout scanout; + VhostUserGpuUpdate update; + VhostUserGpuDMABUFScanout dmabuf_scanout; + VhostUserGpuDMABUFScanout2 dmabuf_scanout2; + VhostUserGpuEdidRequest edid_req; + struct virtio_gpu_resp_edid resp_edid; + struct virtio_gpu_resp_display_info display_info; + uint64_t u64; + } payload; +} QEMU_PACKED VhostUserGpuMsg; + +static VhostUserGpuMsg m __attribute__ ((unused)); +#define VHOST_USER_GPU_HDR_SIZE \ + (sizeof(m.request) + sizeof(m.flags) + sizeof(m.size)) + +#define VHOST_USER_GPU_MSG_FLAG_REPLY 0x4 + +#define VHOST_USER_GPU_PROTOCOL_F_EDID 0 +#define VHOST_USER_GPU_PROTOCOL_F_DMABUF2 1 + +struct virtio_gpu_scanout { + uint32_t width, height; + int x, y; + int invalidate; + uint32_t resource_id; +}; + +typedef struct VuGpu { + VugDev dev; + struct virtio_gpu_config virtio_config; + struct vugbm_device gdev; + int sock_fd; + int drm_rnode_fd; + GSource *renderer_source; + guint wait_in; + + bool virgl; + bool virgl_inited; + bool edid_inited; + bool use_modifiers; + uint32_t inflight; + + struct virtio_gpu_scanout scanout[VIRTIO_GPU_MAX_SCANOUTS]; + QTAILQ_HEAD(, virtio_gpu_simple_resource) reslist; + QTAILQ_HEAD(, virtio_gpu_ctrl_command) fenceq; +} VuGpu; + +enum { + VG_CMD_STATE_NEW, + VG_CMD_STATE_PENDING, + VG_CMD_STATE_FINISHED, +}; + +struct virtio_gpu_ctrl_command { + VuVirtqElement elem; + VuVirtq *vq; + struct virtio_gpu_ctrl_hdr cmd_hdr; + uint32_t error; + int state; + QTAILQ_ENTRY(virtio_gpu_ctrl_command) next; +}; + +#define VUGPU_FILL_CMD(out) do { \ + size_t vugpufillcmd_s_ = \ + iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0, \ + &out, sizeof(out)); \ + if (vugpufillcmd_s_ != sizeof(out)) { \ + g_critical("%s: command size incorrect %zu vs %zu", \ + __func__, vugpufillcmd_s_, sizeof(out)); \ + return; \ + } \ + } while (0) + + +void vg_ctrl_response(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd, + struct virtio_gpu_ctrl_hdr *resp, + size_t resp_len); + +void vg_ctrl_response_nodata(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd, + enum virtio_gpu_ctrl_type type); + +int vg_create_mapping_iov(VuGpu *g, + struct virtio_gpu_resource_attach_backing *ab, + struct virtio_gpu_ctrl_command *cmd, + struct iovec **iov); +void vg_cleanup_mapping_iov(VuGpu *g, struct iovec *iov, uint32_t count); +void vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd); +void vg_get_edid(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd); + +void vg_wait_ok(VuGpu *g); + +void vg_send_msg(VuGpu *g, const VhostUserGpuMsg *msg, int fd); + +bool vg_recv_msg(VuGpu *g, uint32_t expect_req, uint32_t expect_size, + gpointer payload); + + +#endif diff --git a/contrib/vhost-user-input/main.c b/contrib/vhost-user-input/main.c new file mode 100644 index 0000000000..081230da54 --- /dev/null +++ b/contrib/vhost-user-input/main.c @@ -0,0 +1,412 @@ +/* + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#include "qemu/osdep.h" + +#include <sys/ioctl.h> + +#include "qemu/iov.h" +#include "qemu/bswap.h" +#include "qemu/sockets.h" +#include "libvhost-user-glib.h" +#include "standard-headers/linux/input.h" +#include "standard-headers/linux/virtio_input.h" +#include "qapi/error.h" + +enum { + VHOST_USER_INPUT_MAX_QUEUES = 2, +}; + +typedef struct virtio_input_event virtio_input_event; +typedef struct virtio_input_config virtio_input_config; + +typedef struct VuInput { + VugDev dev; + GSource *evsrc; + int evdevfd; + GArray *config; + virtio_input_config *sel_config; + struct { + virtio_input_event event; + VuVirtqElement *elem; + } *queue; + uint32_t qindex, qsize; +} VuInput; + +static void vi_input_send(VuInput *vi, struct virtio_input_event *event) +{ + VuDev *dev = &vi->dev.parent; + VuVirtq *vq = vu_get_queue(dev, 0); + VuVirtqElement *elem; + int i, len; + + /* queue up events ... */ + if (vi->qindex == vi->qsize) { + vi->qsize++; + vi->queue = g_realloc_n(vi->queue, vi->qsize, sizeof(vi->queue[0])); + } + vi->queue[vi->qindex++].event = *event; + + /* ... until we see a report sync ... */ + if (event->type != htole16(EV_SYN) || + event->code != htole16(SYN_REPORT)) { + return; + } + + /* ... then check available space ... */ + for (i = 0; i < vi->qindex; i++) { + elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); + if (!elem) { + while (--i >= 0) { + vu_queue_unpop(dev, vq, vi->queue[i].elem, 0); + } + vi->qindex = 0; + g_warning("virtio-input queue full"); + return; + } + vi->queue[i].elem = elem; + } + + /* ... and finally pass them to the guest */ + for (i = 0; i < vi->qindex; i++) { + elem = vi->queue[i].elem; + len = iov_from_buf(elem->in_sg, elem->in_num, + 0, &vi->queue[i].event, sizeof(virtio_input_event)); + vu_queue_push(dev, vq, elem, len); + free(elem); + } + + vu_queue_notify(&vi->dev.parent, vq); + vi->qindex = 0; +} + +static void +vi_evdev_watch(VuDev *dev, int condition, void *data) +{ + VuInput *vi = data; + int fd = vi->evdevfd; + + g_debug("Got evdev condition %x", condition); + + struct virtio_input_event virtio; + struct input_event evdev; + int rc; + + for (;;) { + rc = read(fd, &evdev, sizeof(evdev)); + if (rc != sizeof(evdev)) { + break; + } + + g_debug("input %d %d %d", evdev.type, evdev.code, evdev.value); + + virtio.type = htole16(evdev.type); + virtio.code = htole16(evdev.code); + virtio.value = htole32(evdev.value); + vi_input_send(vi, &virtio); + } +} + + +static void vi_handle_status(VuInput *vi, virtio_input_event *event) +{ + struct input_event evdev; + struct timeval tval; + int rc; + + if (gettimeofday(&tval, NULL)) { + perror("vi_handle_status: gettimeofday"); + return; + } + + evdev.input_event_sec = tval.tv_sec; + evdev.input_event_usec = tval.tv_usec; + evdev.type = le16toh(event->type); + evdev.code = le16toh(event->code); + evdev.value = le32toh(event->value); + + rc = write(vi->evdevfd, &evdev, sizeof(evdev)); + if (rc == -1) { + perror("vi_host_handle_status: write"); + } +} + +static void vi_handle_sts(VuDev *dev, int qidx) +{ + VuInput *vi = container_of(dev, VuInput, dev.parent); + VuVirtq *vq = vu_get_queue(dev, qidx); + virtio_input_event event; + VuVirtqElement *elem; + int len; + + g_debug("%s", G_STRFUNC); + + for (;;) { + elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); + if (!elem) { + break; + } + + memset(&event, 0, sizeof(event)); + len = iov_to_buf(elem->out_sg, elem->out_num, + 0, &event, sizeof(event)); + vi_handle_status(vi, &event); + vu_queue_push(dev, vq, elem, len); + free(elem); + } + + vu_queue_notify(&vi->dev.parent, vq); +} + +static void +vi_panic(VuDev *dev, const char *msg) +{ + g_critical("%s\n", msg); + exit(EXIT_FAILURE); +} + +static void +vi_queue_set_started(VuDev *dev, int qidx, bool started) +{ + VuInput *vi = container_of(dev, VuInput, dev.parent); + VuVirtq *vq = vu_get_queue(dev, qidx); + + g_debug("queue started %d:%d", qidx, started); + + if (qidx == 1) { + vu_set_queue_handler(dev, vq, started ? vi_handle_sts : NULL); + } + + started = vu_queue_started(dev, vu_get_queue(dev, 0)) && + vu_queue_started(dev, vu_get_queue(dev, 1)); + + if (started && !vi->evsrc) { + vi->evsrc = vug_source_new(&vi->dev, vi->evdevfd, + G_IO_IN, vi_evdev_watch, vi); + } + + if (!started && vi->evsrc) { + vug_source_destroy(vi->evsrc); + vi->evsrc = NULL; + } +} + +static virtio_input_config * +vi_find_config(VuInput *vi, uint8_t select, uint8_t subsel) +{ + virtio_input_config *cfg; + int i; + + for (i = 0; i < vi->config->len; i++) { + cfg = &g_array_index(vi->config, virtio_input_config, i); + if (select == cfg->select && subsel == cfg->subsel) { + return cfg; + } + } + + return NULL; +} + +static int vi_get_config(VuDev *dev, uint8_t *config, uint32_t len) +{ + VuInput *vi = container_of(dev, VuInput, dev.parent); + + if (len > sizeof(*vi->sel_config)) { + return -1; + } + + if (vi->sel_config) { + memcpy(config, vi->sel_config, len); + } else { + memset(config, 0, len); + } + + return 0; +} + +static int vi_set_config(VuDev *dev, const uint8_t *data, + uint32_t offset, uint32_t size, + uint32_t flags) +{ + VuInput *vi = container_of(dev, VuInput, dev.parent); + virtio_input_config *config = (virtio_input_config *)data; + + vi->sel_config = vi_find_config(vi, config->select, config->subsel); + + return 0; +} + +static const VuDevIface vuiface = { + .queue_set_started = vi_queue_set_started, + .get_config = vi_get_config, + .set_config = vi_set_config, +}; + +static void +vi_bits_config(VuInput *vi, int type, int count) +{ + virtio_input_config bits; + int rc, i, size = 0; + + memset(&bits, 0, sizeof(bits)); + rc = ioctl(vi->evdevfd, EVIOCGBIT(type, count / 8), bits.u.bitmap); + if (rc < 0) { + return; + } + + for (i = 0; i < count / 8; i++) { + if (bits.u.bitmap[i]) { + size = i + 1; + } + } + if (size == 0) { + return; + } + + bits.select = VIRTIO_INPUT_CFG_EV_BITS; + bits.subsel = type; + bits.size = size; + g_array_append_val(vi->config, bits); +} + +static char *opt_evdev; +static int opt_fdnum = -1; +static char *opt_socket_path; +static gboolean opt_nograb; +static gboolean opt_print_caps; + +static GOptionEntry entries[] = { + { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps, + "Print capabilities", NULL }, + { "no-grab", 'n', 0, G_OPTION_ARG_NONE, &opt_nograb, + "Don't grab device", NULL }, + { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum, + "Use inherited fd socket", "FDNUM" }, + { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path, + "Use UNIX socket path", "PATH" }, + { "evdev-path", 'p', 0, G_OPTION_ARG_FILENAME, &opt_evdev, + "evdev input device path", "PATH" }, + { NULL, } +}; + +int +main(int argc, char *argv[]) +{ + GMainLoop *loop = NULL; + VuInput vi = { 0, }; + int rc, ver, fd; + virtio_input_config id; + struct input_id ids; + GError *error = NULL; + GOptionContext *context; + + context = g_option_context_new(NULL); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_printerr("Option parsing failed: %s\n", error->message); + exit(EXIT_FAILURE); + } + if (opt_print_caps) { + g_print("{\n"); + g_print(" \"type\": \"input\",\n"); + g_print(" \"features\": [\n"); + g_print(" \"evdev-path\",\n"); + g_print(" \"no-grab\"\n"); + g_print(" ]\n"); + g_print("}\n"); + exit(EXIT_SUCCESS); + } + if (!opt_evdev) { + g_printerr("Please specify an evdev path\n"); + exit(EXIT_FAILURE); + } + if ((!!opt_socket_path + (opt_fdnum != -1)) != 1) { + g_printerr("Please specify either --fd or --socket-path\n"); + exit(EXIT_FAILURE); + } + + vi.evdevfd = open(opt_evdev, O_RDWR); + if (vi.evdevfd < 0) { + g_printerr("Failed to open evdev: %s\n", g_strerror(errno)); + exit(EXIT_FAILURE); + } + + rc = ioctl(vi.evdevfd, EVIOCGVERSION, &ver); + if (rc < 0) { + g_printerr("%s: is not an evdev device\n", argv[1]); + exit(EXIT_FAILURE); + } + + if (!opt_nograb) { + rc = ioctl(vi.evdevfd, EVIOCGRAB, 1); + if (rc < 0) { + g_printerr("Failed to grab device\n"); + exit(EXIT_FAILURE); + } + } + + vi.config = g_array_new(false, false, sizeof(virtio_input_config)); + memset(&id, 0, sizeof(id)); + if (ioctl(vi.evdevfd, EVIOCGNAME(sizeof(id.u.string) - 1), + id.u.string) < 0) { + g_printerr("Failed to get evdev name: %s\n", g_strerror(errno)); + exit(EXIT_FAILURE); + } + id.select = VIRTIO_INPUT_CFG_ID_NAME; + id.size = strlen(id.u.string); + g_array_append_val(vi.config, id); + + if (ioctl(vi.evdevfd, EVIOCGID, &ids) == 0) { + memset(&id, 0, sizeof(id)); + id.select = VIRTIO_INPUT_CFG_ID_DEVIDS; + id.size = sizeof(struct virtio_input_devids); + id.u.ids.bustype = cpu_to_le16(ids.bustype); + id.u.ids.vendor = cpu_to_le16(ids.vendor); + id.u.ids.product = cpu_to_le16(ids.product); + id.u.ids.version = cpu_to_le16(ids.version); + g_array_append_val(vi.config, id); + } + + vi_bits_config(&vi, EV_KEY, KEY_CNT); + vi_bits_config(&vi, EV_REL, REL_CNT); + vi_bits_config(&vi, EV_ABS, ABS_CNT); + vi_bits_config(&vi, EV_MSC, MSC_CNT); + vi_bits_config(&vi, EV_SW, SW_CNT); + g_debug("config length: %u", vi.config->len); + + if (opt_socket_path) { + int lsock = unix_listen(opt_socket_path, &error_fatal); + if (lsock < 0) { + g_printerr("Failed to listen on %s.\n", opt_socket_path); + exit(EXIT_FAILURE); + } + fd = accept(lsock, NULL, NULL); + close(lsock); + } else { + fd = opt_fdnum; + } + if (fd == -1) { + g_printerr("Invalid vhost-user socket.\n"); + exit(EXIT_FAILURE); + } + + if (!vug_init(&vi.dev, VHOST_USER_INPUT_MAX_QUEUES, fd, vi_panic, + &vuiface)) { + g_printerr("Failed to initialize libvhost-user-glib.\n"); + exit(EXIT_FAILURE); + } + + loop = g_main_loop_new(NULL, FALSE); + g_main_loop_run(loop); + g_main_loop_unref(loop); + + vug_deinit(&vi.dev); + + vug_source_destroy(vi.evsrc); + g_array_free(vi.config, TRUE); + g_free(vi.queue); + return 0; +} diff --git a/contrib/vhost-user-input/meson.build b/contrib/vhost-user-input/meson.build new file mode 100644 index 0000000000..840d866594 --- /dev/null +++ b/contrib/vhost-user-input/meson.build @@ -0,0 +1,4 @@ +executable('vhost-user-input', files('main.c'), + dependencies: [qemuutil, vhost_user], + build_by_default: host_os == 'linux', + install: false) diff --git a/contrib/vhost-user-scsi/Makefile.objs b/contrib/vhost-user-scsi/Makefile.objs deleted file mode 100644 index e83a38a85b..0000000000 --- a/contrib/vhost-user-scsi/Makefile.objs +++ /dev/null @@ -1 +0,0 @@ -vhost-user-scsi-obj-y = vhost-user-scsi.o diff --git a/contrib/vhost-user-scsi/meson.build b/contrib/vhost-user-scsi/meson.build new file mode 100644 index 0000000000..44be04853e --- /dev/null +++ b/contrib/vhost-user-scsi/meson.build @@ -0,0 +1,6 @@ +if libiscsi.found() + executable('vhost-user-scsi', files('vhost-user-scsi.c'), + dependencies: [qemuutil, libiscsi, vhost_user], + build_by_default: host_os == 'linux', + install: false) +endif diff --git a/contrib/vhost-user-scsi/vhost-user-scsi.c b/contrib/vhost-user-scsi/vhost-user-scsi.c index 496dd6e693..9ef61cf5a7 100644 --- a/contrib/vhost-user-scsi/vhost-user-scsi.c +++ b/contrib/vhost-user-scsi/vhost-user-scsi.c @@ -12,13 +12,19 @@ #include "qemu/osdep.h" #include <iscsi/iscsi.h> +#define inline __attribute__((gnu_inline)) /* required for libiscsi v1.9.0 */ #include <iscsi/scsi-lowlevel.h> -#include "contrib/libvhost-user/libvhost-user-glib.h" +#undef inline +#include "libvhost-user-glib.h" #include "standard-headers/linux/virtio_scsi.h" #define VUS_ISCSI_INITIATOR "iqn.2016-11.com.nutanix:vhost-user-scsi" +enum { + VHOST_USER_SCSI_MAX_QUEUES = 8, +}; + typedef struct VusIscsiLun { struct iscsi_context *iscsi_ctx; int iscsi_lun; @@ -111,7 +117,7 @@ static int get_cdb_len(uint8_t *cdb) case 4: return 16; case 5: return 12; } - g_warning("Unable to determine cdb len (0x%02hhX)", cdb[0] >> 5); + g_warning("Unable to determine cdb len (0x%02hhX)", (uint8_t)(cdb[0] >> 5)); return -1; } @@ -226,16 +232,12 @@ static void vus_proc_req(VuDev *vu_dev, int idx) VugDev *gdev; VusDev *vdev_scsi; VuVirtq *vq; + VuVirtqElement *elem = NULL; assert(vu_dev); gdev = container_of(vu_dev, VugDev, parent); vdev_scsi = container_of(gdev, VusDev, parent); - if (idx < 0 || idx >= VHOST_MAX_NR_VIRTQUEUE) { - g_warning("VQ Index out of range: %d", idx); - vus_panic_cb(vu_dev, NULL); - return; - } vq = vu_get_queue(vu_dev, idx); if (!vq) { @@ -247,7 +249,6 @@ static void vus_proc_req(VuDev *vu_dev, int idx) g_debug("Got kicked on vq[%d]@%p", idx, vq); while (1) { - VuVirtqElement *elem; VirtIOSCSICmdReq *req; VirtIOSCSICmdResp *rsp; @@ -287,6 +288,7 @@ static void vus_proc_req(VuDev *vu_dev, int idx) free(elem); } + free(elem); } static void vus_queue_set_started(VuDev *vu_dev, int idx, bool started) @@ -295,12 +297,6 @@ static void vus_queue_set_started(VuDev *vu_dev, int idx, bool started) assert(vu_dev); - if (idx < 0 || idx >= VHOST_MAX_NR_VIRTQUEUE) { - g_warning("VQ Index out of range: %d", idx); - vus_panic_cb(vu_dev, NULL); - return; - } - vq = vu_get_queue(vu_dev, idx); if (idx == 0 || idx == 1) { @@ -325,7 +321,7 @@ static int unix_sock_new(char *unix_fn) assert(unix_fn); sock = socket(AF_UNIX, SOCK_STREAM, 0); - if (sock <= 0) { + if (sock < 0) { perror("socket"); return -1; } @@ -355,34 +351,59 @@ fail: /** vhost-user-scsi **/ +static int opt_fdnum = -1; +static char *opt_socket_path; +static gboolean opt_print_caps; +static char *iscsi_uri; + +static GOptionEntry entries[] = { + { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps, + "Print capabilities", NULL }, + { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum, + "Use inherited fd socket", "FDNUM" }, + { "iscsi-uri", 'i', 0, G_OPTION_ARG_FILENAME, &iscsi_uri, + "iSCSI URI to connect to", "FDNUM" }, + { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path, + "Use UNIX socket path", "PATH" }, + { NULL, } +}; + int main(int argc, char **argv) { VusDev *vdev_scsi = NULL; - char *unix_fn = NULL; - char *iscsi_uri = NULL; - int lsock = -1, csock = -1, opt, err = EXIT_SUCCESS; - - while ((opt = getopt(argc, argv, "u:i:")) != -1) { - switch (opt) { - case 'h': - goto help; - case 'u': - unix_fn = g_strdup(optarg); - break; - case 'i': - iscsi_uri = g_strdup(optarg); - break; - default: - goto help; - } + int lsock = -1, csock = -1, err = EXIT_SUCCESS; + + GError *error = NULL; + GOptionContext *context; + + context = g_option_context_new(NULL); + g_option_context_add_main_entries(context, entries, NULL); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_printerr("Option parsing failed: %s\n", error->message); + exit(EXIT_FAILURE); } - if (!unix_fn || !iscsi_uri) { + + if (opt_print_caps) { + g_print("{\n"); + g_print(" \"type\": \"scsi\"\n"); + g_print("}\n"); + goto out; + } + + if (!iscsi_uri) { goto help; } - lsock = unix_sock_new(unix_fn); - if (lsock < 0) { - goto err; + if (opt_socket_path) { + lsock = unix_sock_new(opt_socket_path); + if (lsock < 0) { + exit(EXIT_FAILURE); + } + } else if (opt_fdnum < 0) { + g_print("%s\n", g_option_context_get_help(context, true, NULL)); + exit(EXIT_FAILURE); + } else { + lsock = opt_fdnum; } csock = accept(lsock, NULL, NULL); @@ -398,7 +419,11 @@ int main(int argc, char **argv) goto err; } - vug_init(&vdev_scsi->parent, csock, vus_panic_cb, &vus_iface); + if (!vug_init(&vdev_scsi->parent, VHOST_USER_SCSI_MAX_QUEUES, csock, + vus_panic_cb, &vus_iface)) { + g_printerr("Failed to initialize libvhost-user-glib\n"); + goto err; + } g_main_loop_run(vdev_scsi->loop); @@ -408,15 +433,18 @@ out: if (vdev_scsi) { g_main_loop_unref(vdev_scsi->loop); g_free(vdev_scsi); - unlink(unix_fn); } if (csock >= 0) { close(csock); } if (lsock >= 0) { close(lsock); + + if (opt_socket_path) { + unlink(opt_socket_path); + } } - g_free(unix_fn); + g_free(opt_socket_path); g_free(iscsi_uri); return err; @@ -426,10 +454,12 @@ err: goto out; help: - fprintf(stderr, "Usage: %s [ -u unix_sock_path -i iscsi_uri ] | [ -h ]\n", + fprintf(stderr, "Usage: %s [ -s socket-path -i iscsi-uri -f fd -p print-capabilities ] | [ -h ]\n", argv[0]); - fprintf(stderr, " -u path to unix socket\n"); - fprintf(stderr, " -i iscsi uri for lun 0\n"); + fprintf(stderr, " -s, --socket-path=SOCKET_PATH path to unix socket\n"); + fprintf(stderr, " -i, --iscsi-uri=ISCSI_URI iscsi uri for lun 0\n"); + fprintf(stderr, " -f, --fd=FILE_DESCRIPTOR file-descriptor\n"); + fprintf(stderr, " -p, --print-capabilities=PRINT_CAPABILITIES denotes print-capabilities\n"); fprintf(stderr, " -h print help and quit\n"); goto err; |