/* $Id$ */

/*
 * NetBSD kernel wrapper for KQEMU
 */

/*
 * Based on Linux kernel wrapper for KQEMU
 *
 * Copyright (C) 2004-2007 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/* XXX: the header names below are a reconstruction of the include list,
   which was lost; verify against the original sources. */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/malloc.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#if __NetBSD_Version__ >= 499001000 /* post-newlock2 */
#include <sys/mutex.h>
#endif
#include <sys/lkm.h>
#include <uvm/uvm_extern.h>
#include <machine/stdarg.h>

#include "kqemu-kernel.h"

extern int kqemu_lkmentry(struct lkm_table *, int, int);

MALLOC_DEFINE(M_KQEMU, "kqemu", "kqemu");
MALLOC_DECLARE(M_KQEMU);

int kqemu_debug = 0;
#define DPRINTF		if (kqemu_debug) kqemu_log
#define DPRINTF1	if (kqemu_debug > 1) kqemu_log
#define DPRINTF2	if (kqemu_debug > 2) kqemu_log

int lock_count = 0;
int page_alloc_count = 0;
int malloc_count = 0;
int io_map_count = 0;

/* Lock the page at virtual address 'user_addr' and return its
   physical address (page index). Return a host OS private user page
   identifier or NULL if error */
/* Use VA as the identifier (see FreeBSD and Windows) */
struct kqemu_user_page *CDECL kqemu_lock_user_page(unsigned long *ppage_index,
                                                   unsigned long user_addr)
{
    struct vm_map *map = &curproc->p_vmspace->vm_map;
    vaddr_t va = user_addr;
    paddr_t pa;
    int ret;

    DPRINTF1("kqemu_lock_user_page(%08lx)\n", user_addr);

    /* wire the page so that it cannot be paged out while kqemu uses it */
    ret = uvm_map_pageable(map, va, va + PAGE_SIZE, FALSE, 0);
    if (ret != 0) {
        printf("kqemu_lock_user_page: uvm_map_pageable failed, "
               "pid %d, vm_map %p, va %08lx, errno %d\n",
               curproc->p_pid, map, va, ret);
        return NULL;
    }

    if (pmap_extract(vm_map_pmap(map), va, &pa) == FALSE) {
        printf("kqemu_lock_user_page: pmap_extract failed, "
               "pmap %p, va %08lx\n", vm_map_pmap(map), user_addr);
        /* undo the wiring done above before failing */
        uvm_map_pageable(map, va, va + PAGE_SIZE, TRUE, 0);
        return NULL;
    }

    *ppage_index = pa >> PAGE_SHIFT;
    lock_count++;
    return (struct kqemu_user_page *)va;
}

void CDECL kqemu_unlock_user_page(struct kqemu_user_page *page1)
{
    struct vm_map *map = &curproc->p_vmspace->vm_map;
    vaddr_t va;
    int ret;

    DPRINTF1("kqemu_unlock_user_page(%p)\n", page1);

    va = (vaddr_t)page1;
    /* unwire the page again */
    ret = uvm_map_pageable(map, va, va + PAGE_SIZE, TRUE, 0);
    if (ret != 0) {
        printf("kqemu_unlock_user_page: uvm_map_pageable failed, "
               "pid %d, vm_map %p, va %08lx, errno %d\n",
               curproc->p_pid, map, va, ret);
    }
    lock_count--;
}
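/*
 * Usage sketch (an assumption about the machine-independent kqemu core,
 * which is not part of this file): the two callbacks above are used as a
 * pair, wiring a guest page before it is accessed and unwiring it when
 * the reference is dropped, roughly:
 *
 *	unsigned long page_index;
 *	struct kqemu_user_page *p;
 *
 *	p = kqemu_lock_user_page(&page_index, user_addr);
 *	if (p != NULL) {
 *		... use the physical page at page_index << PAGE_SHIFT ...
 *		kqemu_unlock_user_page(p);
 *	}
 */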
/* Allocate a new page and return its physical address (page index).
   Return a host OS private page identifier or NULL if error */
struct kqemu_page *CDECL kqemu_alloc_zeroed_page(unsigned long *ppage_index)
{
    vaddr_t va;
    paddr_t pa;

    va = uvm_km_alloc(kernel_map, PAGE_SIZE, PAGE_SIZE,
                      UVM_KMF_WIRED | UVM_KMF_ZERO);
    if (va == 0) {
        printf("kqemu_alloc_zeroed_page: uvm_km_alloc failed.\n");
        return NULL;
    }
    if (pmap_extract(pmap_kernel(), va, &pa) == FALSE) {
        printf("kqemu_alloc_zeroed_page: pmap_extract failed, "
               "pmap %p, va %08lx\n", pmap_kernel(), va);
        uvm_km_free(kernel_map, va, PAGE_SIZE, UVM_KMF_WIRED);
        return NULL;
    }

    DPRINTF1("kqemu_alloc_zeroed_page: %08lx\n", (unsigned long)pa);

    *ppage_index = pa >> PAGE_SHIFT;
    page_alloc_count++;
    return (struct kqemu_page *)va;
}

void CDECL kqemu_free_page(struct kqemu_page *page)
{
    DPRINTF1("kqemu_free_page(%p)\n", page);
    uvm_km_free(kernel_map, (vaddr_t)page, PAGE_SIZE, UVM_KMF_WIRED);
    page_alloc_count--;
}

/* Return a host kernel address of the physical page whose private
   identifier is 'page1' */
void * CDECL kqemu_page_kaddr(struct kqemu_page *page)
{
    vaddr_t va = (vaddr_t)page;
    return (void *)va;
}

/* Allocate 'size' bytes of memory in host kernel address space (size
   is a multiple of 4 KB) and return the address or NULL if error. The
   allocated memory must be marked as executable by the host kernel and
   must be page aligned. On i386 with PAE (but not on x86_64), it must
   be allocated in the first 4 GB of physical memory. */
void * CDECL kqemu_vmalloc(unsigned int size)
{
    void *ptr;
    int r, s;

    /*
     * To avoid reuse of the allocated area, we waste some memory when
     * allocating a small amount.  Otherwise we would have to reset the
     * protection before the area is freed, but at that time we do not
     * know the size of the allocated area, which must be passed to
     * uvm_map_protect().
     */
    if (size <= MAXALLOCSAVE)
        size = MAXALLOCSAVE + 1;

    MALLOC(ptr, void *, size, M_KQEMU, M_WAITOK);
    DPRINTF1("kqemu_vmalloc(%u) -> %p\n", size, ptr);

    s = splvm(); /* XXX? */
    /* make the area executable */
    r = uvm_map_protect(kmem_map, (vaddr_t)ptr, (vaddr_t)ptr + size,
                        UVM_PROT_ALL, TRUE);
    splx(s);
    if (r != 0) {
        FREE(ptr, M_KQEMU);
        printf("kqemu_vmalloc: uvm_map_protect failed, errno %d\n", r);
        return NULL;
    }
    malloc_count++;
    return ptr;
}

void CDECL kqemu_vfree(void *ptr)
{
    DPRINTF1("kqemu_vfree(%p)\n", ptr);
    FREE(ptr, M_KQEMU);
    malloc_count--;
}

/* Convert a page aligned address inside a memory area allocated by
   kqemu_vmalloc() to a physical address (page index) */
unsigned long kqemu_vmalloc_to_phys(const void *vaddr)
{
    vaddr_t va = (vaddr_t)vaddr;
    paddr_t pa;

    if (pmap_extract(pmap_kernel(), va, &pa) == FALSE) {
        printf("kqemu_vmalloc_to_phys: pmap_extract failed, "
               "va %08lx\n", va);
        return -1;
    }
    DPRINTF1("kqemu_vmalloc_to_phys(%p)->%08lx\n", vaddr, (unsigned long)pa);
    return pa >> PAGE_SHIFT;
}
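/*
 * Usage sketch (assumed caller behaviour, for illustration only): a buffer
 * obtained from kqemu_vmalloc() is translated page by page with
 * kqemu_vmalloc_to_phys() whenever the core needs physical page indexes:
 *
 *	uint8_t *buf = kqemu_vmalloc(2 * PAGE_SIZE);
 *	unsigned long pfn0 = kqemu_vmalloc_to_phys(buf);
 *	unsigned long pfn1 = kqemu_vmalloc_to_phys(buf + PAGE_SIZE);
 *	...
 *	kqemu_vfree(buf);
 */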
#if 1
/* Map an IO area in the kernel address space and return its
   address. Return NULL if error or not implemented. This function is
   only used if an APIC is detected on the host CPU. */
void * CDECL kqemu_io_map(unsigned long page_index, unsigned int size)
{
    vsize_t rsize = roundup(size, PAGE_SIZE);
    vaddr_t va;
    voff_t offset;

    va = uvm_km_alloc(kernel_map, rsize, 0,
                      UVM_KMF_VAONLY | UVM_KMF_CANFAIL | UVM_KMF_WAITVA);
    if (va == 0) {
        kqemu_log("uvm_km_alloc failed\n");
        return NULL;
    }
    /* enter unmanaged mappings for the requested physical pages */
    for (offset = 0; offset < rsize; offset += PAGE_SIZE)
        pmap_kenter_pa(va + offset, page_index++ << PAGE_SHIFT,
                       VM_PROT_READ | VM_PROT_WRITE);

    io_map_count += rsize >> PAGE_SHIFT;
    return (void *)va;
}

/* Unmap the IO area */
void CDECL kqemu_io_unmap(void *ptr, unsigned int size)
{
    vaddr_t va = (vaddr_t)ptr;
    vsize_t rsize = roundup(size, PAGE_SIZE);

    pmap_kremove(va, rsize);
    /* free the whole VA range allocated in kqemu_io_map() */
    uvm_km_free(kernel_map, va, rsize, UVM_KMF_VAONLY);
    io_map_count -= rsize >> PAGE_SHIFT;
}
#else
/* Map an IO area in the kernel address space and return its
   address. Return NULL if error or not implemented. This function is
   only used if an APIC is detected on the host CPU. */
void * CDECL kqemu_io_map(unsigned long page_index, unsigned int size)
{
    return NULL;
}

/* Unmap the IO area */
void CDECL kqemu_io_unmap(void *ptr, unsigned int size)
{
}
#endif

/* return TRUE if a signal is pending (i.e. the guest must stop
   execution) */
int CDECL kqemu_schedule(void)
{
    DPRINTF2("kqemu_schedule\n");
    yield();
#if 1
#if __NetBSD_Version__ < 499001000 /* pre-newlock2 */
    return CURSIG(curlwp) ? TRUE : FALSE;
#else
    return ((curlwp->l_flag & LW_PENDSIG) != 0);
#endif
#else
    return TRUE;
#endif
}

static char log_buf[4096];

void CDECL kqemu_log(const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    vsnprintf(log_buf, sizeof(log_buf), fmt, ap);
    printf("kqemu: %s", log_buf);
    va_end(ap);
}

/*********************************************************/

static struct kqemu_global_state *kqemu_gs;

struct kqemu_instance {
#if __NetBSD_Version__ < 499001000 /* pre-newlock2 */
    /* NetBSD <= 4.x: giant lock */
#define mutex_init(m, t, i)	do {} while (/* CONSTCOND */0)
#define mutex_enter(a)		do {} while (/* CONSTCOND */0)
#define mutex_exit(a)		do {} while (/* CONSTCOND */0)
#else
    kmutex_t mutex;
#endif
    struct kqemu_state *state;
};

int kqemu_refcnt = 0;

static const struct fileops kqemu_fileops;

static int
kqemu_open(dev_t dev, int flags, int ifmt, struct lwp *l)
{
    int r, fd;
    struct file *filep;
    struct kqemu_instance *ks;

    DPRINTF("kqemu_open: entry\n");

#if __NetBSD_Version__ < 499005600
    r = falloc(l, &filep, &fd);
    if (r != 0) {
        printf("kqemu_open: falloc failed, errno %d\n", r);
        return r;
    }
#else
    r = fd_allocfile(&filep, &fd);
    if (r != 0) {
        printf("kqemu_open: fd_allocfile failed, errno %d\n", r);
        return r;
    }
#endif

    MALLOC(ks, struct kqemu_instance *, sizeof(*ks), M_KQEMU, M_WAITOK);
    memset(ks, 0, sizeof(*ks));
    mutex_init(&ks->mutex, MUTEX_DEFAULT, IPL_NONE);
    kqemu_refcnt++;

    /* fdclone never fails */
#if __NetBSD_Version__ < 499005600
    return fdclone(l, filep, fd, flags, &kqemu_fileops, ks);
#else
    return fd_clone(filep, fd, flags, &kqemu_fileops, ks);
#endif
}

static struct cdevsw kqemu_cdevsw = {
    kqemu_open, noclose, noread, nowrite, noioctl,
    nostop, notty, nopoll, nommap, nokqfilter, D_OTHER,
};
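/*
 * Note on the open path above: kqemu_cdevsw only implements d_open.  Each
 * open() of the device allocates a private struct kqemu_instance and hands
 * it to fdclone()/fd_clone(), which attaches it as filep->f_data of a fresh
 * descriptor backed by kqemu_fileops below.  All further operations
 * (ioctl, close) therefore run on per-open state rather than on a single
 * device-global structure.
 */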
#if __NetBSD_Version__ < 499005600
static int
kqemu_fops_close(struct file *filep, struct lwp *l)
#else
static int
kqemu_fops_close(struct file *filep)
#endif
{
    struct kqemu_instance *ks = filep->f_data;

    DPRINTF("kqemu_fops_close: entry\n");

    if (ks == 0) {
        printf("kqemu_fops_close: NULL f_data\n");
        return ENODEV;
    }

    mutex_enter(&ks->mutex);
    if (ks->state) {
        kqemu_delete(ks->state);
        ks->state = NULL;
    }
    mutex_exit(&ks->mutex);

    FREE(ks, M_KQEMU);
    kqemu_refcnt--;

    DPRINTF("lock_count=%d, page_alloc_count=%d, malloc_count=%d, "
            "io_map_count=%d\n",
            lock_count, page_alloc_count, malloc_count, io_map_count);
    return 0;
}

#if __NetBSD_Version__ < 499003100
static int
fbadop_read(struct file *a, off_t *b, struct uio *c, kauth_cred_t d, int e)
{
    return ENODEV;
}

static int
fbadop_write(struct file *a, off_t *b, struct uio *c, kauth_cred_t d, int e)
{
    return ENODEV;
}
#endif

#if __NetBSD_Version__ < 499005600
static int
kqemu_fops_ioctl(struct file *filep, u_long cmd, void *data, struct lwp *l)
#else
static int
kqemu_fops_ioctl(struct file *filep, u_long cmd, void *data)
#endif
{
    struct kqemu_instance *ks = filep->f_data;
    struct kqemu_state *s;
    int ret;

    if (ks == 0) {
        printf("kqemu_fops_ioctl: NULL f_data\n");
        return EIO;
    }
    s = ks->state;

    mutex_enter(&ks->mutex);
    switch (cmd) {
    case KQEMU_INIT:
    {
        struct kqemu_init d1, *d = &d1, *arg = data;

        DPRINTF("kqemu_fops_ioctl: cmd KQEMU_INIT\n");
        if (s != NULL) {
            ret = EIO;
            break;
        }
        d1 = *arg;
        s = kqemu_init(d, kqemu_gs);
        if (!s) {
            ret = ENOMEM;
            break;
        }
        ks->state = s;
        ret = 0;
        break;
    }
    case KQEMU_EXEC:
    {
        struct kqemu_cpu_state *ctx, *arg = data;
        // int hold_count

        DPRINTF2("kqemu_fops_ioctl: cmd KQEMU_EXEC\n");
        if (!s) {
            ret = EIO;
            break;
        }
        ctx = kqemu_get_cpu_state(s);
        *ctx = *arg;
        /* we are leaving kernel code, thus we can release the lock, right? */
        /* no, kqemu_exec may call kqemu_alloc_zeroed_page() etc.
           since our vm code is not yet smp-safe, we need the lock */
        /* or we could acquire the lock in those callbacks. */
        // hold_count = KERNEL_LOCK_RELEASE_ALL();
        ret = kqemu_exec(s);
        // KERNEL_LOCK_ACQUIRE_COUNT(hold_count);
        if (ctx->retval == KQEMU_RET_SYSCALL) {
            DPRINTF2("kqemu_exec: syscall\n");
        } else if ((ctx->retval & 0xff00) == KQEMU_RET_INT) {
            DPRINTF2("kqemu_exec: interrupt 0x%02x\n", ctx->retval & 0xff);
        } else if ((ctx->retval & 0xff00) == KQEMU_RET_EXCEPTION) {
            DPRINTF2("kqemu_exec: exception 0x%02x\n", ctx->retval & 0xff);
        } else if (ctx->retval == KQEMU_RET_INTR) {
#if __NetBSD_Version__ < 499001000 /* pre-newlock2 */
            DPRINTF2("kqemu_exec: signal %d\n", CURSIG(curlwp));
#else
            DPRINTF2("kqemu_exec: signal\n");
#endif
        } else if (ctx->retval == KQEMU_RET_SOFTMMU) {
            DPRINTF2("kqemu_exec: soft MMU\n");
        } else if (ctx->retval == KQEMU_RET_ABORT) {
            DPRINTF2("kqemu_exec: abort\n");
        }
        *arg = *ctx;
        break;
    }
    case KQEMU_GET_VERSION:
        DPRINTF("kqemu_fops_ioctl: cmd KQEMU_GET_VERSION\n");
        *(int *)data = KQEMU_VERSION;
        ret = 0;
        break;
    default:
        DPRINTF("kqemu_fops_ioctl: unknown cmd\n");
        ret = EINVAL;
        break;
    }
    mutex_exit(&ks->mutex);
    return ret;
}

static const struct fileops kqemu_fileops = {
    fbadop_read,
    fbadop_write,
    kqemu_fops_ioctl,
    fnullop_fcntl,
    fnullop_poll,
    fbadop_stat,
    kqemu_fops_close,
    fnullop_kqfilter,
};

MOD_DEV("kqemu", "kqemu", NULL, -1, &kqemu_cdevsw, -1);

static int sysctl_num = -1;
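/*
 * Userland view of the device registered above (an illustrative sketch
 * only; the device node path and the exact call sequence used by QEMU are
 * assumptions, not taken from this file):
 *
 *	int fd = open("/dev/kqemu", O_RDWR);
 *	int version;
 *	ioctl(fd, KQEMU_GET_VERSION, &version);   (compare with KQEMU_VERSION)
 *
 *	struct kqemu_init init = { ... guest RAM layout ... };
 *	ioctl(fd, KQEMU_INIT, &init);             (creates the kqemu_state)
 *
 *	struct kqemu_cpu_state cpu = { ... };
 *	ioctl(fd, KQEMU_EXEC, &cpu);              (runs guest code; cpu.retval
 *	                                           reports why it returned)
 *	close(fd);                                (kqemu_fops_close() frees
 *	                                           the instance)
 */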
static int
kqemu_attach(struct lkm_table *lkmtp, int cmd)
{
    int max_locked_pages;
    const struct sysctlnode *node;
    int ret = 0;
    char buf[9];

    printf("QEMU Accelerator Module version %d.%d.%d, "
           "Copyright (c) 2005-2007 Fabrice Bellard\n",
           (KQEMU_VERSION >> 16),
           (KQEMU_VERSION >> 8) & 0xff,
           (KQEMU_VERSION) & 0xff);

    max_locked_pages = physmem / 2;

    kqemu_gs = kqemu_global_init(max_locked_pages);
    if (!kqemu_gs)
        return ENOMEM;

    ret = sysctl_createv(NULL, 0, NULL, &node,
                         CTLFLAG_READWRITE, CTLTYPE_INT, "kqemu",
                         SYSCTL_DESCR("kqemu debug level"),
                         NULL, 0, &kqemu_debug, 0,
                         CTL_DEBUG, CTL_CREATE, CTL_EOL);
    if (ret != 0)
        printf("sysctl_kqemu_setup: sysctl_createv failed, errno %d\n", ret);
    else
        sysctl_num = node->sysctl_num;

    format_bytes(buf, sizeof(buf), max_locked_pages * PAGE_SIZE);
    printf("KQEMU installed, max_locked_mem=%s, major=%d.\n",
           buf, cdevsw_lookup_major(&kqemu_cdevsw));
    return 0;
}

static int
kqemu_detach(struct lkm_table *lkmtp, int cmd)
{
    if (kqemu_refcnt != 0) {
        DPRINTF("kqemu_detach: refcnt %d\n", kqemu_refcnt);
        return EBUSY;
    }

    if (kqemu_gs) {
        kqemu_global_delete(kqemu_gs);
        kqemu_gs = NULL;
    }

    if (lock_count != 0 || page_alloc_count != 0 ||
        io_map_count != 0 || malloc_count != 0) {
        printf("kqemu_detach: lock_count=%d, page_alloc_count=%d, "
               "io_map_count=%d, malloc_count=%d\n",
               lock_count, page_alloc_count, io_map_count, malloc_count);
        /* return EBUSY; */
    }

    if (sysctl_num >= 0)
        sysctl_destroyv(NULL, CTL_DEBUG, sysctl_num, CTL_EOL);
    return 0;
}

static int
kqemu_stat(struct lkm_table *lkmtp, int cmd) /* XXX */
{
    DPRINTF("lock_count=%d, page_alloc_count=%d, "
            "io_map_count=%d, malloc_count=%d\n",
            lock_count, page_alloc_count, io_map_count, malloc_count);
    return 0;
}

int
kqemu_lkmentry(struct lkm_table *lkmtp, int cmd, int ver)
{
    LKM_DISPATCH(lkmtp, cmd, NULL, kqemu_attach, kqemu_detach, kqemu_stat);
}
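/*
 * Loading sketch (the object file name is an assumption): as a classic LKM
 * this module is expected to be handled with the LKM tools, e.g.
 *
 *	modload ./kqemu.o
 *	modstat -n kqemu
 *	modunload -n kqemu
 *
 * kqemu_lkmentry() is the entry point resolved at load time;
 * LKM_DISPATCH() routes the load, unload and stat requests to
 * kqemu_attach(), kqemu_detach() and kqemu_stat() respectively.
 */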