diff options
Diffstat (limited to 'arch/um/kernel')
-rw-r--r-- | arch/um/kernel/irq.c | 534 | ||||
-rw-r--r-- | arch/um/kernel/process.c | 29 | ||||
-rw-r--r-- | arch/um/kernel/sigio.c | 2 | ||||
-rw-r--r-- | arch/um/kernel/skas/clone.c | 2 | ||||
-rw-r--r-- | arch/um/kernel/time.c | 97 | ||||
-rw-r--r-- | arch/um/kernel/tlb.c | 54 | ||||
-rw-r--r-- | arch/um/kernel/um_arch.c | 67 |
7 files changed, 502 insertions, 283 deletions
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 3577118bb4a5..3741d2380060 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -19,27 +19,40 @@ #include <kern_util.h> #include <os.h> #include <irq_user.h> +#include <irq_kern.h> +#include <as-layout.h> extern void free_irqs(void); /* When epoll triggers we do not know why it did so * we can also have different IRQs for read and write. - * This is why we keep a small irq_fd array for each fd - + * This is why we keep a small irq_reg array for each fd - * one entry per IRQ type */ +struct irq_reg { + void *id; + int irq; + /* it's cheaper to store this than to query it */ + int events; + bool active; + bool pending; + bool wakeup; +}; struct irq_entry { - struct irq_entry *next; + struct list_head list; int fd; - struct irq_fd *irq_array[MAX_IRQ_TYPE + 1]; + struct irq_reg reg[NUM_IRQ_TYPES]; + bool suspended; + bool sigio_workaround; }; -static struct irq_entry *active_fds; - static DEFINE_SPINLOCK(irq_lock); +static LIST_HEAD(active_fds); +static DECLARE_BITMAP(irqs_allocated, NR_IRQS); -static void irq_io_loop(struct irq_fd *irq, struct uml_pt_regs *regs) +static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs) { /* * irq->active guards against reentry @@ -49,23 +62,27 @@ static void irq_io_loop(struct irq_fd *irq, struct uml_pt_regs *regs) */ if (irq->active) { irq->active = false; + do { irq->pending = false; do_IRQ(irq->irq, regs); - } while (irq->pending && (!irq->purge)); - if (!irq->purge) - irq->active = true; + } while (irq->pending); + + irq->active = true; } else { irq->pending = true; } } +void sigio_handler_suspend(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) +{ + /* nothing */ +} + void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) { struct irq_entry *irq_entry; - struct irq_fd *irq; - - int n, i, j; + int n, i; while (1) { /* This is now lockless - epoll keeps back-referencesto the irqs @@ -84,21 +101,18 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) } for (i = 0; i < n ; i++) { - /* Epoll back reference is the entry with 3 irq_fd - * leaves - one for each irq type. - */ - irq_entry = (struct irq_entry *) - os_epoll_get_data_pointer(i); - for (j = 0; j < MAX_IRQ_TYPE ; j++) { - irq = irq_entry->irq_array[j]; - if (irq == NULL) + enum um_irq_type t; + + irq_entry = os_epoll_get_data_pointer(i); + + for (t = 0; t < NUM_IRQ_TYPES; t++) { + int events = irq_entry->reg[t].events; + + if (!events) continue; - if (os_epoll_triggered(i, irq->events) > 0) - irq_io_loop(irq, regs); - if (irq->purge) { - irq_entry->irq_array[j] = NULL; - kfree(irq); - } + + if (os_epoll_triggered(i, events) > 0) + irq_io_loop(&irq_entry->reg[t], regs); } } } @@ -106,32 +120,59 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) free_irqs(); } -static int assign_epoll_events_to_irq(struct irq_entry *irq_entry) +static struct irq_entry *get_irq_entry_by_fd(int fd) { - int i; - int events = 0; - struct irq_fd *irq; + struct irq_entry *walk; - for (i = 0; i < MAX_IRQ_TYPE ; i++) { - irq = irq_entry->irq_array[i]; - if (irq != NULL) - events = irq->events | events; - } - if (events > 0) { - /* os_add_epoll will call os_mod_epoll if this already exists */ - return os_add_epoll_fd(events, irq_entry->fd, irq_entry); + lockdep_assert_held(&irq_lock); + + list_for_each_entry(walk, &active_fds, list) { + if (walk->fd == fd) + return walk; } - /* No events - delete */ - return os_del_epoll_fd(irq_entry->fd); + + return NULL; } +static void free_irq_entry(struct irq_entry *to_free, bool remove) +{ + if (!to_free) + return; + + if (remove) + os_del_epoll_fd(to_free->fd); + list_del(&to_free->list); + kfree(to_free); +} + +static bool update_irq_entry(struct irq_entry *entry) +{ + enum um_irq_type i; + int events = 0; + + for (i = 0; i < NUM_IRQ_TYPES; i++) + events |= entry->reg[i].events; + + if (events) { + /* will modify (instead of add) if needed */ + os_add_epoll_fd(events, entry->fd, entry); + return true; + } + + os_del_epoll_fd(entry->fd); + return false; +} +static void update_or_free_irq_entry(struct irq_entry *entry) +{ + if (!update_irq_entry(entry)) + free_irq_entry(entry, false); +} -static int activate_fd(int irq, int fd, int type, void *dev_id) +static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id) { - struct irq_fd *new_fd; struct irq_entry *irq_entry; - int i, err, events; + int err, events = os_event_mask(type); unsigned long flags; err = os_set_fd_async(fd); @@ -139,73 +180,34 @@ static int activate_fd(int irq, int fd, int type, void *dev_id) goto out; spin_lock_irqsave(&irq_lock, flags); - - /* Check if we have an entry for this fd */ - - err = -EBUSY; - for (irq_entry = active_fds; - irq_entry != NULL; irq_entry = irq_entry->next) { - if (irq_entry->fd == fd) - break; - } - - if (irq_entry == NULL) { - /* This needs to be atomic as it may be called from an - * IRQ context. - */ - irq_entry = kmalloc(sizeof(struct irq_entry), GFP_ATOMIC); - if (irq_entry == NULL) { - printk(KERN_ERR - "Failed to allocate new IRQ entry\n"); + irq_entry = get_irq_entry_by_fd(fd); + if (irq_entry) { + /* cannot register the same FD twice with the same type */ + if (WARN_ON(irq_entry->reg[type].events)) { + err = -EALREADY; goto out_unlock; } - irq_entry->fd = fd; - for (i = 0; i < MAX_IRQ_TYPE; i++) - irq_entry->irq_array[i] = NULL; - irq_entry->next = active_fds; - active_fds = irq_entry; - } - - /* Check if we are trying to re-register an interrupt for a - * particular fd - */ - if (irq_entry->irq_array[type] != NULL) { - printk(KERN_ERR - "Trying to reregister IRQ %d FD %d TYPE %d ID %p\n", - irq, fd, type, dev_id - ); - goto out_unlock; + /* temporarily disable to avoid IRQ-side locking */ + os_del_epoll_fd(fd); } else { - /* New entry for this fd */ - - err = -ENOMEM; - new_fd = kmalloc(sizeof(struct irq_fd), GFP_ATOMIC); - if (new_fd == NULL) + irq_entry = kzalloc(sizeof(*irq_entry), GFP_ATOMIC); + if (!irq_entry) { + err = -ENOMEM; goto out_unlock; - - events = os_event_mask(type); - - *new_fd = ((struct irq_fd) { - .id = dev_id, - .irq = irq, - .type = type, - .events = events, - .active = true, - .pending = false, - .purge = false - }); - /* Turn off any IO on this fd - allows us to - * avoid locking the IRQ loop - */ - os_del_epoll_fd(irq_entry->fd); - irq_entry->irq_array[type] = new_fd; + } + irq_entry->fd = fd; + list_add_tail(&irq_entry->list, &active_fds); + maybe_sigio_broken(fd); } - /* Turn back IO on with the correct (new) IO event mask */ - assign_epoll_events_to_irq(irq_entry); + irq_entry->reg[type].id = dev_id; + irq_entry->reg[type].irq = irq; + irq_entry->reg[type].active = true; + irq_entry->reg[type].events = events; + + WARN_ON(!update_irq_entry(irq_entry)); spin_unlock_irqrestore(&irq_lock, flags); - maybe_sigio_broken(fd, (type != IRQ_NONE)); return 0; out_unlock: @@ -215,104 +217,10 @@ out: } /* - * Walk the IRQ list and dispose of any unused entries. - * Should be done under irq_lock. + * Remove the entry or entries for a specific FD, if you + * don't want to remove all the possible entries then use + * um_free_irq() or deactivate_fd() instead. */ - -static void garbage_collect_irq_entries(void) -{ - int i; - bool reap; - struct irq_entry *walk; - struct irq_entry *previous = NULL; - struct irq_entry *to_free; - - if (active_fds == NULL) - return; - walk = active_fds; - while (walk != NULL) { - reap = true; - for (i = 0; i < MAX_IRQ_TYPE ; i++) { - if (walk->irq_array[i] != NULL) { - reap = false; - break; - } - } - if (reap) { - if (previous == NULL) - active_fds = walk->next; - else - previous->next = walk->next; - to_free = walk; - } else { - to_free = NULL; - } - walk = walk->next; - kfree(to_free); - } -} - -/* - * Walk the IRQ list and get the descriptor for our FD - */ - -static struct irq_entry *get_irq_entry_by_fd(int fd) -{ - struct irq_entry *walk = active_fds; - - while (walk != NULL) { - if (walk->fd == fd) - return walk; - walk = walk->next; - } - return NULL; -} - - -/* - * Walk the IRQ list and dispose of an entry for a specific - * device, fd and number. Note - if sharing an IRQ for read - * and writefor the same FD it will be disposed in either case. - * If this behaviour is undesirable use different IRQ ids. - */ - -#define IGNORE_IRQ 1 -#define IGNORE_DEV (1<<1) - -static void do_free_by_irq_and_dev( - struct irq_entry *irq_entry, - unsigned int irq, - void *dev, - int flags -) -{ - int i; - struct irq_fd *to_free; - - for (i = 0; i < MAX_IRQ_TYPE ; i++) { - if (irq_entry->irq_array[i] != NULL) { - if ( - ((flags & IGNORE_IRQ) || - (irq_entry->irq_array[i]->irq == irq)) && - ((flags & IGNORE_DEV) || - (irq_entry->irq_array[i]->id == dev)) - ) { - /* Turn off any IO on this fd - allows us to - * avoid locking the IRQ loop - */ - os_del_epoll_fd(irq_entry->fd); - to_free = irq_entry->irq_array[i]; - irq_entry->irq_array[i] = NULL; - assign_epoll_events_to_irq(irq_entry); - if (to_free->active) - to_free->purge = true; - else - kfree(to_free); - } - } - } -} - void free_irq_by_fd(int fd) { struct irq_entry *to_free; @@ -320,58 +228,64 @@ void free_irq_by_fd(int fd) spin_lock_irqsave(&irq_lock, flags); to_free = get_irq_entry_by_fd(fd); - if (to_free != NULL) { - do_free_by_irq_and_dev( - to_free, - -1, - NULL, - IGNORE_IRQ | IGNORE_DEV - ); - } - garbage_collect_irq_entries(); + free_irq_entry(to_free, true); spin_unlock_irqrestore(&irq_lock, flags); } EXPORT_SYMBOL(free_irq_by_fd); static void free_irq_by_irq_and_dev(unsigned int irq, void *dev) { - struct irq_entry *to_free; + struct irq_entry *entry; unsigned long flags; spin_lock_irqsave(&irq_lock, flags); - to_free = active_fds; - while (to_free != NULL) { - do_free_by_irq_and_dev( - to_free, - irq, - dev, - 0 - ); - to_free = to_free->next; + list_for_each_entry(entry, &active_fds, list) { + enum um_irq_type i; + + for (i = 0; i < NUM_IRQ_TYPES; i++) { + struct irq_reg *reg = &entry->reg[i]; + + if (!reg->events) + continue; + if (reg->irq != irq) + continue; + if (reg->id != dev) + continue; + + os_del_epoll_fd(entry->fd); + reg->events = 0; + update_or_free_irq_entry(entry); + goto out; + } } - garbage_collect_irq_entries(); +out: spin_unlock_irqrestore(&irq_lock, flags); } - void deactivate_fd(int fd, int irqnum) { - struct irq_entry *to_free; + struct irq_entry *entry; unsigned long flags; + enum um_irq_type i; os_del_epoll_fd(fd); + spin_lock_irqsave(&irq_lock, flags); - to_free = get_irq_entry_by_fd(fd); - if (to_free != NULL) { - do_free_by_irq_and_dev( - to_free, - irqnum, - NULL, - IGNORE_DEV - ); + entry = get_irq_entry_by_fd(fd); + if (!entry) + goto out; + + for (i = 0; i < NUM_IRQ_TYPES; i++) { + if (!entry->reg[i].events) + continue; + if (entry->reg[i].irq == irqnum) + entry->reg[i].events = 0; } - garbage_collect_irq_entries(); + + update_or_free_irq_entry(entry); +out: spin_unlock_irqrestore(&irq_lock, flags); + ignore_sigio_fd(fd); } EXPORT_SYMBOL(deactivate_fd); @@ -384,24 +298,17 @@ EXPORT_SYMBOL(deactivate_fd); */ int deactivate_all_fds(void) { - struct irq_entry *to_free; + struct irq_entry *entry; /* Stop IO. The IRQ loop has no lock so this is our * only way of making sure we are safe to dispose * of all IRQ handlers */ os_set_ioignore(); - to_free = active_fds; - while (to_free != NULL) { - do_free_by_irq_and_dev( - to_free, - -1, - NULL, - IGNORE_IRQ | IGNORE_DEV - ); - to_free = to_free->next; - } - /* don't garbage collect - we can no longer call kfree() here */ + + /* we can no longer call kfree() here so just deactivate */ + list_for_each_entry(entry, &active_fds, list) + os_del_epoll_fd(entry->fd); os_close_epoll_fd(); return 0; } @@ -421,31 +328,146 @@ unsigned int do_IRQ(int irq, struct uml_pt_regs *regs) return 1; } -void um_free_irq(unsigned int irq, void *dev) +void um_free_irq(int irq, void *dev) { + if (WARN(irq < 0 || irq > NR_IRQS, "freeing invalid irq %d", irq)) + return; + free_irq_by_irq_and_dev(irq, dev); free_irq(irq, dev); + clear_bit(irq, irqs_allocated); } EXPORT_SYMBOL(um_free_irq); -int um_request_irq(unsigned int irq, int fd, int type, - irq_handler_t handler, - unsigned long irqflags, const char * devname, - void *dev_id) +int um_request_irq(int irq, int fd, enum um_irq_type type, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id) { int err; + if (irq == UM_IRQ_ALLOC) { + int i; + + for (i = UM_FIRST_DYN_IRQ; i < NR_IRQS; i++) { + if (!test_and_set_bit(i, irqs_allocated)) { + irq = i; + break; + } + } + } + + if (irq < 0) + return -ENOSPC; + if (fd != -1) { err = activate_fd(irq, fd, type, dev_id); if (err) - return err; + goto error; } - return request_irq(irq, handler, irqflags, devname, dev_id); -} + err = request_irq(irq, handler, irqflags, devname, dev_id); + if (err < 0) + goto error; + return irq; +error: + clear_bit(irq, irqs_allocated); + return err; +} EXPORT_SYMBOL(um_request_irq); +#ifdef CONFIG_PM_SLEEP +void um_irqs_suspend(void) +{ + struct irq_entry *entry; + unsigned long flags; + + sig_info[SIGIO] = sigio_handler_suspend; + + spin_lock_irqsave(&irq_lock, flags); + list_for_each_entry(entry, &active_fds, list) { + enum um_irq_type t; + bool wake = false; + + for (t = 0; t < NUM_IRQ_TYPES; t++) { + if (!entry->reg[t].events) + continue; + + /* + * For the SIGIO_WRITE_IRQ, which is used to handle the + * SIGIO workaround thread, we need special handling: + * enable wake for it itself, but below we tell it about + * any FDs that should be suspended. + */ + if (entry->reg[t].wakeup || + entry->reg[t].irq == SIGIO_WRITE_IRQ) { + wake = true; + break; + } + } + + if (!wake) { + entry->suspended = true; + os_clear_fd_async(entry->fd); + entry->sigio_workaround = + !__ignore_sigio_fd(entry->fd); + } + } + spin_unlock_irqrestore(&irq_lock, flags); +} + +void um_irqs_resume(void) +{ + struct irq_entry *entry; + unsigned long flags; + + spin_lock_irqsave(&irq_lock, flags); + list_for_each_entry(entry, &active_fds, list) { + if (entry->suspended) { + int err = os_set_fd_async(entry->fd); + + WARN(err < 0, "os_set_fd_async returned %d\n", err); + entry->suspended = false; + + if (entry->sigio_workaround) { + err = __add_sigio_fd(entry->fd); + WARN(err < 0, "add_sigio_returned %d\n", err); + } + } + } + spin_unlock_irqrestore(&irq_lock, flags); + + sig_info[SIGIO] = sigio_handler; + send_sigio_to_self(); +} + +static int normal_irq_set_wake(struct irq_data *d, unsigned int on) +{ + struct irq_entry *entry; + unsigned long flags; + + spin_lock_irqsave(&irq_lock, flags); + list_for_each_entry(entry, &active_fds, list) { + enum um_irq_type t; + + for (t = 0; t < NUM_IRQ_TYPES; t++) { + if (!entry->reg[t].events) + continue; + + if (entry->reg[t].irq != d->irq) + continue; + entry->reg[t].wakeup = on; + goto unlock; + } + } +unlock: + spin_unlock_irqrestore(&irq_lock, flags); + return 0; +} +#else +#define normal_irq_set_wake NULL +#endif + /* * irq_chip must define at least enable/disable and ack when * the edge handler is used. @@ -454,7 +476,7 @@ static void dummy(struct irq_data *d) { } -/* This is used for everything else than the timer. */ +/* This is used for everything other than the timer. */ static struct irq_chip normal_irq_type = { .name = "SIGIO", .irq_disable = dummy, @@ -462,10 +484,11 @@ static struct irq_chip normal_irq_type = { .irq_ack = dummy, .irq_mask = dummy, .irq_unmask = dummy, + .irq_set_wake = normal_irq_set_wake, }; -static struct irq_chip SIGVTALRM_irq_type = { - .name = "SIGVTALRM", +static struct irq_chip alarm_irq_type = { + .name = "SIGALRM", .irq_disable = dummy, .irq_enable = dummy, .irq_ack = dummy, @@ -477,10 +500,9 @@ void __init init_IRQ(void) { int i; - irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq); - + irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq); - for (i = 1; i <= LAST_IRQ; i++) + for (i = 1; i < NR_IRQS; i++) irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq); /* Initialize EPOLL Loop */ os_setup_epoll(); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 3bed09538dd9..2a986ece5478 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -32,6 +32,7 @@ #include <os.h> #include <skas.h> #include <linux/time-internal.h> +#include <asm/set_memory.h> /* * This is a per-cpu array. A processor only modifies its entry and it only @@ -62,16 +63,18 @@ void free_stack(unsigned long stack, int order) free_pages(stack, order); } -unsigned long alloc_stack(int order, int atomic) +unsigned long alloc_stack(int atomic) { - unsigned long page; + unsigned long addr; gfp_t flags = GFP_KERNEL; if (atomic) flags = GFP_ATOMIC; - page = __get_free_pages(flags, order); + addr = __get_free_pages(flags, 1); - return page; + set_memory_ro(addr, 1); + + return addr + PAGE_SIZE; } static inline void set_current(struct task_struct *task) @@ -99,7 +102,8 @@ void interrupt_end(void) if (need_resched()) schedule(); - if (test_thread_flag(TIF_SIGPENDING)) + if (test_thread_flag(TIF_SIGPENDING) || + test_thread_flag(TIF_NOTIFY_SIGNAL)) do_signal(regs); if (test_thread_flag(TIF_NOTIFY_RESUME)) tracehook_notify_resume(regs); @@ -202,22 +206,19 @@ void initial_thread_cb(void (*proc)(void *), void *arg) kmalloc_ok = save_kmalloc_ok; } -static void um_idle_sleep(void) +void um_idle_sleep(void) { - unsigned long long duration = UM_NSEC_PER_SEC; - - if (time_travel_mode != TT_MODE_OFF) { - time_travel_sleep(duration); - } else { - os_idle_sleep(duration); - } + if (time_travel_mode != TT_MODE_OFF) + time_travel_sleep(); + else + os_idle_sleep(); } void arch_cpu_idle(void) { cpu_tasks[current_thread_info()->cpu].pid = os_getpid(); um_idle_sleep(); - local_irq_enable(); + raw_local_irq_enable(); } int __cant_sleep(void) { diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c index d1cffc2a7f21..5085a50c3b8c 100644 --- a/arch/um/kernel/sigio.c +++ b/arch/um/kernel/sigio.c @@ -25,7 +25,7 @@ int write_sigio_irq(int fd) err = um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ, sigio_interrupt, 0, "write sigio", NULL); - if (err) { + if (err < 0) { printk(KERN_ERR "write_sigio_irq : um_request_irq failed, " "err = %d\n", err); return -1; diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c index 95c355181dcd..bfb70c456b30 100644 --- a/arch/um/kernel/skas/clone.c +++ b/arch/um/kernel/skas/clone.c @@ -21,7 +21,7 @@ * on some systems. */ -void __section(".__syscall_stub") +void __attribute__ ((__section__ (".__syscall_stub"))) stub_clone_handler(void) { struct stub_data *data = (struct stub_data *) STUB_DATA; diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 3d109ff3309b..f4db89b5b5a6 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -31,6 +31,7 @@ static bool time_travel_start_set; static unsigned long long time_travel_start; static unsigned long long time_travel_time; static LIST_HEAD(time_travel_events); +static LIST_HEAD(time_travel_irqs); static unsigned long long time_travel_timer_interval; static unsigned long long time_travel_next_event; static struct time_travel_event time_travel_timer_event; @@ -46,6 +47,9 @@ static void time_travel_set_time(unsigned long long ns) if (unlikely(ns < time_travel_time)) panic("time-travel: time goes backwards %lld -> %lld\n", time_travel_time, ns); + else if (unlikely(ns >= S64_MAX)) + panic("The system was going to sleep forever, aborting"); + time_travel_time = ns; } @@ -180,6 +184,14 @@ static void time_travel_ext_update_request(unsigned long long time) time == time_travel_ext_prev_request) return; + /* + * if we're running and are allowed to run past the request + * then we don't need to update it either + */ + if (!time_travel_ext_waiting && time_travel_ext_free_until_valid && + time < time_travel_ext_free_until) + return; + time_travel_ext_prev_request = time; time_travel_ext_prev_request_valid = true; time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time); @@ -187,7 +199,13 @@ static void time_travel_ext_update_request(unsigned long long time) void __time_travel_propagate_time(void) { + static unsigned long long last_propagated; + + if (last_propagated == time_travel_time) + return; + time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time); + last_propagated = time_travel_time; } EXPORT_SYMBOL_GPL(__time_travel_propagate_time); @@ -214,6 +232,7 @@ static void time_travel_ext_wait(bool idle) }; time_travel_ext_prev_request_valid = false; + time_travel_ext_free_until_valid = false; time_travel_ext_waiting++; time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1); @@ -260,11 +279,6 @@ static void __time_travel_add_event(struct time_travel_event *e, struct time_travel_event *tmp; bool inserted = false; - if (WARN(time_travel_mode == TT_MODE_BASIC && - e != &time_travel_timer_event, - "only timer events can be handled in basic mode")) - return; - if (e->pending) return; @@ -311,6 +325,35 @@ void time_travel_periodic_timer(struct time_travel_event *e) deliver_alarm(); } +void deliver_time_travel_irqs(void) +{ + struct time_travel_event *e; + unsigned long flags; + + /* + * Don't do anything for most cases. Note that because here we have + * to disable IRQs (and re-enable later) we'll actually recurse at + * the end of the function, so this is strictly necessary. + */ + if (likely(list_empty(&time_travel_irqs))) + return; + + local_irq_save(flags); + irq_enter(); + while ((e = list_first_entry_or_null(&time_travel_irqs, + struct time_travel_event, + list))) { + WARN(e->time != time_travel_time, + "time moved from %lld to %lld before IRQ delivery\n", + time_travel_time, e->time); + list_del(&e->list); + e->pending = false; + e->fn(e); + } + irq_exit(); + local_irq_restore(flags); +} + static void time_travel_deliver_event(struct time_travel_event *e) { if (e == &time_travel_timer_event) { @@ -319,6 +362,14 @@ static void time_travel_deliver_event(struct time_travel_event *e) * by itself, so must handle it specially here */ e->fn(e); + } else if (irqs_disabled()) { + list_add_tail(&e->list, &time_travel_irqs); + /* + * set pending again, it was set to false when the + * event was deleted from the original list, but + * now it's still pending until we deliver the IRQ. + */ + e->pending = true; } else { unsigned long flags; @@ -404,9 +455,14 @@ static void time_travel_oneshot_timer(struct time_travel_event *e) deliver_alarm(); } -void time_travel_sleep(unsigned long long duration) +void time_travel_sleep(void) { - unsigned long long next = time_travel_time + duration; + /* + * Wait "forever" (using S64_MAX because there are some potential + * wrapping issues, especially with the current TT_MODE_EXTERNAL + * controller application. + */ + unsigned long long next = S64_MAX; if (time_travel_mode == TT_MODE_BASIC) os_timer_disable(); @@ -483,6 +539,7 @@ invalid_number: #define time_travel_start_set 0 #define time_travel_start 0 #define time_travel_time 0 +#define time_travel_ext_waiting 0 static inline void time_travel_update_time(unsigned long long ns, bool retearly) { @@ -628,7 +685,8 @@ static u64 timer_read(struct clocksource *cs) * "what do I do next" and onstack event we use to know when * to return from time_travel_update_time(). */ - if (!irqs_disabled() && !in_interrupt() && !in_softirq()) + if (!irqs_disabled() && !in_interrupt() && !in_softirq() && + !time_travel_ext_waiting) time_travel_update_time(time_travel_time + TIMER_MULTIPLIER, false); @@ -673,10 +731,8 @@ void read_persistent_clock64(struct timespec64 *ts) { long long nsecs; - if (time_travel_start_set) + if (time_travel_mode != TT_MODE_OFF) nsecs = time_travel_start + time_travel_time; - else if (time_travel_mode == TT_MODE_EXTERNAL) - nsecs = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1); else nsecs = os_persistent_clock_emulation(); @@ -686,6 +742,25 @@ void read_persistent_clock64(struct timespec64 *ts) void __init time_init(void) { +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + switch (time_travel_mode) { + case TT_MODE_EXTERNAL: + time_travel_start = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1); + /* controller gave us the *current* time, so adjust by that */ + time_travel_ext_get_time(); + time_travel_start -= time_travel_time; + break; + case TT_MODE_INFCPU: + case TT_MODE_BASIC: + if (!time_travel_start_set) + time_travel_start = os_persistent_clock_emulation(); + break; + case TT_MODE_OFF: + /* we just read the host clock with os_persistent_clock_emulation() */ + break; + } +#endif + timer_set_signal_handler(); late_time_init = um_timer_setup; } diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 61776790cd67..437d1f1cc5ec 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -608,3 +608,57 @@ void force_flush_all(void) vma = vma->vm_next; } } + +struct page_change_data { + unsigned int set_mask, clear_mask; +}; + +static int change_page_range(pte_t *ptep, unsigned long addr, void *data) +{ + struct page_change_data *cdata = data; + pte_t pte = READ_ONCE(*ptep); + + pte_clear_bits(pte, cdata->clear_mask); + pte_set_bits(pte, cdata->set_mask); + + set_pte(ptep, pte); + return 0; +} + +static int change_memory(unsigned long start, unsigned long pages, + unsigned int set_mask, unsigned int clear_mask) +{ + unsigned long size = pages * PAGE_SIZE; + struct page_change_data data; + int ret; + + data.set_mask = set_mask; + data.clear_mask = clear_mask; + + ret = apply_to_page_range(&init_mm, start, size, change_page_range, + &data); + + flush_tlb_kernel_range(start, start + size); + + return ret; +} + +int set_memory_ro(unsigned long addr, int numpages) +{ + return change_memory(addr, numpages, 0, _PAGE_RW); +} + +int set_memory_rw(unsigned long addr, int numpages) +{ + return change_memory(addr, numpages, _PAGE_RW, 0); +} + +int set_memory_nx(unsigned long addr, int numpages) +{ + return -EOPNOTSUPP; +} + +int set_memory_x(unsigned long addr, int numpages) +{ + return -EOPNOTSUPP; +} diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 76b37297b7d4..31d356b1ffd8 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -13,6 +13,7 @@ #include <linux/sched.h> #include <linux/sched/task.h> #include <linux/kmsg_dump.h> +#include <linux/suspend.h> #include <asm/processor.h> #include <asm/sections.h> @@ -377,3 +378,69 @@ void *text_poke(void *addr, const void *opcode, size_t len) void text_poke_sync(void) { } + +void uml_pm_wake(void) +{ + pm_system_wakeup(); +} + +#ifdef CONFIG_PM_SLEEP +static int um_suspend_valid(suspend_state_t state) +{ + return state == PM_SUSPEND_MEM; +} + +static int um_suspend_prepare(void) +{ + um_irqs_suspend(); + return 0; +} + +static int um_suspend_enter(suspend_state_t state) +{ + if (WARN_ON(state != PM_SUSPEND_MEM)) + return -EINVAL; + + /* + * This is identical to the idle sleep, but we've just + * (during suspend) turned off all interrupt sources + * except for the ones we want, so now we can only wake + * up on something we actually want to wake up on. All + * timing has also been suspended. + */ + um_idle_sleep(); + return 0; +} + +static void um_suspend_finish(void) +{ + um_irqs_resume(); +} + +const struct platform_suspend_ops um_suspend_ops = { + .valid = um_suspend_valid, + .prepare = um_suspend_prepare, + .enter = um_suspend_enter, + .finish = um_suspend_finish, +}; + +static int init_pm_wake_signal(void) +{ + /* + * In external time-travel mode we can't use signals to wake up + * since that would mess with the scheduling. We'll have to do + * some additional work to support wakeup on virtio devices or + * similar, perhaps implementing a fake RTC controller that can + * trigger wakeup (and request the appropriate scheduling from + * the external scheduler when going to suspend.) + */ + if (time_travel_mode != TT_MODE_EXTERNAL) + register_pm_wake_signal(); + + suspend_set_ops(&um_suspend_ops); + + return 0; +} + +late_initcall(init_pm_wake_signal); +#endif |